├── .gitignore
├── Efficiency
│   ├── Efficiency-BTUs-data.ipynb
│   ├── Efficiency-BTUs.ipynb
│   ├── Efficiency-Coincidence-Water-Gas-Elec.ipynb
│   ├── Efficiency-Emissions-Density.ipynb
│   ├── Efficiency-Percent-PV-Home-Usage.ipynb
│   ├── Efficiency-Water_vs_Energy.ipynb
│   └── ercot_emissions_2018.csv
├── ElectricVehicle
│   ├── Data-Extraction--EV_charging_alignment_with_State_of_Texas_critical_peak_power_events.ipynb
│   ├── Data-Extraction--EV_charging_alignment_with_at_home_rooftop_solar.ipynb
│   ├── Data-Extraction--EV_overall_household_demand.ipynb
│   ├── EV_charging_alignment_with_State_of_Texas_critical_peak_power_events.ipynb
│   ├── EV_charging_alignment_with_at_home_rooftop_solar.ipynb
│   └── EV_overall_household_demand.ipynb
├── HVAC
│   ├── AC Comparison JupyterHub Example Book .ipynb
│   ├── HVAC-Cooling-During-4CP.ipynb
│   └── HVAC-Solar-Generation-Alignment.ipynb
├── LICENSE
├── PV
│   ├── Data-Extraction--PV-South-vs-West.ipynb
│   ├── Data-Extraction--PV-storage-savings-4CP.ipynb
│   ├── PV-South-vs-West.ipynb
│   └── PV-storage-savings-4CP.ipynb
├── README.md
└── config
    ├── config.txt
    └── read_config.py
/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g.
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # macOS directory metadata 132 | .DS_Store 133 | -------------------------------------------------------------------------------- /Efficiency/Efficiency-BTUs-data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Dataport Database Extraction Notebook for the Efficiency-BTUs Notebook\n", 8 | "## This notebook connects to the database, extracts the data live, and writes it to compressed zip files in this directory.\n", 9 | "If you would like to use the files exported by this notebook instead of the ones we've extracted and prepared for you in the /shared directory on the JupyterHub server, modify the read_csv calls in the Efficiency-BTUs notebook to point at these local copies." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "#import packages\n", 19 | "import pandas as pd\n", 20 | "import psycopg2\n", 21 | "import sqlalchemy as sqla\n", 22 | "import os\n", 23 | "import numpy as np\n", 24 | "import sys\n", 25 | "sys.path.insert(0,'..')\n", 26 | "from config.read_config import get_database_config\n", 27 | "%matplotlib inline\n", 28 | "sys.executable # shows you your path to the python you're using\n", 29 | "\n", 30 | "# read in db credentials from config/config.txt\n", 31 | "# * make sure you add those to the config/config.txt file! 
*\n", 32 | "\n", 33 | "database_config = get_database_config(\"../config/config.txt\")" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "# get our DB connection\n", 43 | "engine = sqla.create_engine('postgresql://{}:{}@{}:{}/{}'.format(database_config['username'],\n", 44 | " database_config['password'],\n", 45 | " database_config['hostname'],\n", 46 | " database_config['port'],\n", 47 | " database_config['database']\n", 48 | " ))" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "#Select a list of Texas homes from dataport metadata having good gas data availabilty\n", 58 | "query = \"\"\"select distinct dataid, egauge_1min_data_availability, gas_data_availability, grid, solar from other_datasets.metadata \n", 59 | " \n", 60 | " where grid = 'yes'\n", 61 | " and egauge_1min_min_time <= '2018-03-01' \n", 62 | " and egauge_1min_max_time > '2018-09-01'\n", 63 | " and city='Austin'\n", 64 | " and (egauge_1min_data_availability like '100%' \n", 65 | " or \n", 66 | " egauge_1min_data_availability like '9%')\n", 67 | " and gas_ert_min_time <= '2018-03-01'\n", 68 | " and gas_ert_max_time > '2018-09-01'\n", 69 | " and\n", 70 | " (\n", 71 | " gas_data_availability like '100%'\n", 72 | " or\n", 73 | " gas_data_availability like '9%'\n", 74 | " or\n", 75 | " gas_data_availability like '8%'\n", 76 | " or\n", 77 | " gas_data_availability like '7%'\n", 78 | " )\n", 79 | " LIMIT 50\n", 80 | " ;\n", 81 | " \"\"\"\n", 82 | "\n", 83 | "df = pd.read_sql_query(sqla.text(query), engine)\n", 84 | "df" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "# export to a zipped csv\n", 94 | "compression_opts = dict(method='zip',\n", 95 | " archive_name='efficiency_btus_metadata.csv')\n", 96 | "df.to_csv('efficiency_btus_metadata.zip', index=False,\n", 97 | " compression=compression_opts)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "# grab dataids and convert them to a string to put into the SQL query\n", 107 | "dataids_list = df['dataid'].tolist()\n", 108 | "print(\"{} dataids selected listed here:\".format(len(dataids_list)))\n", 109 | "dataids_str = ','.join(list(map(str, dataids_list)))\n", 110 | "dataids_str" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "#Pull electricity data for selected homes.\n", 120 | "data = \"\"\"select dataid,localminute::timestamp,solar,grid \n", 121 | " from electricity.eg_realpower_1min \n", 122 | " where localminute >= '2018-03-01' and localminute < '2018-09-01' \"\"\"\n", 123 | "data = data + \"\"\"AND dataid in ({})\"\"\".format(dataids_str)\n", 124 | "\n", 125 | "# create a dataframe with the data from the sql query\n", 126 | "data_df = pd.read_sql_query(sqla.text(data), engine)\n", 127 | "\n", 128 | "data_df" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "# export to a zipped csv\n", 138 | "compression_opts = dict(method='zip',\n", 139 | " archive_name='efficiency_btus_electricity_data.csv')\n", 140 | "data_df.to_csv('efficiency_btus_electricity_data.zip', index=False,\n", 141 | " 
compression=compression_opts)" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "# Pull gas data for same homes and time period\n", 151 | "gas_sql = \"\"\"select * \n", 152 | " from water_and_gas.gas_ert \n", 153 | " where readtime >= '2018-03-01' and readtime < '2018-09-01' \"\"\"\n", 154 | "gas_sql = gas_sql + \"\"\"AND dataid in ({})\"\"\".format(dataids_str)\n", 155 | "\n", 156 | "# create a dataframe with the data from the sql query\n", 157 | "gas_df = pd.read_sql_query(sqla.text(gas_sql), engine)\n", 158 | "\n", 159 | "gas_df" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "# export to a zipped csv\n", 169 | "compression_opts = dict(method='zip',\n", 170 | " archive_name='efficiency_btus_gas_data.csv')\n", 171 | "gas_df.to_csv('efficiency_btus_gas_data.zip', index=False,\n", 172 | " compression=compression_opts)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "# lets go get some blucube water data now\n", 182 | "#Pull data for selected homes.\n", 183 | "water_sql = \"\"\"SELECT * FROM water_and_gas.blucube_water_data\n", 184 | " where epoch_timestamp >= '2018-03-01' and epoch_timestamp < '2018-09-01' \"\"\"\n", 185 | "water_sql = water_sql + \"\"\"AND dataid in ({})\"\"\".format(dataids_str)\n", 186 | "\n", 187 | "# create a dataframe with the data from the sql query\n", 188 | "water_df = pd.read_sql_query(sqla.text(water_sql), engine)\n", 189 | "\n", 190 | "water_df" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "# export to a zipped csv\n", 200 | "compression_opts = dict(method='zip',\n", 201 | " archive_name='efficiency_btus_water_data.csv')\n", 202 | "water_df.to_csv('efficiency_btus_water_data.zip', index=False,\n", 203 | " compression=compression_opts)" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [] 212 | } 213 | ], 214 | "metadata": { 215 | "kernelspec": { 216 | "display_name": "Python 3", 217 | "language": "python", 218 | "name": "python3" 219 | }, 220 | "language_info": { 221 | "codemirror_mode": { 222 | "name": "ipython", 223 | "version": 3 224 | }, 225 | "file_extension": ".py", 226 | "mimetype": "text/x-python", 227 | "name": "python", 228 | "nbconvert_exporter": "python", 229 | "pygments_lexer": "ipython3", 230 | "version": "3.8.5" 231 | } 232 | }, 233 | "nbformat": 4, 234 | "nbformat_minor": 4 235 | } 236 | -------------------------------------------------------------------------------- /Efficiency/Efficiency-BTUs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Visualization of energy intensity converting electricity use to embedded BTU based on national averages:\n", 8 | "\n", 9 | "The `Efficiency-BTUs-data.ipynb` notebook in this same directory will also perform the database queries and export the files as zipped csv files to this directory. You would just need to change the `pg.read_csv()` calls to look in the current directory instead of the /shared one." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "#import packages\n", 19 | "import pandas as pd\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "import psycopg2\n", 22 | "import sqlalchemy as sqla\n", 23 | "import os\n", 24 | "import sys\n", 25 | "sys.path.insert(0,'..')\n", 26 | "from config.read_config import get_database_config\n", 27 | "import numpy as np\n", 28 | "%matplotlib inline\n", 29 | "sys.executable # shows you your path to the python you're using" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "# read in db credentials from ../config/config.txt\n", 39 | "# * make sure you add those to the ../config/config.txt file! *\n", 40 | "\n", 41 | "## Uncomment the following line to use the live database queries\n", 42 | "'''\n", 43 | "database_config = get_database_config(\"../config/config.txt\")\n", 44 | "'''\n", 45 | "#" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# get our DB connection\n", 55 | "\n", 56 | "# uncomment if you want to use the live queries to the database instead of the prepared data\n", 57 | "'''\n", 58 | "engine = sqla.create_engine('postgresql://{}:{}@{}:{}/{}'.format(database_config['username'],\n", 59 | " database_config['password'],\n", 60 | " database_config['hostname'],\n", 61 | " database_config['port'],\n", 62 | " database_config['database']\n", 63 | " ))\n", 64 | "'''\n" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "#Select a list of Texas homes from dataport metadata having good gas data availability\n", 74 | "\n", 75 | "# Uncomment the following block to use live database queries\n", 76 | "'''\n", 77 | "query = \"\"\"select distinct dataid, egauge_1min_data_availability, gas_data_availability, grid, solar from other_datasets.metadata \n", 78 | " \n", 79 | " where grid = 'yes'\n", 80 | " and egauge_1min_min_time <= '2018-03-01' \n", 81 | " and egauge_1min_max_time > '2018-09-01'\n", 82 | " and city='Austin'\n", 83 | " and (egauge_1min_data_availability like '100%' \n", 84 | " or \n", 85 | " egauge_1min_data_availability like '9%')\n", 86 | " and gas_ert_min_time <= '2018-03-01'\n", 87 | " and gas_ert_max_time > '2018-09-01'\n", 88 | " and\n", 89 | " (\n", 90 | " gas_data_availability like '100%'\n", 91 | " or\n", 92 | " gas_data_availability like '9%'\n", 93 | " or\n", 94 | " gas_data_availability like '8%'\n", 95 | " or\n", 96 | " gas_data_availability like '7%'\n", 97 | " )\n", 98 | " LIMIT 50\n", 99 | " ;\n", 100 | " \"\"\"\n", 101 | "\n", 102 | "df = pd.read_sql_query(sqla.text(query), engine)\n", 103 | "df\n", 104 | "'''\n", 105 | "\n", 106 | "# otherwise we'll read from the prepared/extracted zipped data files\n", 107 | "df = pd.read_csv('/shared/JupyterHub-Examples-Data/efficiency/efficiency_btus_metadata.zip', compression='zip')\n", 108 | "df" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "# grab dataids and convert them to a string to put into the SQL query\n", 118 | "dataids_list = df['dataid'].tolist()\n", 119 | "print(\"{} dataids selected listed here:\".format(len(dataids_list)))\n", 120 | "dataids_str = ','.join(list(map(str, dataids_list)))" 121 | ] 122 | }, 123 
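As an aside, here is a sketch (assuming SQLAlchemy 1.3+, and not how these notebooks actually do it) of passing the dataid list as a bound parameter instead of interpolating `dataids_str` into the SQL string:

```python
import sqlalchemy as sqla

# An expanding bind parameter renders "in (:ids_1, :ids_2, ...)" at execution
# time, avoiding manual string formatting of the id list into the query.
stmt = sqla.text(
    "select dataid, localminute::timestamp, solar, grid "
    "from electricity.eg_realpower_1min "
    "where localminute >= '2018-03-01' and localminute < '2018-09-01' "
    "and dataid in :ids"
).bindparams(sqla.bindparam("ids", expanding=True))

# With a live engine one would then run:
# data_df = pd.read_sql_query(stmt, engine, params={"ids": dataids_list})
```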
| { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "# Uncomment the following block to use live database queries\n", 130 | "\n", 131 | "'''\n", 132 | "#Pull electricity data for selected homes.\n", 133 | "data = \"\"\"select dataid,localminute::timestamp,solar,grid \n", 134 | " from electricity.eg_realpower_1min \n", 135 | " where localminute >= '2018-03-01' and localminute < '2018-09-01' \"\"\"\n", 136 | "data = data + \"\"\"AND dataid in ({})\"\"\".format(dataids_str)\n", 137 | "\n", 138 | "# create a dataframe with the data from the sql query\n", 139 | "data_df = pd.read_sql_query(sqla.text(data), engine)\n", 140 | "'''\n", 141 | "\n", 142 | "# otherwise we'll read in the already prepared electricity data\n", 143 | "data_df = pd.read_csv('/shared/JupyterHub-Examples-Data/efficiency/efficiency_btus_electricity_data.zip', compression='zip')\n", 144 | "\n", 145 | "data_df" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "# convert 'localminute' to a datetime\n", 155 | "data_df['datetime'] = pd.to_datetime(data_df['localminute'])\n", 156 | "\n", 157 | "# index by datetime \n", 158 | "data_df = data_df.set_index('datetime')\n", 159 | "\n", 160 | "# bring to central timezone\n", 161 | "data_df = data_df.tz_localize(tz='US/Central')\n", 162 | "data_df" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "# add hour of day to df\n", 172 | "data_df['hour'] = data_df.index.hour\n", 173 | "data_df" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "# If there's no grid value, throw the row out\n", 183 | "data_df.dropna(subset=['grid'], inplace=True)\n", 184 | "data_df" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "# replace solar NaNs with 0\n", 194 | "data_df['solar'].fillna(value=0, inplace=True)\n", 195 | "print(data_df['solar'].isna().sum())\n", 196 | "\n", 197 | "# calculate the use, grid net of solar (we add them because solar generation shows up negative in the database)\n", 198 | "data_df['use'] = data_df['grid'] + data_df['solar']\n", 199 | "data_df" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "# group by the hour and take the mean to get the hourly average use\n", 209 | "usage = data_df.groupby(['hour']).mean()\n", 210 | "usage" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "# calc BTUs from kWh (1 kWh = 3,412 BTU)\n", 220 | "usage['Electrical BTUs'] = usage['use'] * 3412\n", 221 | "usage" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "# Read in insolation data downloaded from Solar Forecast Arbiter for Austin for the same timeframe\n", 231 | "# https://dashboard.solarforecastarbiter.org/observations/c6d40462-7e49-11e9-aef1-0a580a8003e9\n", 232 | "insol = pd.read_csv('/shared/JupyterHub-Examples-Data/efficiency/Austin_TX_ghi_2018-03-01T06_00_00+00_00-2018-10-01T06_00_00+00_00.csv.zip', 
skiprows=2)\n", 233 | "insol['datetime'] = pd.to_datetime(insol['timestamp'])\n", 234 | "insol = insol.set_index('datetime')\n", 235 | "insol = insol.tz_convert(tz='US/Central')\n", 236 | "insol" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "# add hour of day to df\n", 246 | "insol['hour'] = insol.index.hour\n", 247 | "insol" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "# one could potentially look at the quality flag to determine if we want to keep the row or not\n", 257 | "insol = insol.drop(columns=['quality_flag'])\n", 258 | "insol.describe()" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | "# group into hour of the day and take the mean\n", 268 | "grouped = insol.groupby(['hour']).mean()\n", 269 | "grouped" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [ 278 | "# Plot insolation vs grid usage for hour of the day\n", 279 | "\n", 280 | "fig, ax1 = plt.subplots(figsize=(20,10))\n", 281 | "\n", 282 | "color = 'tab:orange'\n", 283 | "\n", 284 | "ax1.set_xlabel('Hour of Day')\n", 285 | "ax1.set_ylabel('Insolation or GHI (W/m^2)')\n", 286 | "p1, = ax1.plot(grouped.index, grouped['value'],label=\"Insolation or GHI (W/m^2)\", color=color)\n", 287 | "ax1.tick_params(axis='y', labelcolor=color)\n", 288 | "\n", 289 | "ax2 = ax1.twinx()\n", 290 | "color = 'tab:blue'\n", 291 | "ax2.set_ylabel('Usage (kWh)')\n", 292 | "p2, = ax2.plot(grouped.index, usage['use'], label=\"Usage (kWh)\", color=color)\n", 293 | "ax2.tick_params(axis='y', labelcolor=color)\n", 294 | "\n", 295 | "ax1.legend((p1, p2), ('Insolation (W/m^2)', 'Usage (kWh)'), loc='upper right', shadow=True)\n", 296 | "\n", 297 | "plt.show()" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "# Read gas data for same homes and time period\n", 307 | "\n", 308 | "# uncomment the following block to do the live db query\n", 309 | "'''\n", 310 | "gas_sql = \"\"\"select * \n", 311 | " from water_and_gas.gas_ert \n", 312 | " where readtime >= '2018-03-01' and readtime < '2018-09-01' \"\"\"\n", 313 | "gas_sql = gas_sql + \"\"\"AND dataid in ({})\"\"\".format(dataids_str)\n", 314 | "\n", 315 | "# create a dataframe with the data from the sql query\n", 316 | "gas_df = pd.read_sql_query(sqla.text(gas_sql), engine)\n", 317 | "\n", 318 | "'''\n", 319 | "\n", 320 | "# read from the prepared csv.zip file\n", 321 | "gas_df = pd.read_csv('/shared/JupyterHub-Examples-Data/efficiency/efficiency_btus_gas_data.zip', compression='zip')\n", 322 | "\n", 323 | "gas_df" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": {}, 330 | "outputs": [], 331 | "source": [ 332 | "# convert readtime to a datetime, set the index, and convert to Central Time\n", 333 | "pd.options.display.max_rows = 500\n", 334 | "gas_df['datetime'] = pd.to_datetime(gas_df['readtime'], utc=True)\n", 335 | "gas_df = gas_df.set_index('datetime')\n", 336 | "gas_df = gas_df.tz_convert(tz='US/Central')\n", 337 | "gas_df = gas_df.drop(columns=['readtime'])\n", 338 | "gas_df" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 
344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [ 347 | "# the gas (and water data for that matter) are cumulative meter readings, meaning that they gradually increase as more gas flows. \n", 348 | "# So you can have the same reading many times in a row, or you can have gaps with no readings until more gas is used.\n", 349 | "# You can also have the meter get reset to 0, so we're going to do some gymnastics to deal with all of that\n", 350 | "\n", 351 | "# group by dataid and hour then run a diff on the rows\n", 352 | "gas_df_group = gas_df.groupby(['dataid', pd.Grouper(freq='H')]).max().diff()\n", 353 | "\n", 354 | "gas_df_group = gas_df_group.dropna()\n", 355 | "\n", 356 | "# zero out the negative diffs because that's the meter being reset and going from some high number reading to 0 (This Needs Looking AT!!!! IDK if it's the right thing to do!)\n", 357 | "gas_df_group['meter_value'] = gas_df_group['meter_value'].clip(0)\n", 358 | "gas_df_group.describe()" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "# gas is measured in ft^3 convert to BTUs with (1CCF = 103,700BTU = 30.4kWh). (Per EIA's calculator, 1 cubic ft of natural gas is 1,037BTU.)\n", 368 | "\n", 369 | "# calculate the BTUs of gas used\n", 370 | "gas_df_group['Gas BTUs'] = gas_df_group['meter_value'] * 1037\n", 371 | "gas_df_group" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [ 380 | "# add hour of day to gas\n", 381 | "gas_df_group = gas_df_group.reset_index()\n", 382 | "gas_df_group = gas_df_group.set_index('datetime')\n", 383 | "gas_df_group['hour'] = gas_df_group.index.hour\n", 384 | "gas_df_group" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": null, 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "# take the mean over all the same hours of the day across all dataids\n", 394 | "gas_hr_mean = gas_df_group.groupby(['hour']).mean()\n", 395 | "gas_hr_mean" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": null, 401 | "metadata": {}, 402 | "outputs": [], 403 | "source": [ 404 | "# don't need those columns anymore\n", 405 | "gas_hr_mean = gas_hr_mean.drop(columns=['dataid','meter_value'])" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": null, 411 | "metadata": {}, 412 | "outputs": [], 413 | "source": [ 414 | "# the first row is a bit meaningless as a diff, so we're dropping it\n", 415 | "gas_hr_mean = gas_hr_mean.drop([0])\n", 416 | "gas_hr_mean" 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": null, 422 | "metadata": {}, 423 | "outputs": [], 424 | "source": [ 425 | "# Let's have a peek at it\n", 426 | "gas_hr_mean.plot(figsize=(20,10), grid=True, x_compat=True, title=\"Gas Use in BTUs Per Hour of Day\")" 427 | ] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": null, 432 | "metadata": {}, 433 | "outputs": [], 434 | "source": [ 435 | "# combine electrical usage and gas as btus\n", 436 | "combined = pd.merge(left=usage['Electrical BTUs'], right=gas_hr_mean['Gas BTUs'], left_on=usage.index, right_on=gas_hr_mean.index)\n", 437 | "combined = combined.drop(columns=['key_0'])\n", 438 | "combined" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": null, 444 | "metadata": {}, 445 | "outputs": [], 446 | "source": [ 447 | "# let's see how those look on a 
plot. \n", 448 | "# Plotting with 2 y axes because of the scale difference between them\n", 449 | "\n", 450 | "fig, ax1 = plt.subplots(figsize=(20,10))\n", 451 | "\n", 452 | "color = 'tab:orange'\n", 453 | "\n", 454 | "ax1.set_xlabel('Hour of Day')\n", 455 | "ax1.set_ylabel('Electrical Use as BTUs')\n", 456 | "p1, = ax1.plot(usage.index, usage['Electrical BTUs'],label=\"Electrical Use as BTUs\", color=color)\n", 457 | "ax1.tick_params(axis='y', labelcolor=color)\n", 458 | "\n", 459 | "ax2 = ax1.twinx()\n", 460 | "color = 'tab:blue'\n", 461 | "ax2.set_ylabel('Gas Use as BTUs')\n", 462 | "p2, = ax2.plot(gas_hr_mean.index, gas_hr_mean['Gas BTUs'], label=\"Gas Use as BTUs\", color=color)\n", 463 | "ax2.tick_params(axis='y', labelcolor=color)\n", 464 | "\n", 465 | "ax1.legend((p1, p2), ('Electrical Use as BTUs', 'Gas Use as BTUs'), loc='upper right', shadow=True)\n", 466 | "\n", 467 | "plt.show()" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": null, 473 | "metadata": {}, 474 | "outputs": [], 475 | "source": [ 476 | "# Combining into a single dataframe\n", 477 | "gas_hr_mean['Elec BTUs'] = usage['Electrical BTUs']\n", 478 | "gas_hr_mean" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": null, 484 | "metadata": {}, 485 | "outputs": [], 486 | "source": [ 487 | "# More combining and renaming\n", 488 | "\n", 489 | "btus = usage.merge(gas_hr_mean, on=['hour'], how='left')\n", 490 | "btus = btus.drop(labels=['dataid', 'solar', 'grid', 'use', 'Elec BTUs'], axis=1)\n", 491 | "btus = btus.rename({'BTUs':'Electrical BTUs', 'Gas BTUs':'Gas BTUs'},axis='columns')\n", 492 | "btus\n" 493 | ] 494 | }, 495 | { 496 | "cell_type": "code", 497 | "execution_count": null, 498 | "metadata": {}, 499 | "outputs": [], 500 | "source": [ 501 | "# let's go get some blucube water data now\n", 502 | "\n", 503 | "# uncomment this block for a direct database pull\n", 504 | "'''\n", 505 | "#Pull data for selected homes.\n", 506 | "water_sql = \"\"\"SELECT * FROM water_and_gas.blucube_water_data\n", 507 | " where epoch_timestamp >= '2018-03-01' and epoch_timestamp < '2018-09-01' \"\"\"\n", 508 | "water_sql = water_sql + \"\"\"AND dataid in ({})\"\"\".format(dataids_str)\n", 509 | "\n", 510 | "# create a dataframe with the data from the sql query\n", 511 | "water_df = pd.read_sql_query(sqla.text(water_sql), engine)\n", 512 | "\n", 513 | "'''\n", 514 | "\n", 515 | "# read in the prepared csv file\n", 516 | "water_df = pd.read_csv('/shared/JupyterHub-Examples-Data/efficiency/efficiency_btus_water_data.zip')\n", 517 | "\n", 518 | "water_df" 519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": null, 524 | "metadata": {}, 525 | "outputs": [], 526 | "source": [ 527 | "# don't need the meter id\n", 528 | "water_df = water_df.drop(columns=['met_id']) \n", 529 | "water_df" 530 | ] 531 | }, 532 | { 533 | "cell_type": "code", 534 | "execution_count": null, 535 | "metadata": {}, 536 | "outputs": [], 537 | "source": [ 538 | "# convert 'epoch_timestamp' to a datetime\n", 539 | "water_df['datetime'] = pd.to_datetime(water_df['epoch_timestamp'], utc=True)\n", 540 | "\n", 541 | "# index by datetime \n", 542 | "water_df = water_df.set_index('datetime')\n", 543 | "\n", 544 | "# bring to central timezone\n", 545 | "water_df = water_df.tz_convert(tz='US/Central')\n", 546 | "water_df" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": null, 552 | "metadata": {}, 553 | "outputs": [], 554 | "source": [ 555 | "water = 
water_df.drop(columns=['epoch_timestamp'])\n", 556 | "water" 557 | ] 558 | }, 559 | { 560 | "cell_type": "code", 561 | "execution_count": null, 562 | "metadata": {}, 563 | "outputs": [], 564 | "source": [ 565 | "# group by dataid and hour then run a diff on the rows\n", 566 | "water_group = water.groupby(['dataid', pd.Grouper(freq='H')]).max().diff()\n", 567 | "\n", 568 | "water_group = water_group.dropna()\n", 569 | "\n", 570 | "# zero out the negative diffs because that's the meter being reset and going from some high number reading to 0\n", 571 | "water_group['reading_in_gal'] = water_group['reading_in_gal'].clip(0)\n", 572 | "water_group.describe()" 573 | ] 574 | }, 575 | { 576 | "cell_type": "code", 577 | "execution_count": null, 578 | "metadata": {}, 579 | "outputs": [], 580 | "source": [ 581 | "water_group" 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": null, 587 | "metadata": {}, 588 | "outputs": [], 589 | "source": [ 590 | "# add hour of day to water\n", 591 | "water_group = water_group.reset_index()\n", 592 | "water_group = water_group.set_index('datetime')\n", 593 | "water_group['hour'] = water_group.index.hour\n", 594 | "water_group" 595 | ] 596 | }, 597 | { 598 | "cell_type": "code", 599 | "execution_count": null, 600 | "metadata": {}, 601 | "outputs": [], 602 | "source": [ 603 | "# take the mean by hour of the day\n", 604 | "water_mean = water_group.groupby(['hour']).mean()\n", 605 | "water_mean = water_mean.drop(columns=['dataid'])\n", 606 | "water_mean" 607 | ] 608 | }, 609 | { 610 | "cell_type": "code", 611 | "execution_count": null, 612 | "metadata": {}, 613 | "outputs": [], 614 | "source": [ 615 | "# convert gallons to BTUs via the embedded energy of water for Austin: 1,000,000 gallons = 5,000 kWh, i.e. 200 gallons per kWh, so divide gallons by 200 to get kWh and multiply by 3,412 BTU/kWh\n", 616 | "water_mean['Water BTUs'] = (water_mean['reading_in_gal'] / 200.0) * 3412\n", 617 | "water_mean" 618 | ] 619 | }, 620 | { 621 | "cell_type": "code", 622 | "execution_count": null, 623 | "metadata": {}, 624 | "outputs": [], 625 | "source": [ 626 | "# again the first row is a bit meaningless because it's a diff against nothing\n", 627 | "water_mean = water_mean.drop([0])\n", 628 | "water_mean = water_mean.drop(columns=['reading_in_gal'])\n", 629 | "water_mean.plot(figsize=(20,10), grid=True, x_compat=True)" 630 | ] 631 | }, 632 | { 633 | "cell_type": "code", 634 | "execution_count": null, 635 | "metadata": {}, 636 | "outputs": [], 637 | "source": [ 638 | "water_mean" 639 | ] 640 | }, 641 | { 642 | "cell_type": "code", 643 | "execution_count": null, 644 | "metadata": {}, 645 | "outputs": [], 646 | "source": [ 647 | "btus" 648 | ] 649 | }, 650 | { 651 | "cell_type": "code", 652 | "execution_count": null, 653 | "metadata": {}, 654 | "outputs": [], 655 | "source": [ 656 | "# combine all three into one dataframe\n", 657 | "btus = btus.merge(water_mean, on=['hour'], how='left')\n", 658 | "btus" 659 | ] 660 | }, 661 | { 662 | "cell_type": "code", 663 | "execution_count": null, 664 | "metadata": {}, 665 | "outputs": [], 666 | "source": [ 667 | "# let's make some pretty pictures\n", 668 | "btus.plot(figsize=(20,10), grid=True, x_compat=True)" 669 | ] 670 | }, 671 | { 672 | "cell_type": "code", 673 | "execution_count": null, 674 | "metadata": {}, 675 | "outputs": [], 676 | "source": [ 677 | "# now time to bring the insolation in, let's get it named properly\n", 678 | "grouped = grouped.rename({'value':'Insolation or GHI (W/m^2)'} , axis='columns')\n", 679 | "grouped" 680 | ] 681 | }, 682 | { 683 | 
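A note on the cumulative-meter handling used above for both gas and water: `groupby(['dataid', Grouper]).max().diff()` also diffs across the boundary between one home's series and the next. A sketch (under the same treat-negatives-as-resets assumption) that keeps the differences within each home:

```python
import pandas as pd

# Hourly max reading per home, then difference within each home's own series,
# so one dataid's first hour is never diffed against another dataid's last.
hourly = water.groupby(['dataid', pd.Grouper(freq='H')]).max()
deltas = hourly.groupby(level='dataid').diff().dropna()

# As in the notebook, treat negative deltas (meter resets) as zero usage.
deltas = deltas.clip(lower=0)
```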
"cell_type": "code", 684 | "execution_count": null, 685 | "metadata": {}, 686 | "outputs": [], 687 | "source": [ 688 | "# Add insolation to the btus dataframe\n", 689 | "btus = btus.merge(grouped, on=['hour'], how='left')\n", 690 | "\n", 691 | "# calculate the sum of the 3 different btu usages\n", 692 | "btus['Sum BTUs'] = btus['Electrical BTUs'] + btus['Gas BTUs'] + btus['Water BTUs']\n", 693 | "btus" 694 | ] 695 | }, 696 | { 697 | "cell_type": "code", 698 | "execution_count": null, 699 | "metadata": {}, 700 | "outputs": [], 701 | "source": [ 702 | "# let's plot it all\n", 703 | "# BTUs on one Y axis, and Insolation on the other.\n", 704 | "# X axis is the hour of the day\n", 705 | "\n", 706 | "fig, ax1 = plt.subplots(figsize=(20,10))\n", 707 | "\n", 708 | "color = 'tab:orange'\n", 709 | "\n", 710 | "ax1.set_xlabel('Hour of Day')\n", 711 | "ax1.set_ylabel('BTUs')\n", 712 | "p1, = ax1.plot(btus.index, btus['Electrical BTUs'],label=\"Electrical Use in BTUs\", color=color)\n", 713 | "p2, = ax1.plot(btus.index, btus['Gas BTUs'], label=\"Gas use in BTUs\", color='tab:green')\n", 714 | "p3, = ax1.plot(btus.index, btus['Water BTUs'], label=\"Water use in BTUs\", color='tab:purple')\n", 715 | "p4, = ax1.plot(btus.index, btus['Sum BTUs'], label=\"Sum BTUs\", color='tab:red')\n", 716 | "plt.legend(loc='upper left')\n", 717 | "ax1.tick_params(axis='y', labelcolor=color)\n", 718 | "\n", 719 | "ax2 = ax1.twinx()\n", 720 | "color = 'tab:blue'\n", 721 | "ax2.set_ylabel('Insolation or GHI (W/m^2)')\n", 722 | "p2, = ax2.plot(btus.index, btus['Insolation or GHI (W/m^2)'], label=\"Insolation or GHI (W/m^2)\", color=color)\n", 723 | "ax2.tick_params(axis='y', labelcolor=color)\n", 724 | "\n", 725 | "plt.legend(loc='upper right')\n", 726 | "plt.title('Home BTU Usage and Solar Insolation Per Hour of Day')\n", 727 | "\n", 728 | "plt.show()" 729 | ] 730 | }, 731 | { 732 | "cell_type": "code", 733 | "execution_count": null, 734 | "metadata": {}, 735 | "outputs": [], 736 | "source": [ 737 | "# let's plot sum BTUs and Insolation\n", 738 | "fig, ax1 = plt.subplots(figsize=(20,10))\n", 739 | "\n", 740 | "color = 'tab:orange'\n", 741 | "\n", 742 | "ax1.set_xlabel('Hour of Day')\n", 743 | "ax1.set_ylabel('BTUs')\n", 744 | "p4, = ax1.plot(btus.index, btus['Sum BTUs'], label=\"Sum BTUs\", color='tab:red')\n", 745 | "plt.legend(loc='upper left')\n", 746 | "ax1.tick_params(axis='y', labelcolor=color)\n", 747 | "\n", 748 | "ax2 = ax1.twinx()\n", 749 | "color = 'tab:blue'\n", 750 | "ax2.set_ylabel('Insolation or GHI (W/m^2)')\n", 751 | "p2, = ax2.plot(btus.index, btus['Insolation or GHI (W/m^2)'], label=\"Insolation or GHI (W/m^2)\", color=color)\n", 752 | "ax2.tick_params(axis='y', labelcolor=color)\n", 753 | "\n", 754 | "plt.legend(loc='upper right')\n", 755 | "plt.title('Sum of Home BTU Usage and Solar Insolation Per Hour of Day')\n", 756 | "\n", 757 | "plt.show()" 758 | ] 759 | } 760 | ], 761 | "metadata": { 762 | "kernelspec": { 763 | "display_name": "Python 3", 764 | "language": "python", 765 | "name": "python3" 766 | }, 767 | "language_info": { 768 | "codemirror_mode": { 769 | "name": "ipython", 770 | "version": 3 771 | }, 772 | "file_extension": ".py", 773 | "mimetype": "text/x-python", 774 | "name": "python", 775 | "nbconvert_exporter": "python", 776 | "pygments_lexer": "ipython3", 777 | "version": "3.8.5" 778 | } 779 | }, 780 | "nbformat": 4, 781 | "nbformat_minor": 4 782 | } 783 | -------------------------------------------------------------------------------- 
/Efficiency/Efficiency-Coincidence-Water-Gas-Elec.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Efficiency: General visualization of all three inputs of water, gas, and electricity showing the coincidence of use\n", 8 | "Here we are using water, gas and energy data and showing coincidence of use by pairing all three data sources.\n", 9 | "\n", 10 | "To prepare this graph, gas data (water_and_gas.gas_ert table), water data (water_and_gas.water_ert table) and energy data (electricity.eg_realpower_15min view) for the years 2014, 2015 and 2016 were converted into hourly data for 30 homes.\n", 11 | "\n", 12 | "All three datasets have been joined to find all hours showing coincidence of use in homes. This data has been exported to the 'water_gas_energy.csv' file, which can be found at `/shared/JupyterHub-Examples-Data/efficiency/water_gas_energy.csv`." 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "import pandas as pd\n", 22 | "import matplotlib.pyplot as plt\n", 23 | "import numpy as np\n", 24 | "import csv\n", 25 | "import os\n", 26 | "import sys\n", 27 | "import statistics\n", 28 | "%matplotlib inline\n", 29 | "print(sys.version) # prints the python version\n", 30 | "print(sys.executable) # prints the path to the python you're using" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "#Read pre-processed data \n", 40 | "df1 = pd.read_csv('/shared/JupyterHub-Examples-Data/efficiency/water_gas_energy.csv')\n", 41 | "homes_list = df1.dataid.unique() \n", 42 | "homes_list" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "len(homes_list)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "# sample data from the data file\n", 61 | "df1.head()" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "# convert cubic feet to kWh for gas data (0.293 kWh per cubic foot, i.e. ~1,000 BTU/ft^3)\n", 71 | "df1['gasuse_kwh'] = df1['gasuse'] * 0.293" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "# describe the data\n", 81 | "df1.describe()" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "# group by dataid and month and take average of gasuse, wateruse and elecuse\n", 91 | "df1['datetime'] = pd.to_datetime(df1['hour'])\n", 92 | "df1 = df1.set_index('datetime')\n", 93 | "\n", 94 | "grouped = df1.groupby([pd.Grouper(freq='M'), 'dataid']).mean()\n", 95 | "grouped = grouped.reset_index()\n", 96 | "grouped" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "# find average use of water, gas and energy per hour per house, for each month\n", 106 | "grouped = grouped.drop(columns=['dataid'])\n", 107 | "df2 = grouped.groupby(['datetime']).mean()\n", 108 | "df2 = df2.reset_index()\n", 109 | "df2" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 
117 | "source": [ 118 | "#Plot above calculated data\n", 119 | "plt.figure(figsize=(20,10))\n", 120 | "plt.plot(df2['datetime'],df2['gasuse_kwh'],label=\"gas(kWh)\")\n", 121 | "plt.plot(df2['datetime'],df2['wateruse'],label=\"water(gallons)\")\n", 122 | "plt.plot(df2['datetime'],df2['elecuse'],label=\"energy(kWh)\")\n", 123 | "#plt.xticks(np.arange(min(df['datetime']), max(df['datetime'])+1, 3))\n", 124 | "plt.xlabel('month')\n", 125 | "plt.legend()\n", 126 | "\n", 127 | "plt.show()\n", 128 | "\n" 129 | ] 130 | } 131 | ], 132 | "metadata": { 133 | "kernelspec": { 134 | "display_name": "Python 3", 135 | "language": "python", 136 | "name": "python3" 137 | }, 138 | "language_info": { 139 | "codemirror_mode": { 140 | "name": "ipython", 141 | "version": 3 142 | }, 143 | "file_extension": ".py", 144 | "mimetype": "text/x-python", 145 | "name": "python", 146 | "nbconvert_exporter": "python", 147 | "pygments_lexer": "ipython3", 148 | "version": "3.8.5" 149 | } 150 | }, 151 | "nbformat": 4, 152 | "nbformat_minor": 4 153 | } 154 | -------------------------------------------------------------------------------- /Efficiency/Efficiency-Emissions-Density.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Efficiency Notebooks: Exploring the emissions density of power used by homes\n", 8 | "We'll be using data from the Texas ISO, ERCOT, to calculate the emissions amount and emission sources from the generated power used by the electrical grid home usage for 50 homes.\n", 9 | "\n", 10 | "Entirely solar equipped homes will be used to demonstrate the interplay between solar generation and emissions during this time period (March - August 2018)\n", 11 | "\n", 12 | "ERCOT emissions/generation data pulled from http://www.ercot.com/content/wcm/lists/181766/FuelMixReport_PreviousYears.zip \n", 13 | "which you can find at http://www.ercot.com/gridinfo/generation" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "#import packages\n", 23 | "import pandas as pd\n", 24 | "import matplotlib.pyplot as plt\n", 25 | "import psycopg2\n", 26 | "import sqlalchemy as sqla\n", 27 | "import os\n", 28 | "import sys\n", 29 | "sys.path.insert(0,'..')\n", 30 | "from config.read_config import get_database_config\n", 31 | "import numpy as np\n", 32 | "import statistics\n", 33 | "%matplotlib inline\n", 34 | "sys.executable # shows you your path to the python you're using" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# set constants for lbs of CO2 / kWh\n", 44 | "_gas_cc_lbs = 0.75\n", 45 | "_gas_lbs = 1.0\n", 46 | "_coal_lbs = 2.21" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "# read in db credentials from ../config/config.txt\n", 56 | "# * make sure you add those to the ../config/config.txt file! 
*\n", 57 | "\n", 58 | "## Uncomment the following line to use the live database queries\n", 59 | "\n", 60 | "database_config = get_database_config(\"../config/config.txt\")\n", 61 | "\n", 62 | "#" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "# get our DB connection\n", 72 | "\n", 73 | "# uncomment if you want to use the live queries to the database instead of the prepared data\n", 74 | "\n", 75 | "engine = sqla.create_engine('postgresql://{}:{}@{}:{}/{}'.format(database_config['username'],\n", 76 | " database_config['password'],\n", 77 | " database_config['hostname'],\n", 78 | " database_config['port'],\n", 79 | " database_config['database']\n", 80 | " ))\n" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "#Select a list of Austin homes from dataport metadata with good data availability for grid\n", 90 | "query = \"\"\"select distinct dataid, egauge_1min_data_availability, grid, solar from other_datasets.metadata \n", 91 | " where grid = 'yes'\n", 92 | " and solar = 'yes'\n", 93 | " and egauge_1min_min_time <= '2018-03-01' \n", 94 | " and egauge_1min_max_time > '2018-09-01'\n", 95 | " and city='Austin'\n", 96 | " and (egauge_1min_data_availability like '100%' \n", 97 | " or \n", 98 | " egauge_1min_data_availability like '99%'\n", 99 | " or\n", 100 | " egauge_1min_data_availability like '98%'\n", 101 | " or\n", 102 | " egauge_1min_data_availability like '97%'\n", 103 | " )\n", 104 | " and gas_ert_min_time <= '2018-03-01'\n", 105 | " and gas_ert_max_time > '2018-09-01'\n", 106 | " limit 50\n", 107 | " ;\n", 108 | " \"\"\"\n", 109 | "\n", 110 | "df = pd.read_sql_query(sqla.text(query), engine)\n", 111 | "df" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "# grab dataids and convert them to a string to put into the SQL query\n", 121 | "dataids_list = df['dataid'].tolist()\n", 122 | "print(\"{} dataids selected listed here:\".format(len(dataids_list)))\n", 123 | "dataids_str = ','.join(list(map(str, dataids_list)))\n", 124 | "dataids_str" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "#Pull electricity data for selected homes.\n", 134 | "data = \"\"\"select dataid,localminute::timestamp,grid \n", 135 | " from electricity.eg_realpower_1min \n", 136 | " where localminute >= '2018-03-01' and localminute < '2018-09-01' \"\"\"\n", 137 | "data = data + \"\"\"AND dataid in ({})\"\"\".format(dataids_str)\n", 138 | "\n", 139 | "# create a dataframe with the data from the sql query\n", 140 | "grid_df = pd.read_sql_query(sqla.text(data), engine)\n", 141 | "\n", 142 | "grid_df" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "# read in 2018 ERCOT emissions data\n", 152 | "ercot = pd.read_csv('ercot_emissions_2018.csv')\n", 153 | "\n", 154 | "# index by Energy, GWh \n", 155 | "ercot = ercot.set_index('Energy, GWh')\n", 156 | "\n", 157 | "# remove the commas from the numbers\n", 158 | "ercot.replace(',','', regex=True, inplace=True)\n", 159 | "\n", 160 | "# convert to a float from a string\n", 161 | "ercot = ercot.astype('float64')\n", 162 | "\n", 163 | "ercot" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | 
"execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "# Calc just one of them months and sources for a sanity check\n", 173 | "perc_coal_mar = ercot.loc['Coal','Mar'] / ercot.loc['Total','Mar']\n", 174 | "perc_coal_mar" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "# find the percentages for coal, gas, and gas-cc of the total blend of generation sources from ERCOT for our months and the emissions-producing sources\n", 184 | "sources = ['Coal', 'Gas', 'Gas-CC']\n", 185 | "months = ['Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug']\n", 186 | "percs = {}\n", 187 | "for source in sources:\n", 188 | " for month in months:\n", 189 | " percs[source + '' + month] = ercot.loc[source, month] / ercot.loc['Total', month]\n", 190 | "percs" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "# take the mean across the months for each source\n", 200 | "coal_ave = statistics.mean([percs['CoalMar'], percs['CoalApr'], percs['CoalMay'], percs['CoalJun'], percs['CoalJul'], percs['CoalAug']])\n", 201 | "gas_ave = statistics.mean([percs['GasMar'], percs['GasApr'], percs['GasMay'], percs['GasJun'], percs['GasJul'], percs['GasAug']])\n", 202 | "gascc_ave = statistics.mean([percs['Gas-CCMar'], percs['Gas-CCApr'], percs['Gas-CCMay'], percs['Gas-CCJun'], percs['Gas-CCJul'], percs['Gas-CCAug']])\n", 203 | "\n", 204 | "print ('Coal = {}%'.format(coal_ave * 100))\n", 205 | "print ('Gas = {}%'.format(gas_ave * 100))\n", 206 | "print ('Gas-CC = {}%'.format(gascc_ave * 100))" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "# complete the full percentage, fill with the rest of the sources that are largely non-emissions producing\n", 216 | "the_rest = 1.0 - coal_ave - gas_ave - gascc_ave\n", 217 | "\n", 218 | "# pie chart\n", 219 | "pie_data = [coal_ave, gas_ave, gascc_ave, the_rest]\n", 220 | "pie_labels = ['Coal', 'Gas', 'Gas-CC', 'Other']\n", 221 | "explode = [.05, .05, .05, .05] # separates the slices a little bit\n", 222 | "plt.pie(pie_data, labels=pie_labels, autopct='%1.1f%%', startangle=15, shadow = True, explode=explode)\n", 223 | "plt.title('ERCOT Generation Percentages')\n", 224 | "plt.axis('equal')\n", 225 | "plt.show()" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "# convert ercot table to percentages:\n", 235 | "def add_percentages(column):\n", 236 | " return column / column['Total']\n", 237 | "\n", 238 | "ercot_perc = ercot.apply(add_percentages)\n", 239 | "ercot_perc" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "ercot_perc.index.name = \"% of Generation\"\n", 249 | "ercot_perc" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": {}, 256 | "outputs": [], 257 | "source": [ 258 | "# clean up that percentage table\n", 259 | "ercot_perc = ercot_perc.drop(index=['Biomass', 'Hydro', 'Nuclear', 'Other', 'Solar', 'Wind', 'Total'], columns=['Jan', 'Feb', 'Sep', 'Oct', 'Nov', 'Dec', 'Total'])\n", 260 | "ercot_perc" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": {}, 267 | 
"outputs": [], 268 | "source": [ 269 | "# index by localminute \n", 270 | "grid_df = grid_df.set_index('localminute')\n", 271 | "\n", 272 | "# bring to central timezone\n", 273 | "grid_df = grid_df.tz_localize(tz='US/Central')\n", 274 | "grid_df" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [ 283 | "# drop any rows that have blank grid\n", 284 | "grid_df = grid_df.dropna(how='any')\n", 285 | "grid_df" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "# calculate the average grid usage of the homes over this time period\n", 295 | "grouped_grid = grid_df.groupby([pd.Grouper(freq='D')]).mean()\n", 296 | "grouped_grid['grid'] = grouped_grid['grid'] * 24 # converts daily average grid use/generation to kWh\n", 297 | "grouped_grid" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "# the above was using the monthly averages from Mar - Aug from ercot all averaged together for each source\n", 307 | "# let's use the actual monthy averages for each point instead\n", 308 | "grid_more = grouped_grid" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "metadata": {}, 315 | "outputs": [], 316 | "source": [ 317 | "# extract and addd the month to the dataframe\n", 318 | "grid_more['Month'] = grid_more.index.strftime('%B')\n", 319 | "grid_more['Month'] = grid_more['Month'].astype(str)\n", 320 | "grid_more" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "# convert the month to the same 3 letter abbreviation as in the ERCOT table\n", 330 | "def shorten_month(col):\n", 331 | " col['Month'] = col['Month'][0:3]\n", 332 | " return col\n", 333 | "\n", 334 | "grid_more = grid_more.apply(shorten_month, axis=1)\n", 335 | "grid_more" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": null, 341 | "metadata": {}, 342 | "outputs": [], 343 | "source": [ 344 | "# Assign based on the monthly percentage breakdown\n", 345 | "def assign_lbs(row):\n", 346 | " row['Gas-CC lbs'] = (ercot_perc.loc['Gas-CC', row.Month] * row['grid']) * _gas_cc_lbs\n", 347 | " row['Gas lbs'] = (ercot_perc.loc['Gas', row.Month] * row['grid']) * _gas_lbs\n", 348 | " row['Coal lbs'] = (ercot_perc.loc['Coal', row.Month] * row['grid']) * _coal_lbs\n", 349 | " return row\n", 350 | "\n", 351 | "\n", 352 | "grid_more = grid_more.apply(assign_lbs, axis=1)\n", 353 | "grid_more" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": null, 359 | "metadata": {}, 360 | "outputs": [], 361 | "source": [ 362 | "# don't need these anymore\n", 363 | "grid_more = grid_more.drop(columns=['dataid', 'Month'])\n", 364 | "grid_more" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": null, 370 | "metadata": {}, 371 | "outputs": [], 372 | "source": [ 373 | "# Add a total CO2 column\n", 374 | "grid_more['Total CO2'] = grid_more['Gas-CC lbs'] + grid_more['Gas lbs'] + grid_more['Coal lbs']\n", 375 | "grid_more" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": null, 381 | "metadata": {}, 382 | "outputs": [], 383 | "source": [ 384 | "grid_more = grid_more.rename({'grid':'Grid Use (kWh)'} , axis='columns')" 385 | ] 386 | }, 387 | { 388 | "cell_type": 
"code", 389 | "execution_count": null, 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "grid_more.plot(figsize=(25,15), title='Daily Grid (kWh) and Emissions in lbs of CO2', grid=True, xlabel='Day', ylabel='kWh or lbs CO2')" 394 | ] 395 | }, 396 | { 397 | "cell_type": "markdown", 398 | "metadata": {}, 399 | "source": [ 400 | "# Observations:\n", 401 | "\n", 402 | "- These all solar homes have the capacity to offset some of the neighbors' emissions in the \"shoulder months\" by putting energy back on the grid\n", 403 | "- Total CO2 as measured in lbs/kWh tracks at nearly 1-to-1\n" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": null, 409 | "metadata": {}, 410 | "outputs": [], 411 | "source": [] 412 | } 413 | ], 414 | "metadata": { 415 | "kernelspec": { 416 | "display_name": "Python 3", 417 | "language": "python", 418 | "name": "python3" 419 | }, 420 | "language_info": { 421 | "codemirror_mode": { 422 | "name": "ipython", 423 | "version": 3 424 | }, 425 | "file_extension": ".py", 426 | "mimetype": "text/x-python", 427 | "name": "python", 428 | "nbconvert_exporter": "python", 429 | "pygments_lexer": "ipython3", 430 | "version": "3.8.5" 431 | } 432 | }, 433 | "nbformat": 4, 434 | "nbformat_minor": 4 435 | } 436 | -------------------------------------------------------------------------------- /Efficiency/Efficiency-Percent-PV-Home-Usage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Efficiency: What percent of a home’s average daily energy use is provided by its on-site solar PV-generated power\n", 8 | "\n", 9 | "To prepare the data we selected 25 Texas homes with PV. We used 1 minute energy(from electricity.eg_realpower_1min) data for the selected homes for March 2018 through August 2018." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "#import packages\n", 19 | "import pandas as pd\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "import psycopg2\n", 22 | "import sqlalchemy as sqla\n", 23 | "import os\n", 24 | "import sys\n", 25 | "sys.path.insert(0,'..')\n", 26 | "from config.read_config import get_database_config\n", 27 | "from datetime import datetime as dt\n", 28 | "import numpy as np\n", 29 | "%matplotlib inline\n", 30 | "sys.executable # shows you your path to the python you're using" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "#Read pre-processed data \n", 40 | "data_df = pd.read_csv('/shared/JupyterHub-Examples-Data/efficiency/whole-use-by-solar.zip', compression='zip')\n", 41 | "homes_list = data_df.dataid.unique() \n", 42 | "homes_list" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "#Function to calculate how much energy use was powered by solar for each home for each interval\n", 52 | "def calc_use_powered_by_solar(row):\n", 53 | "\n", 54 | " if row['grid'] < 0 and row['solar'] > 0:\n", 55 | "\n", 56 | " val = row['total_use']\n", 57 | "\n", 58 | " elif row['solar'] > 0 and row['grid'] > 0:\n", 59 | "\n", 60 | " val = row['solar']\n", 61 | "\n", 62 | " else:\n", 63 | "\n", 64 | " val = 0\n", 65 | "\n", 66 | " return val" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "# replace nulls with 0s \n", 76 | "data_df = data_df.fillna(0)\n", 77 | "\n", 78 | "# create new column total_use\n", 79 | "data_df['total_use'] = data_df['grid'] + data_df['solar']\n", 80 | "\n", 81 | "data_df['use_powered_by_solar'] = data_df.apply(calc_use_powered_by_solar, axis=1)\n", 82 | "\n", 83 | "data_df.head(10)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "# convert localminute to date\n", 93 | "#data_df['dt'] = data_df['localminute'].dt.normalize()\n", 94 | "\n", 95 | "data_df['localminute'] = pd.to_datetime(data_df['localminute'])\n", 96 | "data_df['dt'] = data_df['localminute'].dt.date\n", 97 | "data_df.head(10)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "#calculate the homes' daily average use powered by solar\n", 107 | "avg_solar_use_by_day = pd.DataFrame(data_df, columns = ['dt','total_use','use_powered_by_solar'])\n", 108 | "avg_solar_use_by_day = avg_solar_use_by_day.groupby(['dt']).sum()\n", 109 | "avg_solar_use_by_day = avg_solar_use_by_day.reset_index()\n", 110 | "avg_solar_use_by_day['use_from_solar'] = (avg_solar_use_by_day['use_powered_by_solar']/avg_solar_use_by_day['total_use'])*100\n", 111 | "avg_solar_use_by_day.head(10)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "print(avg_solar_use_by_day['use_from_solar'].mean())" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "#calculate the homes' weekly average use powered by solar\n", 130 | "avg_solar_use_by_week = pd.DataFrame(data_df, columns = 
['dt','total_use','use_powered_by_solar'])\n", 131 | "avg_solar_use_by_week['dt'] = pd.to_datetime(avg_solar_use_by_week['dt'])\n", 132 | "avg_solar_use_by_week['week_num'] = avg_solar_use_by_week['dt'].dt.week\n", 133 | "avg_solar_use_by_week = avg_solar_use_by_week.groupby(['week_num']).sum()\n", 134 | "avg_solar_use_by_week = avg_solar_use_by_week.reset_index()\n", 135 | "avg_solar_use_by_week['use_from_solar'] = (avg_solar_use_by_week['use_powered_by_solar']/avg_solar_use_by_week['total_use'])*100\n", 136 | "avg_solar_use_by_week.head(10)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "# plot bar chart for average weekly use\n", 146 | "avg_solar_use_by_week.plot.bar(x='week_num', y='use_from_solar',figsize=(20,10),color='orange',title=\"Homes' average weekly use powered by solar\")\n", 147 | "plt.show()" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "avg_solar_use_by_week['use_from_solar'].mean()" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "## From the above analysis, we see that ~29% of a home's average daily energy use is provided by its on-site solar PV-generated power." 164 | ] 165 | } 166 | ], 167 | "metadata": { 168 | "kernelspec": { 169 | "display_name": "Python 3", 170 | "language": "python", 171 | "name": "python3" 172 | }, 173 | "language_info": { 174 | "codemirror_mode": { 175 | "name": "ipython", 176 | "version": 3 177 | }, 178 | "file_extension": ".py", 179 | "mimetype": "text/x-python", 180 | "name": "python", 181 | "nbconvert_exporter": "python", 182 | "pygments_lexer": "ipython3", 183 | "version": "3.8.5" 184 | } 185 | }, 186 | "nbformat": 4, 187 | "nbformat_minor": 4 188 | } 189 | -------------------------------------------------------------------------------- /Efficiency/Efficiency-Water_vs_Energy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Water and energy use correlation notebook:\n", 8 | "\n", 9 | "### Here we will explore whether there is a relationship between water use and electricity use in our home network.\n", 10 | "\n", 11 | "We've already exported the electricity and water data to a file on our JupyterHub's shared filesystem at `/shared/JupyterHub-Examples-Data/efficiency/elec_water_data.csv`. Our methodology for extracting the data is as follows:\n", 12 | "\n", 13 | "To find the correlation between a home's energy use and water use, Blucube water data from dataport (water_and_gas.blucube_water_data) and 1-minute interval energy (electricity.eg_realpower_1min) data was used. Blucube data consists of the cumulative device reading (in gallons), so the water usage has been calculated for each interval by subtracting the previous interval's reading from the current interval's. After calculating delta usage, only those time intervals with a delta greater than 0 were included in the dataset. 
This data has then been joined with the energy data to find how much electricity was used in those same time intervals when water was used in a home.\n" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "import pandas as pd\n", 23 | "import matplotlib.pyplot as plt\n", 24 | "import csv\n", 25 | "import os\n", 26 | "import sys\n", 27 | "import statistics\n", 28 | "%matplotlib inline\n", 29 | "print(sys.version) # prints the python version\n", 30 | "print(sys.executable) # prints the path to the python you're using" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "# Read processed data. Blucube water data from dataport has been processed to calculate delta \n", 40 | "# water usage for each minute interval. Only intervals with water usage > 0 have been kept.\n", 41 | "data = pd.read_csv('/shared/JupyterHub-Examples-Data/efficiency/elec_water_data.csv')\n", 42 | "homes_list = data.dataid.unique() \n", 43 | "homes_list" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "# Loop through the list of homes, compute the correlation between water and electricity usage, and plot the data points\n", 53 | "homes_cor = []\n", 54 | "for home in homes_list:\n", 55 | "    data_to_process = data.loc[(data['dataid'] == home)]\n", 56 | "    x = data_to_process[\"water_use\"]\n", 57 | "    y = data_to_process[\"elec_use\"]\n", 58 | "    correlation = round(x.corr(y),3)\n", 59 | "    homes_cor.append(correlation)\n", 60 | "    print(str(home) + ' -> ' + str(correlation))\n", 61 | "    plt.scatter(x, y, edgecolors='black')\n", 62 | "    plt.title('Correlation for home {}'.format(home))\n", 63 | "    plt.xlabel('Water Use')\n", 64 | "    plt.ylabel('Energy Use')\n", 65 | "    plt.show()" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "print(\"Average correlation for all homes: \", statistics.mean(homes_cor))" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "## Conclusion:\n", 82 | "### From the above plots and the calculated average correlation, we can say that water and electricity usage is *not* positively correlated."
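For reference, the cumulative-to-delta step described in the methodology above was done during extraction; a minimal pandas sketch of it might look like this (the `reading` column name and the toy values are assumptions, since Blucube readings are cumulative gallons per device):

```python
import pandas as pd

# toy stand-in for water_and_gas.blucube_water_data: cumulative gallons per home
water = pd.DataFrame({
    'dataid': [1, 1, 1, 2, 2],
    'localminute': pd.to_datetime(['2018-01-01 00:00', '2018-01-01 00:01',
                                   '2018-01-01 00:02', '2018-01-01 00:00',
                                   '2018-01-01 00:01']),
    'reading': [100.0, 100.0, 101.5, 50.0, 50.7],
})

# delta usage = current cumulative reading minus the previous interval's reading,
# computed within each home
water = water.sort_values(['dataid', 'localminute'])
water['water_use'] = water.groupby('dataid')['reading'].diff()

# keep only the intervals where water was actually used
water = water[water['water_use'] > 0]
print(water)
```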
83 | ] 84 | } 85 | ], 86 | "metadata": { 87 | "kernelspec": { 88 | "display_name": "Python 3", 89 | "language": "python", 90 | "name": "python3" 91 | }, 92 | "language_info": { 93 | "codemirror_mode": { 94 | "name": "ipython", 95 | "version": 3 96 | }, 97 | "file_extension": ".py", 98 | "mimetype": "text/x-python", 99 | "name": "python", 100 | "nbconvert_exporter": "python", 101 | "pygments_lexer": "ipython3", 102 | "version": "3.8.5" 103 | } 104 | }, 105 | "nbformat": 4, 106 | "nbformat_minor": 4 107 | } 108 | -------------------------------------------------------------------------------- /Efficiency/ercot_emissions_2018.csv: -------------------------------------------------------------------------------- 1 | "Energy, GWh",Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,Total 2 | Biomass,56,43,38,33,66,63,67,58,31,32,32,42,563 3 | Coal,"8,891","5,899","4,990","6,132","7,524","8,758","9,339","9,491","8,765","8,199","7,704","7,558","93,249" 4 | Gas,"1,411",661,884,951,"2,779","2,558","4,183","3,661","2,096","2,014","1,326",962,"23,487" 5 | Gas-CC,"10,690","10,134","9,923","8,801","12,136","14,854","17,438","16,202","13,775","10,404","8,767","10,594","143,719" 6 | Hydro,32,32,53,61,60,49,44,55,38,139,132,113,811 7 | Nuclear,"3,809","3,433","3,528","2,783","3,746","3,593","3,706","3,622","3,589","2,909","3,297","3,111","41,125" 8 | Other,3,1,2,2,3,2,4,1,2,3,3,2,29 9 | Solar,190,165,241,297,340,368,367,354,257,209,250,203,"3,240" 10 | Wind,"6,237","5,534","6,625","6,717","7,060","7,059","4,293","5,569","3,778","5,227","5,592","6,105","69,796" 11 | Total,"31,319","25,902","26,285","25,777","33,715","37,304","39,440","39,014","32,331","29,138","27,105","28,690","376,019" -------------------------------------------------------------------------------- /ElectricVehicle/Data-Extraction--EV_charging_alignment_with_State_of_Texas_critical_peak_power_events.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Data Extraction for Critical Peak Power Events Notebooks: Exploring how EV charging aligns with Texas's critical peak power events by homes\n", 8 | "\n", 9 | "## This notebook will connect to the database and extract the data live and put it into compressed zip files in this directory. \n", 10 | "\n", 11 | "
We will be using data from ERCOT's 4CP calculations to determine how residential homes' EV charging habits align with those peak power events.\n",
11 | "ERCOT 4CP data is pulled from http://mis.ercot.com/misapp/GetReports.do?reportTypeId=13037&reportTitle=Planned%20Service%20Four%20Coincident%20Peak%20Calculations&showHTMLView=&mimicKey\n",
12 | " \n",
13 | "Data from 24 homes with fairly complete data for the year 2018 is used to explore this question. Pecan Street's data can be obtained by applying for a Dataport account at https://www.dataport.pecanstreet.org.\n",
14 | "If you would like to use the files exported by this notebook, you'll need to modify the read_csv calls in the analysis notebook to point at these instead of the ones we've extracted and prepared for you in the /shared/JupyterHub-Examples-Data/ directory on the JupyterHub server.
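Once the minute-level data has been exported, checking charging alignment against a single 4CP window is just a timestamp mask. A rough sketch (note that the data pull below stops at 2018-06-01, so the date range would need extending to cover the summer 2018 events; the 2018-06-27 17:00-17:15 window is one of ERCOT's published 4CP intervals):

```python
# Sketch: average EV charging load during one 4CP window, assuming an export
# whose date range covers the event (extend the data pull below accordingly).
import pandas as pd

data_df = pd.read_csv('ev_overall_household_demand.zip', compression='zip')
data_df['localminute'] = pd.to_datetime(data_df['localminute'])  # tz-naive via ::timestamp

start = pd.Timestamp('2018-06-27 17:00:00')
end = pd.Timestamp('2018-06-27 17:15:00')
window = data_df[(data_df['localminute'] >= start) & (data_df['localminute'] <= end)]

# average EV charging (car1, in kW) per home during the event
print(window.groupby('dataid')['car1'].mean())
```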
" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# import packages\n", 24 | "import pandas as pd\n", 25 | "import psycopg2\n", 26 | "import sqlalchemy as sqla\n", 27 | "import os\n", 28 | "import sys\n", 29 | "sys.path.insert(0,'..')\n", 30 | "from config.read_config import get_database_config\n", 31 | "%matplotlib inline\n", 32 | "sys.executable # shows you your path to the python you're using" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "# read in db credentials from config/config.txt\n", 42 | "# * make sure you add those to the config/config.txt file! *\n", 43 | "\n", 44 | "database_config = get_database_config(\"../config/config.txt\")" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "# get our DB connection\n", 54 | "engine = sqla.create_engine('postgresql://{}:{}@{}:{}/{}'.format(database_config['username'],\n", 55 | " database_config['password'],\n", 56 | " database_config['hostname'],\n", 57 | " database_config['port'],\n", 58 | " database_config['database']\n", 59 | " ))" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "# Select a list of Texas homes from dataport metadata having an electrical vehicle (car1) and also has data for year 2018.\n", 69 | "query = \"\"\"select distinct dataid from other_datasets.metadata \n", 70 | " where car1='yes' and grid='yes'\n", 71 | " and egauge_1min_min_time < '2018-01-01' \n", 72 | " and egauge_1min_max_time > '2019-01-01'\n", 73 | " and state='Texas'\n", 74 | " and (egauge_1min_data_availability like '100%' \n", 75 | " or \n", 76 | " egauge_1min_data_availability like '99%')\n", 77 | " LIMIT 25;\n", 78 | " \"\"\"\n", 79 | "\n", 80 | "df = pd.read_sql_query(sqla.text(query), engine)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "# grab dataids and convert them to a string to put into the SQL query\n", 90 | "dataids_list = df['dataid'].tolist()\n", 91 | "print(\"{} dataids selected listed here:\".format(len(dataids_list)))\n", 92 | "dataids_str = ','.join(list(map(str, dataids_list)))\n", 93 | "dataids_str\n", 94 | "dataids_list" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "# Check data completeness for dataids selected from metadata above.\n", 104 | "## Warning: This query takes some time to run.\n", 105 | "query2 = \"\"\"select dataid,count(*) total_rec from electricity.eg_realpower_1min \n", 106 | " where dataid in ({})\"\"\".format(dataids_str)\n", 107 | "query2 = query2 + \"\"\" and localminute >= '2018-01-01' and localminute < '2019-01-01' group by 1\"\"\"\n", 108 | "\n", 109 | "df2 = pd.read_sql_query(sqla.text(query2), engine)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "# Select homes with atleast 99% data availability for year 2018.\n", 119 | "df2['perc'] = (df2['total_rec']/525600)*100\n", 120 | "final_dataids = df2[df2['perc'] >= 99]\n", 121 | "final_dataids['dataid'].count()" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | 
"metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "# assemble list of selected homes\n", 131 | "final_dataids_list = final_dataids['dataid'].tolist()\n", 132 | "print(\"{} dataids selected listed here:\".format(len(final_dataids_list)))\n", 133 | "final_dataids_str = ','.join(list(map(str, final_dataids_list)))\n", 134 | "final_dataids_str\n", 135 | "final_dataids_list" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "# now go pull the data for the selected homes\n", 145 | "data_pull = \"\"\"select localminute::timestamp,car1,grid,solar \n", 146 | " from electricity.eg_realpower_1min \n", 147 | " where localminute >= '2018-03-01' and localminute < '2018-06-01' \"\"\"\n", 148 | "data_pull = data_pull + \"\"\"AND dataid in ({})\"\"\".format(final_dataids_str)\n", 149 | "\n", 150 | "data_df = pd.read_sql_query(sqla.text(data_pull), engine)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "data_df" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "# export the data to a csv file\n", 169 | "compression_opts = dict(method='zip',\n", 170 | " archive_name='ev_overall_household_demand.zip')\n", 171 | "data_df.to_csv('ev_overall_household_demand.zip', index=False,\n", 172 | " compression=compression_opts)" 173 | ] 174 | } 175 | ], 176 | "metadata": { 177 | "kernelspec": { 178 | "display_name": "Python 3", 179 | "language": "python", 180 | "name": "python3" 181 | }, 182 | "language_info": { 183 | "codemirror_mode": { 184 | "name": "ipython", 185 | "version": 3 186 | }, 187 | "file_extension": ".py", 188 | "mimetype": "text/x-python", 189 | "name": "python", 190 | "nbconvert_exporter": "python", 191 | "pygments_lexer": "ipython3", 192 | "version": "3.8.5" 193 | } 194 | }, 195 | "nbformat": 4, 196 | "nbformat_minor": 4 197 | } 198 | -------------------------------------------------------------------------------- /ElectricVehicle/EV_charging_alignment_with_State_of_Texas_critical_peak_power_events.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Critical Peak Power Events Notebooks: Exploring how EV charging aligns with Texas's critical peak power events by homes\n", 8 | "\n", 9 | "We will be using data from ERCOT's 4CP calculations to determine how residential homes EV charging habits align with those Peak power events.
\n",
10 | "ERCOT 4CP data is pulled from http://mis.ercot.com/misapp/GetReports.do?reportTypeId=13037&reportTitle=Planned%20Service%20Four%20Coincident%20Peak%20Calculations&showHTMLView=&mimicKey
We will be using Pecan Street Inc. data from Dataport to determine how electric vehicle charging aligns with rooftop solar generation.\n",
10 | " \n",
11 | "
Data from 24 homes with fairly complete data for the year 2018 is used to explore this question.\n",
12 | " \n",
13 | "
\n",
14 | "Pecans Streets data can be obtained by applying for a dataport account at https://www.dataport.pecanstreet.org.
We will be using Pecan Street Inc. data from dataport to calculate how much overall energy demand is used in homes by electric vehicle charging.
\n",
10 | "Pecans Streets data can be obtained by applying for a dataport account at https://www.dataport.pecanstreet.org.
You'll need to modify the read_csv calls in that notebook to point at these instead of the ones we've extracted and prepared for you in the /shared/JupyterHub-Examples-Data/ directory on the JupyterHub server if you would like to use the ones exported by this notebook in the analysis notebook.
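With the event start and end lists defined in the code below, flagging which minute-level rows fall inside any 4CP window reduces to OR-ing timestamp masks. A small sketch of the pandas-side equivalent of the SQL assembled below (assuming a timezone-aware `localminute` column):

```python
import pandas as pd

def in_4cp_window(frame, starts, ends):
    # returns a boolean Series that is True wherever a row's timestamp
    # falls inside any of the (start, end) 4CP event intervals
    ts = pd.to_datetime(frame['localminute'], utc=True)
    mask = pd.Series(False, index=frame.index)
    for s, e in zip(starts, ends):
        mask |= (ts >= pd.Timestamp(s)) & (ts <= pd.Timestamp(e))
    return mask

# usage: df2[in_4cp_window(df2, event_start_dates, event_end_dates)]
```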
" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import pandas as pd\n", 27 | "import psycopg2\n", 28 | "import sqlalchemy as sqla\n", 29 | "import os\n", 30 | "import sys\n", 31 | "sys.path.insert(0,'..')\n", 32 | "from config.read_config import get_database_config\n", 33 | "%matplotlib inline\n", 34 | "sys.executable # shows you your path to the python you're using" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# read in db credentials from config/config.txt\n", 44 | "# * make sure you add those to the config/config.txt file! *\n", 45 | "\n", 46 | "database_config = get_database_config(\"../config/config.txt\")\n" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "# get our DB connection\n", 56 | "engine = sqla.create_engine('postgresql://{}:{}@{}:{}/{}'.format(database_config['username'],\n", 57 | " database_config['password'],\n", 58 | " database_config['hostname'],\n", 59 | " database_config['port'],\n", 60 | " database_config['database']\n", 61 | " ))\n" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "# These are the ERCOT 4CP events (start date/time and end date/time) for 2016 - 2019 acquired from\n", 71 | "# http://mis.ercot.com/misapp/GetReports.do?reportTypeId=13037&reportTitle=Planned%20Service%20Four%20Coincident%20Peak%20Calculations&showHTMLView=&mimicKey\n", 72 | "\n", 73 | "event_start_dates = ['2019-06-19 17:00:00-05', '2019-07-30 16:30:00-05', '2019-08-12 17:00:00-05', '2019-09-06 16:45:00-05',\n", 74 | " '2018-06-27 17:00:00-05', '2018-07-19 17:00:00-05', '2018-08-23 16:45:00-05', '2018-09-19 16:30:00-05',\n", 75 | " '2017-06-23 16:45:00-05', '2017-07-28 17:00:00-05', '2017-08-16 17:00:00-05', '2017-09-20 16:45:00-05',\n", 76 | " '2016-06-15 17:00:00-05', '2016-07-14 16:00:00-05', '2016-08-11 16:30:00-05', '2016-09-19 16:16:00-05'\n", 77 | " ]\n", 78 | "event_end_dates = ['2019-06-19 17:15:00-05', '2019-07-30 16:45:00-05', '2019-08-12 17:15:00-05', '2019-09-06 17:00:00-05',\n", 79 | " '2018-06-27 17:15:00-05', '2018-07-19 17:15:00-05', '2018-08-23 17:00:00-05', '2018-09-19 16:45:00-05',\n", 80 | " '2017-06-23 17:00:00-05', '2017-07-28 17:15:00-05', '2017-08-16 17:15:00-05', '2017-09-20 17:00:00-05',\n", 81 | " '2016-06-15 17:15:00-05', '2016-07-14 16:15:00-05', '2016-08-11 16:45:00-05', '2016-09-19 16:31:00-05']" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "# Select the dataids, pv direction, amount of PV of solar homes\n", 91 | "# we're selecting homes with just South and West facing PV that have data between the first event and the last event\n", 92 | "\n", 93 | "\n", 94 | "query = \"\"\"\n", 95 | "select dataid, pv, pv_panel_direction, total_amount_of_pv, amount_of_west_facing_pv, amount_of_south_facing_pv\n", 96 | "from other_datasets.metadata\n", 97 | "where pv is not null\n", 98 | "and total_amount_of_pv is not null\n", 99 | "and grid is not null \n", 100 | "and solar is not null\n", 101 | "and pv_panel_direction in ('South', 'West')\n", 102 | "and egauge_1min_min_time < '2016-06-15'\n", 103 | "and egauge_1min_max_time > '2019-09-06'\n", 104 | "LIMIT 32\n", 105 | "\"\"\"\n", 106 | "\n", 107 | "# 
create a Pandas dataframe with the data from the sql query\n", 108 | "df = pd.read_sql_query(sqla.text(query), engine)\n", 109 | "df" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "# export homes to csv file\n", 119 | "compression_opts = dict(method='zip',\n", 120 | " archive_name='pv_south_vs_west_homes.zip')\n", 121 | "df.to_csv('pv_south_vs_west_homes.zip', index=False,\n", 122 | " compression=compression_opts)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "# grab dataids and convert them to a string to put into the SQL query\n", 132 | "dataids_list = df['dataid'].tolist()\n", 133 | "print(\"{} dataids selected listed here:\".format(len(dataids_list)))\n", 134 | "dataids_str = ','.join(list(map(str, dataids_list)))\n", 135 | "dataids_str\n", 136 | "dataids_list" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "# Assemble the SQL query to pull the data for the selected dataids\n", 146 | "# \n", 147 | "first_start = event_start_dates.pop(0)\n", 148 | "first_end = event_end_dates.pop(0)\n", 149 | "query_2 = \"\"\"\n", 150 | "select dataid, localminute, solar, grid from electricity.eg_realpower_1min \n", 151 | "where ((localminute >= '{}' and localminute <= '{}') \"\"\".format(first_start, first_end)\n", 152 | "\n", 153 | "for start, end in zip(event_start_dates, event_end_dates):\n", 154 | " query_2 = query_2 + \"OR (localminute >= '{}' and localminute <= '{}') \".format(start, end)\n", 155 | "\n", 156 | "query_2 = query_2 + \"\"\" ) AND dataid in ({})\"\"\".format(dataids_str)\n", 157 | "\n", 158 | "# here's what that query is\n", 159 | "print(\"sql query is \\n\" + query_2)\n", 160 | "\n", 161 | "# create a dataframe with the data from the sql query\n", 162 | "df2 = pd.read_sql_query(sqla.text(query_2), engine)\n", 163 | "\n", 164 | "# calculate usage as grid minus solar (which is actually grid + solar because solar is negative use)\n", 165 | "# Calculate the difference with a lambda function and add it as a new column called 'usage'\n", 166 | "df2['usage'] = df2.apply(lambda row: row.solar + row.grid, axis=1)\n", 167 | "df2.head(15)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "# Total number of records in the dataset\n", 177 | "df2['dataid'].count()" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "# export the data to a csv file\n", 187 | "compression_opts = dict(method='zip',\n", 188 | " archive_name='pv_south_vs_west.zip')\n", 189 | "df2.to_csv('pv_south_vs_west.zip', index=False,\n", 190 | " compression=compression_opts)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [] 199 | } 200 | ], 201 | "metadata": { 202 | "kernelspec": { 203 | "display_name": "Python 3", 204 | "language": "python", 205 | "name": "python3" 206 | }, 207 | "language_info": { 208 | "codemirror_mode": { 209 | "name": "ipython", 210 | "version": 3 211 | }, 212 | "file_extension": ".py", 213 | "mimetype": "text/x-python", 214 | "name": "python", 215 | "nbconvert_exporter": "python", 216 | "pygments_lexer": 
"ipython3", 217 | "version": "3.8.5" 218 | } 219 | }, 220 | "nbformat": 4, 221 | "nbformat_minor": 4 222 | } 223 | -------------------------------------------------------------------------------- /PV/PV-South-vs-West.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# PV: Solar production on south- vs. west-facing rooftop and how solar systems align with 4CP events in Texas\n", 8 | "\n", 9 | "This notebook will explore solar generation around the ERCOT 4CP events and compare West vs South facing solar generation during those events.\n", 10 | "\n", 11 | "The ERCOT 4CP events are the 15-minute ERCOT grid peak events for each month in June, July, August and September.\n", 12 | "\n", 13 | "ERCOT uses each large customer’s (including municipal utilities) total energy demand during the 4CP periods in the previous year as the basis for charges in the current year." 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "import pandas as pd\n", 23 | "import matplotlib.pyplot as plt\n", 24 | "import sys\n", 25 | "%matplotlib inline\n", 26 | "sys.executable # shows you your path to the python you're using" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "# Read pre-processed data for June 2018 through August 2018\n", 36 | "df2 = pd.read_csv('/shared/JupyterHub-Examples-Data/pv/pv_south_vs_west.zip',compression='zip')\n", 37 | "df2" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "# Checking completeness of data. 
We will consider only those homes that have 100% data for the 4CP dates\n", 47 | "data = df2.groupby(['dataid']).size().reset_index(name='counts')\n", 48 | "data['perc'] = (data['counts']/256)*100\n", 49 | "ndata = data[data['perc'] == 100]\n", 50 | "final_dataids = ndata['dataid']\n", 51 | "\n", 52 | "df3=df2.loc[df2['dataid'].isin(final_dataids)]\n", 53 | "df3.count()" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "dataids_list = df3['dataid'].unique()\n", 63 | "len(dataids_list)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "# drop any missing values\n", 73 | "df3 = df3.dropna()" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "# convert localminute to pandas datetime type\n", 83 | "df3['datetime'] = pd.to_datetime(df3['localminute'])\n", 84 | "\n", 85 | "# and set as index\n", 86 | "df3 = df3.set_index('datetime')\n", 87 | "df3" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "# group by month and dataid and take the mean of solar, grid, and usage within those groups\n", 97 | "grouped = df3.groupby([pd.Grouper(freq='M'), 'dataid']).mean()\n", 98 | "grouped" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "# read in the list of homes and direction that they face\n", 108 | "df = pd.read_csv('/shared/JupyterHub-Examples-Data/pv/pv_south_vs_west_homes.zip',compression='zip')\n", 109 | "df" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "# map pv direction onto dataids with a merge after resetting the index\n", 119 | "grouped = grouped.reset_index()\n", 120 | "grouped = grouped.merge(df, how='left', left_on='dataid', right_on='dataid')\n", 121 | "grouped" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "# reindex by the date\n", 131 | "grouped = grouped.set_index('datetime')\n", 132 | "grouped" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "# regroup by year and pv_panel_direction and average the values\n", 142 | "year_west_vs_south = (grouped.groupby([pd.Grouper(freq='Y'),'pv_panel_direction']).mean())\n", 143 | "\n", 144 | "# we don't need a mean of the dataids, so we can drop that column now\n", 145 | "year_west_vs_south = year_west_vs_south.drop(columns=['dataid'])\n", 146 | "year_west_vs_south" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "# now just drop unneeded columns and rearrange them and simplify the view\n", 161 | "year_west_vs_south = year_west_vs_south.reset_index()\n", 162 | "year_west_vs_south['year'] = pd.DatetimeIndex(year_west_vs_south['datetime']).year\n", 163 | "year_west_vs_south = year_west_vs_south[['year', 'pv_panel_direction','solar', 'grid', 'usage']]\n", 164 | "year_west_vs_south = 
year_west_vs_south.set_index('year')\n", 165 | "year_west_vs_south" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "# Result: West-Facing Solar Wins!\n", 173 | "## Despite higher in-home usage (green bars, 'usage') in the west-facing houses, solar production is so high (blue bars, 'solar') that west-facing homes still draw less power from the grid (orange bars, 'grid') than south-facing homes in each paired year. In most cases here, grid is even negative, meaning that the west-facing homes are putting power back on the grid." 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "from matplotlib.pyplot import figure\n", 183 | "plot = year_west_vs_south.plot(kind='bar',figsize=(25,15), title=\"Solar production, Net Grid Usage (home usage from the grid minus solar production), and Home Usage During ERCOT 4CP events\")\n", 184 | "labels = plot.set_xticklabels(['2016-South', '2016-West', '2017-South', '2017-West', '2018-South', '2018-West', '2019-South', '2019-West'])\n", 185 | "ylabel = plot.set_ylabel('Usage/Production in kW')\n", 186 | "xlabel = plot.set_xlabel('South vs West Facing Year')" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [] 195 | } 196 | ], 197 | "metadata": { 198 | "kernelspec": { 199 | "display_name": "Python 3", 200 | "language": "python", 201 | "name": "python3" 202 | }, 203 | "language_info": { 204 | "codemirror_mode": { 205 | "name": "ipython", 206 | "version": 3 207 | }, 208 | "file_extension": ".py", 209 | "mimetype": "text/x-python", 210 | "name": "python", 211 | "nbconvert_exporter": "python", 212 | "pygments_lexer": "ipython3", 213 | "version": "3.8.5" 214 | } 215 | }, 216 | "nbformat": 4, 217 | "nbformat_minor": 4 218 | } 219 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # JupyterHub-Examples 2 | Example Jupyter Notebooks demonstrating use of Pecan Street data on our JupyterHub 3 | 4 | ## Use 5 | Clone the repository with the command `git clone git@github.com:Pecan-Street/JupyterHub-Examples.git` either on your own computer, or from a terminal on the Pecan Street JupyterHub server. 6 | 7 | Some of the notebooks require a Dataport database account to perform database queries and extract data. 8 | 9 | If you have a Dataport database account, edit the config/config.txt file with the connection information from your https://dataport.pecanstreet.org/access page. 
10 | 11 | If you have questions or feedback, contact us at https://www.pecanstreet.org/contact/ 12 | -------------------------------------------------------------------------------- /config/config.txt: -------------------------------------------------------------------------------- 1 | [database_config] 2 | hostname = 3 | database = 4 | port = 5 | username = 6 | password = 7 | 8 | -------------------------------------------------------------------------------- /config/read_config.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | import traceback 3 | 4 | 5 | def get_database_config(config_file): 6 |     try: 7 |         config = configparser.ConfigParser() 8 |         config.read(config_file) 9 |         return { 10 |             "hostname": config.get("database_config", "hostname"), 11 |             "username": config.get("database_config", "username"), 12 |             "password": config.get("database_config", "password"), 13 |             "port": int(config.get("database_config", "port")), 14 |             "database": config.get("database_config", "database") 15 |         } 16 |     except Exception: 17 |         # configparser.Error (missing section/option), ValueError (non-numeric 18 |         # port), and any other failure all get the same treatment 19 |         traceback.print_exc() 20 |         print( 21 |             "Error reading database configuration. Does the config/config.txt file exist and have entries for " 22 |             "hostname, username, password, database, and port?") 23 | --------------------------------------------------------------------------------
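For reference, the notebooks in this repository consume this helper roughly as follows (a minimal sketch assuming config/config.txt has been filled in with your Dataport credentials):

```python
import sqlalchemy as sqla
from config.read_config import get_database_config

# read the credentials and build the same postgresql engine the notebooks use
database_config = get_database_config("config/config.txt")
engine = sqla.create_engine('postgresql://{}:{}@{}:{}/{}'.format(
    database_config['username'], database_config['password'],
    database_config['hostname'], database_config['port'],
    database_config['database']))
```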