├── .gitignore
├── Efficiency
│   ├── Efficiency-BTUs-data.ipynb
│   ├── Efficiency-BTUs.ipynb
│   ├── Efficiency-Coincidence-Water-Gas-Elec.ipynb
│   ├── Efficiency-Emissions-Density.ipynb
│   ├── Efficiency-Percent-PV-Home-Usage.ipynb
│   ├── Efficiency-Water_vs_Energy.ipynb
│   └── ercot_emissions_2018.csv
├── ElectricVehicle
│   ├── Data-Extraction--EV_charging_alignment_with_State_of_Texas_critical_peak_power_events.ipynb
│   ├── Data-Extraction--EV_charging_alignment_with_at_home_rooftop_solar.ipynb
│   ├── Data-Extraction--EV_overall_household_demand.ipynb
│   ├── EV_charging_alignment_with_State_of_Texas_critical_peak_power_events.ipynb
│   ├── EV_charging_alignment_with_at_home_rooftop_solar.ipynb
│   └── EV_overall_household_demand.ipynb
├── HVAC
│   ├── AC Comparison JupyterHub Example Book .ipynb
│   ├── HVAC-Cooling-During-4CP.ipynb
│   └── HVAC-Solar-Generation-Alignment.ipynb
├── LICENSE
├── PV
│   ├── Data-Extraction--PV-South-vs-West.ipynb
│   ├── Data-Extraction--PV-storage-savings-4CP.ipynb
│   ├── PV-South-vs-West.ipynb
│   └── PV-storage-savings-4CP.ipynb
├── README.md
└── config
    ├── config.txt
    └── read_config.py
/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g.
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # macOS directory metadata 132 | .DS_Store 133 | -------------------------------------------------------------------------------- /Efficiency/Efficiency-BTUs-data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Dataport Database Extraction Notebook for the Efficiency-BTUs Notebook\n", 8 | "## This notebook connects to the database, extracts the data live, and writes it to compressed zip files in this directory.\n", 9 | "If you would like to use the files exported by this notebook instead of the ones we've extracted and prepared for you in the /shared directory on the JupyterHub server, modify the read_csv calls in the Efficiency-BTUs notebook to point at these local copies." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "#import packages\n", 19 | "import pandas as pd\n", 20 | "import psycopg2\n", 21 | "import sqlalchemy as sqla\n", 22 | "import os\n", 23 | "import numpy as np\n", 24 | "import sys\n", 25 | "sys.path.insert(0,'..')\n", 26 | "from config.read_config import get_database_config\n", 27 | "%matplotlib inline\n", 28 | "sys.executable # shows you your path to the python you're using\n", 29 | "\n", 30 | "# read in db credentials from config/config.txt\n", 31 | "# * make sure you add those to the config/config.txt file! 
*\n", 32 | "\n", 33 | "database_config = get_database_config(\"../config/config.txt\")" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "# get our DB connection\n", 43 | "engine = sqla.create_engine('postgresql://{}:{}@{}:{}/{}'.format(database_config['username'],\n", 44 | " database_config['password'],\n", 45 | " database_config['hostname'],\n", 46 | " database_config['port'],\n", 47 | " database_config['database']\n", 48 | " ))" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "#Select a list of Texas homes from dataport metadata having good gas data availabilty\n", 58 | "query = \"\"\"select distinct dataid, egauge_1min_data_availability, gas_data_availability, grid, solar from other_datasets.metadata \n", 59 | " \n", 60 | " where grid = 'yes'\n", 61 | " and egauge_1min_min_time <= '2018-03-01' \n", 62 | " and egauge_1min_max_time > '2018-09-01'\n", 63 | " and city='Austin'\n", 64 | " and (egauge_1min_data_availability like '100%' \n", 65 | " or \n", 66 | " egauge_1min_data_availability like '9%')\n", 67 | " and gas_ert_min_time <= '2018-03-01'\n", 68 | " and gas_ert_max_time > '2018-09-01'\n", 69 | " and\n", 70 | " (\n", 71 | " gas_data_availability like '100%'\n", 72 | " or\n", 73 | " gas_data_availability like '9%'\n", 74 | " or\n", 75 | " gas_data_availability like '8%'\n", 76 | " or\n", 77 | " gas_data_availability like '7%'\n", 78 | " )\n", 79 | " LIMIT 50\n", 80 | " ;\n", 81 | " \"\"\"\n", 82 | "\n", 83 | "df = pd.read_sql_query(sqla.text(query), engine)\n", 84 | "df" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "# export to a zipped csv\n", 94 | "compression_opts = dict(method='zip',\n", 95 | " archive_name='efficiency_btus_metadata.csv')\n", 96 | "df.to_csv('efficiency_btus_metadata.zip', index=False,\n", 97 | " compression=compression_opts)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "# grab dataids and convert them to a string to put into the SQL query\n", 107 | "dataids_list = df['dataid'].tolist()\n", 108 | "print(\"{} dataids selected listed here:\".format(len(dataids_list)))\n", 109 | "dataids_str = ','.join(list(map(str, dataids_list)))\n", 110 | "dataids_str" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "#Pull electricity data for selected homes.\n", 120 | "data = \"\"\"select dataid,localminute::timestamp,solar,grid \n", 121 | " from electricity.eg_realpower_1min \n", 122 | " where localminute >= '2018-03-01' and localminute < '2018-09-01' \"\"\"\n", 123 | "data = data + \"\"\"AND dataid in ({})\"\"\".format(dataids_str)\n", 124 | "\n", 125 | "# create a dataframe with the data from the sql query\n", 126 | "data_df = pd.read_sql_query(sqla.text(data), engine)\n", 127 | "\n", 128 | "data_df" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "# export to a zipped csv\n", 138 | "compression_opts = dict(method='zip',\n", 139 | " archive_name='efficiency_btus_electricity_data.csv')\n", 140 | "data_df.to_csv('efficiency_btus_electricity_data.zip', index=False,\n", 141 | " 
compression=compression_opts)" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "# Pull gas data for same homes and time period\n", 151 | "gas_sql = \"\"\"select * \n", 152 | " from water_and_gas.gas_ert \n", 153 | " where readtime >= '2018-03-01' and readtime < '2018-09-01' \"\"\"\n", 154 | "gas_sql = gas_sql + \"\"\"AND dataid in ({})\"\"\".format(dataids_str)\n", 155 | "\n", 156 | "# create a dataframe with the data from the sql query\n", 157 | "gas_df = pd.read_sql_query(sqla.text(gas_sql), engine)\n", 158 | "\n", 159 | "gas_df" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "# export to a zipped csv\n", 169 | "compression_opts = dict(method='zip',\n", 170 | " archive_name='efficiency_btus_gas_data.csv')\n", 171 | "gas_df.to_csv('efficiency_btus_gas_data.zip', index=False,\n", 172 | " compression=compression_opts)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "# lets go get some blucube water data now\n", 182 | "#Pull data for selected homes.\n", 183 | "water_sql = \"\"\"SELECT * FROM water_and_gas.blucube_water_data\n", 184 | " where epoch_timestamp >= '2018-03-01' and epoch_timestamp < '2018-09-01' \"\"\"\n", 185 | "water_sql = water_sql + \"\"\"AND dataid in ({})\"\"\".format(dataids_str)\n", 186 | "\n", 187 | "# create a dataframe with the data from the sql query\n", 188 | "water_df = pd.read_sql_query(sqla.text(water_sql), engine)\n", 189 | "\n", 190 | "water_df" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "# export to a zipped csv\n", 200 | "compression_opts = dict(method='zip',\n", 201 | " archive_name='efficiency_btus_water_data.csv')\n", 202 | "water_df.to_csv('efficiency_btus_water_data.zip', index=False,\n", 203 | " compression=compression_opts)" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [] 212 | } 213 | ], 214 | "metadata": { 215 | "kernelspec": { 216 | "display_name": "Python 3", 217 | "language": "python", 218 | "name": "python3" 219 | }, 220 | "language_info": { 221 | "codemirror_mode": { 222 | "name": "ipython", 223 | "version": 3 224 | }, 225 | "file_extension": ".py", 226 | "mimetype": "text/x-python", 227 | "name": "python", 228 | "nbconvert_exporter": "python", 229 | "pygments_lexer": "ipython3", 230 | "version": "3.8.5" 231 | } 232 | }, 233 | "nbformat": 4, 234 | "nbformat_minor": 4 235 | } 236 | -------------------------------------------------------------------------------- /Efficiency/Efficiency-BTUs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Visualization of energy intensity converting electricity use to embedded BTU based on national averages:\n", 8 | "\n", 9 | "The `Efficiency-BTUs-data.ipynb` notebook in this same directory will also perform the database queries and export the files as zipped csv files to this directory. You would just need to change the `pg.read_csv()` calls to look in the current directory instead of the /shared one." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "#import packages\n", 19 | "import pandas as pd\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "import psycopg2\n", 22 | "import sqlalchemy as sqla\n", 23 | "import os\n", 24 | "import sys\n", 25 | "sys.path.insert(0,'..')\n", 26 | "from config.read_config import get_database_config\n", 27 | "import numpy as np\n", 28 | "%matplotlib inline\n", 29 | "sys.executable # shows you your path to the python you're using" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "# read in db credentials from ../config/config.txt\n", 39 | "# * make sure you add those to the ../config/config.txt file! *\n", 40 | "\n", 41 | "## Uncomment the following line to use the live database queries\n", 42 | "'''\n", 43 | "database_config = get_database_config(\"../config/config.txt\")\n", 44 | "'''\n", 45 | "#" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# get our DB connection\n", 55 | "\n", 56 | "# uncomment if you want to use the live queries to the database instead of the prepared data\n", 57 | "'''\n", 58 | "engine = sqla.create_engine('postgresql://{}:{}@{}:{}/{}'.format(database_config['username'],\n", 59 | " database_config['password'],\n", 60 | " database_config['hostname'],\n", 61 | " database_config['port'],\n", 62 | " database_config['database']\n", 63 | " ))\n", 64 | "'''\n" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "#Select a list of Texas homes from dataport metadata having good gas data availability\n", 74 | "\n", 75 | "# Uncomment the following block to use live database queries\n", 76 | "'''\n", 77 | "query = \"\"\"select distinct dataid, egauge_1min_data_availability, gas_data_availability, grid, solar from other_datasets.metadata \n", 78 | " \n", 79 | " where grid = 'yes'\n", 80 | " and egauge_1min_min_time <= '2018-03-01' \n", 81 | " and egauge_1min_max_time > '2018-09-01'\n", 82 | " and city='Austin'\n", 83 | " and (egauge_1min_data_availability like '100%' \n", 84 | " or \n", 85 | " egauge_1min_data_availability like '9%')\n", 86 | " and gas_ert_min_time <= '2018-03-01'\n", 87 | " and gas_ert_max_time > '2018-09-01'\n", 88 | " and\n", 89 | " (\n", 90 | " gas_data_availability like '100%'\n", 91 | " or\n", 92 | " gas_data_availability like '9%'\n", 93 | " or\n", 94 | " gas_data_availability like '8%'\n", 95 | " or\n", 96 | " gas_data_availability like '7%'\n", 97 | " )\n", 98 | " LIMIT 50\n", 99 | " ;\n", 100 | " \"\"\"\n", 101 | "\n", 102 | "df = pd.read_sql_query(sqla.text(query), engine)\n", 103 | "df\n", 104 | "'''\n", 105 | "\n", 106 | "# otherwise we'll read from the prepared/extracted zipped data files\n", 107 | "df = pd.read_csv('/shared/JupyterHub-Examples-Data/efficiency/efficiency_btus_metadata.zip', compression='zip')\n", 108 | "df" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "# grab dataids and convert them to a string to put into the SQL query\n", 118 | "dataids_list = df['dataid'].tolist()\n", 119 | "print(\"{} dataids selected listed here:\".format(len(dataids_list)))\n", 120 | "dataids_str = ','.join(list(map(str, dataids_list)))" 121 | ] 122 | }, 123 
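As an aside, here is a sketch (assuming SQLAlchemy 1.3+, and not how these notebooks actually do it) of passing the dataid list as a bound parameter instead of interpolating `dataids_str` into the SQL string:

```python
import sqlalchemy as sqla

# An expanding bind parameter renders "in (:ids_1, :ids_2, ...)" at execution
# time, avoiding manual string formatting of the id list into the query.
stmt = sqla.text(
    "select dataid, localminute::timestamp, solar, grid "
    "from electricity.eg_realpower_1min "
    "where localminute >= '2018-03-01' and localminute < '2018-09-01' "
    "and dataid in :ids"
).bindparams(sqla.bindparam("ids", expanding=True))

# With a live engine one would then run:
# data_df = pd.read_sql_query(stmt, engine, params={"ids": dataids_list})
```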
| { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "# Uncomment the following block to use live database queries\n", 130 | "\n", 131 | "'''\n", 132 | "#Pull electricity data for selected homes.\n", 133 | "data = \"\"\"select dataid,localminute::timestamp,solar,grid \n", 134 | " from electricity.eg_realpower_1min \n", 135 | " where localminute >= '2018-03-01' and localminute < '2018-09-01' \"\"\"\n", 136 | "data = data + \"\"\"AND dataid in ({})\"\"\".format(dataids_str)\n", 137 | "\n", 138 | "# create a dataframe with the data from the sql query\n", 139 | "data_df = pd.read_sql_query(sqla.text(data), engine)\n", 140 | "'''\n", 141 | "\n", 142 | "# otherwise we'll read in the already prepared electricity data\n", 143 | "data_df = pd.read_csv('/shared/JupyterHub-Examples-Data/efficiency/efficiency_btus_electricity_data.zip', compression='zip')\n", 144 | "\n", 145 | "data_df" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "# convert 'localminute' to a datetime\n", 155 | "data_df['datetime'] = pd.to_datetime(data_df['localminute'])\n", 156 | "\n", 157 | "# index by datetime \n", 158 | "data_df = data_df.set_index('datetime')\n", 159 | "\n", 160 | "# bring to central timezone\n", 161 | "data_df = data_df.tz_localize(tz='US/Central')\n", 162 | "data_df" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "# add hour of day to df\n", 172 | "data_df['hour'] = data_df.index.hour\n", 173 | "data_df" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "# If there's no grid value, throw the row out\n", 183 | "data_df.dropna(subset=['grid'], inplace=True)\n", 184 | "data_df" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "# replace solar NaNs with 0\n", 194 | "data_df['solar'].fillna(value=0, inplace=True)\n", 195 | "print(data_df['solar'].isna().sum())\n", 196 | "\n", 197 | "# calculate the use, grid net of solar (we add them because solar generation shows up negative in the database)\n", 198 | "data_df['use'] = data_df['grid'] + data_df['solar']\n", 199 | "data_df" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "# group by the hour and take the mean to get the hourly average use\n", 209 | "usage = data_df.groupby(['hour']).mean()\n", 210 | "usage" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "# calc BTUs from kWh (1 kWh = 3,412 BTU)\n", 220 | "usage['Electrical BTUs'] = usage['use'] * 3412\n", 221 | "usage" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "# Read in insolation data downloaded from Solar Forecast Arbiter for Austin for the same timeframe\n", 231 | "# https://dashboard.solarforecastarbiter.org/observations/c6d40462-7e49-11e9-aef1-0a580a8003e9\n", 232 | "insol = pd.read_csv('/shared/JupyterHub-Examples-Data/efficiency/Austin_TX_ghi_2018-03-01T06_00_00+00_00-2018-10-01T06_00_00+00_00.csv.zip', 
skiprows=2)\n", 233 | "insol['datetime'] = pd.to_datetime(insol['timestamp'])\n", 234 | "insol = insol.set_index('datetime')\n", 235 | "insol = insol.tz_convert(tz='US/Central')\n", 236 | "insol" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": {}, 243 | "outputs": [], 244 | "source": [ 245 | "# add hour of day to df\n", 246 | "insol['hour'] = insol.index.hour\n", 247 | "insol" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "# one could potentially look at the quality flag to determine if we want to keep the row or not\n", 257 | "insol = insol.drop(columns=['quality_flag'])\n", 258 | "insol.describe()" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | "# group into hour of the day and take the mean\n", 268 | "grouped = insol.groupby(['hour']).mean()\n", 269 | "grouped" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [ 278 | "# Plot insolation vs grid usage for hour of the day\n", 279 | "\n", 280 | "fig, ax1 = plt.subplots(figsize=(20,10))\n", 281 | "\n", 282 | "color = 'tab:orange'\n", 283 | "\n", 284 | "ax1.set_xlabel('Hour of Day')\n", 285 | "ax1.set_ylabel('Insolation or GHI (W/m^2)')\n", 286 | "p1, = ax1.plot(grouped.index, grouped['value'],label=\"Insolation or GHI (W/m^2)\", color=color)\n", 287 | "ax1.tick_params(axis='y', labelcolor=color)\n", 288 | "\n", 289 | "ax2 = ax1.twinx()\n", 290 | "color = 'tab:blue'\n", 291 | "ax2.set_ylabel('Usage (kWh)')\n", 292 | "p2, = ax2.plot(grouped.index, usage['use'], label=\"Usage (kWh)\", color=color)\n", 293 | "ax2.tick_params(axis='y', labelcolor=color)\n", 294 | "\n", 295 | "ax1.legend((p1, p2), ('Insolation (W/m^2)', 'Usage (kWh)'), loc='upper right', shadow=True)\n", 296 | "\n", 297 | "plt.show()" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "# Read gas data for same homes and time period\n", 307 | "\n", 308 | "# uncomment the following block to do the live db query\n", 309 | "'''\n", 310 | "gas_sql = \"\"\"select * \n", 311 | " from water_and_gas.gas_ert \n", 312 | " where readtime >= '2018-03-01' and readtime < '2018-09-01' \"\"\"\n", 313 | "gas_sql = gas_sql + \"\"\"AND dataid in ({})\"\"\".format(dataids_str)\n", 314 | "\n", 315 | "# create a dataframe with the data from the sql query\n", 316 | "gas_df = pd.read_sql_query(sqla.text(gas_sql), engine)\n", 317 | "\n", 318 | "'''\n", 319 | "\n", 320 | "# read from the prepared csv.zip file\n", 321 | "gas_df = pd.read_csv('/shared/JupyterHub-Examples-Data/efficiency/efficiency_btus_gas_data.zip', compression='zip')\n", 322 | "\n", 323 | "gas_df" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": {}, 330 | "outputs": [], 331 | "source": [ 332 | "# convert readtime to a datetime, set the index, and convert to Central Time\n", 333 | "pd.options.display.max_rows = 500\n", 334 | "gas_df['datetime'] = pd.to_datetime(gas_df['readtime'], utc=True)\n", 335 | "gas_df = gas_df.set_index('datetime')\n", 336 | "gas_df = gas_df.tz_convert(tz='US/Central')\n", 337 | "gas_df = gas_df.drop(columns=['readtime'])\n", 338 | "gas_df" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 
344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [ 347 | "# the gas (and water data for that matter) are cumulative meter readings, meaning that they gradually increase as more gas flows. \n", 348 | "# So you can have the same reading many times in a row, or you can have gaps with no readings until more gas is used.\n", 349 | "# You can also have the meter get reset to 0, so we're going to do some gymnastics to deal with all of that\n", 350 | "\n", 351 | "# group by dataid and hour then run a diff on the rows\n", 352 | "gas_df_group = gas_df.groupby(['dataid', pd.Grouper(freq='H')]).max().diff()\n", 353 | "\n", 354 | "gas_df_group = gas_df_group.dropna()\n", 355 | "\n", 356 | "# zero out the negative diffs because that's the meter being reset and going from some high number reading to 0 (This Needs Looking AT!!!! IDK if it's the right thing to do!)\n", 357 | "gas_df_group['meter_value'] = gas_df_group['meter_value'].clip(0)\n", 358 | "gas_df_group.describe()" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": null, 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [ 367 | "# gas is measured in ft^3 convert to BTUs with (1CCF = 103,700BTU = 30.4kWh). (Per EIA's calculator, 1 cubic ft of natural gas is 1,037BTU.)\n", 368 | "\n", 369 | "# calculate the BTUs of gas used\n", 370 | "gas_df_group['Gas BTUs'] = gas_df_group['meter_value'] * 1037\n", 371 | "gas_df_group" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [ 380 | "# add hour of day to gas\n", 381 | "gas_df_group = gas_df_group.reset_index()\n", 382 | "gas_df_group = gas_df_group.set_index('datetime')\n", 383 | "gas_df_group['hour'] = gas_df_group.index.hour\n", 384 | "gas_df_group" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": null, 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "# take the mean over all the same hours of the day across all dataids\n", 394 | "gas_hr_mean = gas_df_group.groupby(['hour']).mean()\n", 395 | "gas_hr_mean" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": null, 401 | "metadata": {}, 402 | "outputs": [], 403 | "source": [ 404 | "# don't need those columns anymore\n", 405 | "gas_hr_mean = gas_hr_mean.drop(columns=['dataid','meter_value'])" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": null, 411 | "metadata": {}, 412 | "outputs": [], 413 | "source": [ 414 | "# the first row is a bit meaningless as a diff, so we're dropping it\n", 415 | "gas_hr_mean = gas_hr_mean.drop([0])\n", 416 | "gas_hr_mean" 417 | ] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": null, 422 | "metadata": {}, 423 | "outputs": [], 424 | "source": [ 425 | "# Let's have a peek at it\n", 426 | "gas_hr_mean.plot(figsize=(20,10), grid=True, x_compat=True, title=\"Gas Use in BTUs Per Hour of Day\")" 427 | ] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": null, 432 | "metadata": {}, 433 | "outputs": [], 434 | "source": [ 435 | "# combine electrical usage and gas as btus\n", 436 | "combined = pd.merge(left=usage['Electrical BTUs'], right=gas_hr_mean['Gas BTUs'], left_on=usage.index, right_on=gas_hr_mean.index)\n", 437 | "combined = combined.drop(columns=['key_0'])\n", 438 | "combined" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": null, 444 | "metadata": {}, 445 | "outputs": [], 446 | "source": [ 447 | "# let's see how those look on a 
plot. \n", 448 | "# Plotting with 2 y axes because of the scale difference between them\n", 449 | "\n", 450 | "fig, ax1 = plt.subplots(figsize=(20,10))\n", 451 | "\n", 452 | "color = 'tab:orange'\n", 453 | "\n", 454 | "ax1.set_xlabel('Hour of Day')\n", 455 | "ax1.set_ylabel('Electrical Use as BTUs')\n", 456 | "p1, = ax1.plot(usage.index, usage['Electrical BTUs'],label=\"Electrical Use as BTUs\", color=color)\n", 457 | "ax1.tick_params(axis='y', labelcolor=color)\n", 458 | "\n", 459 | "ax2 = ax1.twinx()\n", 460 | "color = 'tab:blue'\n", 461 | "ax2.set_ylabel('Gas Use as BTUs')\n", 462 | "p2, = ax2.plot(gas_hr_mean.index, gas_hr_mean['Gas BTUs'], label=\"Gas Use as BTUs\", color=color)\n", 463 | "ax2.tick_params(axis='y', labelcolor=color)\n", 464 | "\n", 465 | "ax1.legend((p1, p2), ('Electrical Use as BTUs', 'Gas Use as BTUs'), loc='upper right', shadow=True)\n", 466 | "\n", 467 | "plt.show()" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": null, 473 | "metadata": {}, 474 | "outputs": [], 475 | "source": [ 476 | "# Combining into a single dataframe\n", 477 | "gas_hr_mean['Elec BTUs'] = usage['Electrical BTUs']\n", 478 | "gas_hr_mean" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": null, 484 | "metadata": {}, 485 | "outputs": [], 486 | "source": [ 487 | "# More combining and renaming\n", 488 | "\n", 489 | "btus = usage.merge(gas_hr_mean, on=['hour'], how='left')\n", 490 | "btus = btus.drop(labels=['dataid', 'solar', 'grid', 'use', 'Elec BTUs'], axis=1)\n", 491 | "btus = btus.rename({'BTUs':'Electrical BTUs', 'Gas BTUs':'Gas BTUs'},axis='columns')\n", 492 | "btus\n" 493 | ] 494 | }, 495 | { 496 | "cell_type": "code", 497 | "execution_count": null, 498 | "metadata": {}, 499 | "outputs": [], 500 | "source": [ 501 | "# let's go get some blucube water data now\n", 502 | "\n", 503 | "# uncomment this block for a direct database pull\n", 504 | "'''\n", 505 | "#Pull data for selected homes.\n", 506 | "water_sql = \"\"\"SELECT * FROM water_and_gas.blucube_water_data\n", 507 | " where epoch_timestamp >= '2018-03-01' and epoch_timestamp < '2018-09-01' \"\"\"\n", 508 | "water_sql = water_sql + \"\"\"AND dataid in ({})\"\"\".format(dataids_str)\n", 509 | "\n", 510 | "# create a dataframe with the data from the sql query\n", 511 | "water_df = pd.read_sql_query(sqla.text(water_sql), engine)\n", 512 | "\n", 513 | "'''\n", 514 | "\n", 515 | "# read in the prepared csv file\n", 516 | "water_df = pd.read_csv('/shared/JupyterHub-Examples-Data/efficiency/efficiency_btus_water_data.zip')\n", 517 | "\n", 518 | "water_df" 519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": null, 524 | "metadata": {}, 525 | "outputs": [], 526 | "source": [ 527 | "# don't need the meter id\n", 528 | "water_df = water_df.drop(columns=['met_id']) \n", 529 | "water_df" 530 | ] 531 | }, 532 | { 533 | "cell_type": "code", 534 | "execution_count": null, 535 | "metadata": {}, 536 | "outputs": [], 537 | "source": [ 538 | "# convert 'epoch_timestamp' to a datetime\n", 539 | "water_df['datetime'] = pd.to_datetime(water_df['epoch_timestamp'], utc=True)\n", 540 | "\n", 541 | "# index by datetime \n", 542 | "water_df = water_df.set_index('datetime')\n", 543 | "\n", 544 | "# bring to central timezone\n", 545 | "water_df = water_df.tz_convert(tz='US/Central')\n", 546 | "water_df" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": null, 552 | "metadata": {}, 553 | "outputs": [], 554 | "source": [ 555 | "water = 
water_df.drop(columns=['epoch_timestamp'])\n", 556 | "water" 557 | ] 558 | }, 559 | { 560 | "cell_type": "code", 561 | "execution_count": null, 562 | "metadata": {}, 563 | "outputs": [], 564 | "source": [ 565 | "# group by dataid and hour then run a diff on the rows\n", 566 | "water_group = water.groupby(['dataid', pd.Grouper(freq='H')]).max().diff()\n", 567 | "\n", 568 | "water_group = water_group.dropna()\n", 569 | "\n", 570 | "# zero out the negative diffs because that's the meter being reset and going from some high number reading to 0\n", 571 | "water_group['reading_in_gal'] = water_group['reading_in_gal'].clip(0)\n", 572 | "water_group.describe()" 573 | ] 574 | }, 575 | { 576 | "cell_type": "code", 577 | "execution_count": null, 578 | "metadata": {}, 579 | "outputs": [], 580 | "source": [ 581 | "water_group" 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": null, 587 | "metadata": {}, 588 | "outputs": [], 589 | "source": [ 590 | "# add hour of day to water\n", 591 | "water_group = water_group.reset_index()\n", 592 | "water_group = water_group.set_index('datetime')\n", 593 | "water_group['hour'] = water_group.index.hour\n", 594 | "water_group" 595 | ] 596 | }, 597 | { 598 | "cell_type": "code", 599 | "execution_count": null, 600 | "metadata": {}, 601 | "outputs": [], 602 | "source": [ 603 | "# take the mean by hour of the day\n", 604 | "water_mean = water_group.groupby(['hour']).mean()\n", 605 | "water_mean = water_mean.drop(columns=['dataid'])\n", 606 | "water_mean" 607 | ] 608 | }, 609 | { 610 | "cell_type": "code", 611 | "execution_count": null, 612 | "metadata": {}, 613 | "outputs": [], 614 | "source": [ 615 | "# convert gallons to BTUs via the embedded energy of water for Austin: 1,000,000 gallons = 5,000 kWh, i.e. 200 gallons per kWh, so divide gallons by 200 to get kWh and multiply by 3,412 BTU/kWh\n", 616 | "water_mean['Water BTUs'] = (water_mean['reading_in_gal'] / 200.0) * 3412\n", 617 | "water_mean" 618 | ] 619 | }, 620 | { 621 | "cell_type": "code", 622 | "execution_count": null, 623 | "metadata": {}, 624 | "outputs": [], 625 | "source": [ 626 | "# again the first row is a bit meaningless because it's a diff against nothing\n", 627 | "water_mean = water_mean.drop([0])\n", 628 | "water_mean = water_mean.drop(columns=['reading_in_gal'])\n", 629 | "water_mean.plot(figsize=(20,10), grid=True, x_compat=True)" 630 | ] 631 | }, 632 | { 633 | "cell_type": "code", 634 | "execution_count": null, 635 | "metadata": {}, 636 | "outputs": [], 637 | "source": [ 638 | "water_mean" 639 | ] 640 | }, 641 | { 642 | "cell_type": "code", 643 | "execution_count": null, 644 | "metadata": {}, 645 | "outputs": [], 646 | "source": [ 647 | "btus" 648 | ] 649 | }, 650 | { 651 | "cell_type": "code", 652 | "execution_count": null, 653 | "metadata": {}, 654 | "outputs": [], 655 | "source": [ 656 | "# combine all three into one dataframe\n", 657 | "btus = btus.merge(water_mean, on=['hour'], how='left')\n", 658 | "btus" 659 | ] 660 | }, 661 | { 662 | "cell_type": "code", 663 | "execution_count": null, 664 | "metadata": {}, 665 | "outputs": [], 666 | "source": [ 667 | "# let's make some pretty pictures\n", 668 | "btus.plot(figsize=(20,10), grid=True, x_compat=True)" 669 | ] 670 | }, 671 | { 672 | "cell_type": "code", 673 | "execution_count": null, 674 | "metadata": {}, 675 | "outputs": [], 676 | "source": [ 677 | "# now time to bring the insolation in, let's get it named properly\n", 678 | "grouped = grouped.rename({'value':'Insolation or GHI (W/m^2)'} , axis='columns')\n", 679 | "grouped" 680 | ] 681 | }, 682 | { 683 | 
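A note on the cumulative-meter handling used above for both gas and water: `groupby(['dataid', Grouper]).max().diff()` also diffs across the boundary between one home's series and the next. A sketch (under the same treat-negatives-as-resets assumption) that keeps the differences within each home:

```python
import pandas as pd

# Hourly max reading per home, then difference within each home's own series,
# so one dataid's first hour is never diffed against another dataid's last.
hourly = water.groupby(['dataid', pd.Grouper(freq='H')]).max()
deltas = hourly.groupby(level='dataid').diff().dropna()

# As in the notebook, treat negative deltas (meter resets) as zero usage.
deltas = deltas.clip(lower=0)
```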
"cell_type": "code", 684 | "execution_count": null, 685 | "metadata": {}, 686 | "outputs": [], 687 | "source": [ 688 | "# Add insolation to the btus dataframe\n", 689 | "btus = btus.merge(grouped, on=['hour'], how='left')\n", 690 | "\n", 691 | "# calculate the sum of the 3 different btu usages\n", 692 | "btus['Sum BTUs'] = btus['Electrical BTUs'] + btus['Gas BTUs'] + btus['Water BTUs']\n", 693 | "btus" 694 | ] 695 | }, 696 | { 697 | "cell_type": "code", 698 | "execution_count": null, 699 | "metadata": {}, 700 | "outputs": [], 701 | "source": [ 702 | "# let's plot it all\n", 703 | "# BTUs on one Y axis, and Insolation on the other.\n", 704 | "# X axis is the hour of the day\n", 705 | "\n", 706 | "fig, ax1 = plt.subplots(figsize=(20,10))\n", 707 | "\n", 708 | "color = 'tab:orange'\n", 709 | "\n", 710 | "ax1.set_xlabel('Hour of Day')\n", 711 | "ax1.set_ylabel('BTUs')\n", 712 | "p1, = ax1.plot(btus.index, btus['Electrical BTUs'],label=\"Electrical Use in BTUs\", color=color)\n", 713 | "p2, = ax1.plot(btus.index, btus['Gas BTUs'], label=\"Gas use in BTUs\", color='tab:green')\n", 714 | "p3, = ax1.plot(btus.index, btus['Water BTUs'], label=\"Water use in BTUs\", color='tab:purple')\n", 715 | "p4, = ax1.plot(btus.index, btus['Sum BTUs'], label=\"Sum BTUs\", color='tab:red')\n", 716 | "plt.legend(loc='upper left')\n", 717 | "ax1.tick_params(axis='y', labelcolor=color)\n", 718 | "\n", 719 | "ax2 = ax1.twinx()\n", 720 | "color = 'tab:blue'\n", 721 | "ax2.set_ylabel('Insolation or GHI (W/m^2)')\n", 722 | "p2, = ax2.plot(btus.index, btus['Insolation or GHI (W/m^2)'], label=\"Insolation or GHI (W/m^2)\", color=color)\n", 723 | "ax2.tick_params(axis='y', labelcolor=color)\n", 724 | "\n", 725 | "plt.legend(loc='upper right')\n", 726 | "plt.title('Home BTU Usage and Solar Insolation Per Hour of Day')\n", 727 | "\n", 728 | "plt.show()" 729 | ] 730 | }, 731 | { 732 | "cell_type": "code", 733 | "execution_count": null, 734 | "metadata": {}, 735 | "outputs": [], 736 | "source": [ 737 | "# let's plot sum BTUs and Insolation\n", 738 | "fig, ax1 = plt.subplots(figsize=(20,10))\n", 739 | "\n", 740 | "color = 'tab:orange'\n", 741 | "\n", 742 | "ax1.set_xlabel('Hour of Day')\n", 743 | "ax1.set_ylabel('BTUs')\n", 744 | "p4, = ax1.plot(btus.index, btus['Sum BTUs'], label=\"Sum BTUs\", color='tab:red')\n", 745 | "plt.legend(loc='upper left')\n", 746 | "ax1.tick_params(axis='y', labelcolor=color)\n", 747 | "\n", 748 | "ax2 = ax1.twinx()\n", 749 | "color = 'tab:blue'\n", 750 | "ax2.set_ylabel('Insolation or GHI (W/m^2)')\n", 751 | "p2, = ax2.plot(btus.index, btus['Insolation or GHI (W/m^2)'], label=\"Insolation or GHI (W/m^2)\", color=color)\n", 752 | "ax2.tick_params(axis='y', labelcolor=color)\n", 753 | "\n", 754 | "plt.legend(loc='upper right')\n", 755 | "plt.title('Sum of Home BTU Usage and Solar Insolation Per Hour of Day')\n", 756 | "\n", 757 | "plt.show()" 758 | ] 759 | } 760 | ], 761 | "metadata": { 762 | "kernelspec": { 763 | "display_name": "Python 3", 764 | "language": "python", 765 | "name": "python3" 766 | }, 767 | "language_info": { 768 | "codemirror_mode": { 769 | "name": "ipython", 770 | "version": 3 771 | }, 772 | "file_extension": ".py", 773 | "mimetype": "text/x-python", 774 | "name": "python", 775 | "nbconvert_exporter": "python", 776 | "pygments_lexer": "ipython3", 777 | "version": "3.8.5" 778 | } 779 | }, 780 | "nbformat": 4, 781 | "nbformat_minor": 4 782 | } 783 | -------------------------------------------------------------------------------- 
/Efficiency/Efficiency-Coincidence-Water-Gas-Elec.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Efficiency: General visualization of all three inputs of water, gas, and electricity showing the coincidence of use\n", 8 | "Here we are using water, gas and energy data and showing coincidence of use by pairing all three data sources.\n", 9 | "\n", 10 | "To prepare this graph, gas data (water_and_gas.gas_ert table), water data (water_and_gas.water_ert table) and energy data (electricity.eg_realpower_15min view) for the years 2014, 2015 and 2016 were converted into hourly data for 30 homes.\n", 11 | "\n", 12 | "All three datasets have been joined to find all hours showing coincidence of use in homes. This data has been exported to the 'water_gas_energy.csv' file, which can be found at `/shared/JupyterHub-Examples-Data/efficiency/water_gas_energy.csv`." 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "import pandas as pd\n", 22 | "import matplotlib.pyplot as plt\n", 23 | "import numpy as np\n", 24 | "import csv\n", 25 | "import os\n", 26 | "import sys\n", 27 | "import statistics\n", 28 | "%matplotlib inline\n", 29 | "print(sys.version) # prints the python version\n", 30 | "print(sys.executable) # prints the path to the python you're using" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "#Read pre-processed data \n", 40 | "df1 = pd.read_csv('/shared/JupyterHub-Examples-Data/efficiency/water_gas_energy.csv')\n", 41 | "homes_list = df1.dataid.unique() \n", 42 | "homes_list" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "len(homes_list)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "# sample data from the data file\n", 61 | "df1.head()" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "# convert cubic feet to kWh for gas data (0.293 kWh per cubic foot, i.e. ~1,000 BTU/ft^3)\n", 71 | "df1['gasuse_kwh'] = df1['gasuse'] * 0.293" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "# describe the data\n", 81 | "df1.describe()" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "# group by dataid and month and take average of gasuse, wateruse and elecuse\n", 91 | "df1['datetime'] = pd.to_datetime(df1['hour'])\n", 92 | "df1 = df1.set_index('datetime')\n", 93 | "\n", 94 | "grouped = df1.groupby([pd.Grouper(freq='M'), 'dataid']).mean()\n", 95 | "grouped = grouped.reset_index()\n", 96 | "grouped" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "# find average use of water, gas and energy per hour per house, for each month\n", 106 | "grouped = grouped.drop(columns=['dataid'])\n", 107 | "df2 = grouped.groupby(['datetime']).mean()\n", 108 | "df2 = df2.reset_index()\n", 109 | "df2" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 
117 | "source": [ 118 | "#Plot above calculated data\n", 119 | "plt.figure(figsize=(20,10))\n", 120 | "plt.plot(df2['datetime'],df2['gasuse_kwh'],label=\"gas(kWh)\")\n", 121 | "plt.plot(df2['datetime'],df2['wateruse'],label=\"water(gallons)\")\n", 122 | "plt.plot(df2['datetime'],df2['elecuse'],label=\"energy(kWh)\")\n", 123 | "#plt.xticks(np.arange(min(df['datetime']), max(df['datetime'])+1, 3))\n", 124 | "plt.xlabel('month')\n", 125 | "plt.legend()\n", 126 | "\n", 127 | "plt.show()\n", 128 | "\n" 129 | ] 130 | } 131 | ], 132 | "metadata": { 133 | "kernelspec": { 134 | "display_name": "Python 3", 135 | "language": "python", 136 | "name": "python3" 137 | }, 138 | "language_info": { 139 | "codemirror_mode": { 140 | "name": "ipython", 141 | "version": 3 142 | }, 143 | "file_extension": ".py", 144 | "mimetype": "text/x-python", 145 | "name": "python", 146 | "nbconvert_exporter": "python", 147 | "pygments_lexer": "ipython3", 148 | "version": "3.8.5" 149 | } 150 | }, 151 | "nbformat": 4, 152 | "nbformat_minor": 4 153 | } 154 | -------------------------------------------------------------------------------- /Efficiency/Efficiency-Emissions-Density.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Efficiency Notebooks: Exploring the emissions density of power used by homes\n", 8 | "We'll be using data from the Texas ISO, ERCOT, to calculate the emissions amount and emission sources from the generated power used by the electrical grid home usage for 50 homes.\n", 9 | "\n", 10 | "Entirely solar equipped homes will be used to demonstrate the interplay between solar generation and emissions during this time period (March - August 2018)\n", 11 | "\n", 12 | "ERCOT emissions/generation data pulled from http://www.ercot.com/content/wcm/lists/181766/FuelMixReport_PreviousYears.zip \n", 13 | "which you can find at http://www.ercot.com/gridinfo/generation" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "#import packages\n", 23 | "import pandas as pd\n", 24 | "import matplotlib.pyplot as plt\n", 25 | "import psycopg2\n", 26 | "import sqlalchemy as sqla\n", 27 | "import os\n", 28 | "import sys\n", 29 | "sys.path.insert(0,'..')\n", 30 | "from config.read_config import get_database_config\n", 31 | "import numpy as np\n", 32 | "import statistics\n", 33 | "%matplotlib inline\n", 34 | "sys.executable # shows you your path to the python you're using" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# set constants for lbs of CO2 / kWh\n", 44 | "_gas_cc_lbs = 0.75\n", 45 | "_gas_lbs = 1.0\n", 46 | "_coal_lbs = 2.21" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "# read in db credentials from ../config/config.txt\n", 56 | "# * make sure you add those to the ../config/config.txt file! 
*\n", 57 | "\n", 58 | "## Uncomment the following line to use the live database queries\n", 59 | "\n", 60 | "database_config = get_database_config(\"../config/config.txt\")\n", 61 | "\n", 62 | "#" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "# get our DB connection\n", 72 | "\n", 73 | "# uncomment if you want to use the live queries to the database instead of the prepared data\n", 74 | "\n", 75 | "engine = sqla.create_engine('postgresql://{}:{}@{}:{}/{}'.format(database_config['username'],\n", 76 | " database_config['password'],\n", 77 | " database_config['hostname'],\n", 78 | " database_config['port'],\n", 79 | " database_config['database']\n", 80 | " ))\n" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "#Select a list of Austin homes from dataport metadata with good data availability for grid\n", 90 | "query = \"\"\"select distinct dataid, egauge_1min_data_availability, grid, solar from other_datasets.metadata \n", 91 | " where grid = 'yes'\n", 92 | " and solar = 'yes'\n", 93 | " and egauge_1min_min_time <= '2018-03-01' \n", 94 | " and egauge_1min_max_time > '2018-09-01'\n", 95 | " and city='Austin'\n", 96 | " and (egauge_1min_data_availability like '100%' \n", 97 | " or \n", 98 | " egauge_1min_data_availability like '99%'\n", 99 | " or\n", 100 | " egauge_1min_data_availability like '98%'\n", 101 | " or\n", 102 | " egauge_1min_data_availability like '97%'\n", 103 | " )\n", 104 | " and gas_ert_min_time <= '2018-03-01'\n", 105 | " and gas_ert_max_time > '2018-09-01'\n", 106 | " limit 50\n", 107 | " ;\n", 108 | " \"\"\"\n", 109 | "\n", 110 | "df = pd.read_sql_query(sqla.text(query), engine)\n", 111 | "df" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "# grab dataids and convert them to a string to put into the SQL query\n", 121 | "dataids_list = df['dataid'].tolist()\n", 122 | "print(\"{} dataids selected listed here:\".format(len(dataids_list)))\n", 123 | "dataids_str = ','.join(list(map(str, dataids_list)))\n", 124 | "dataids_str" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "#Pull electricity data for selected homes.\n", 134 | "data = \"\"\"select dataid,localminute::timestamp,grid \n", 135 | " from electricity.eg_realpower_1min \n", 136 | " where localminute >= '2018-03-01' and localminute < '2018-09-01' \"\"\"\n", 137 | "data = data + \"\"\"AND dataid in ({})\"\"\".format(dataids_str)\n", 138 | "\n", 139 | "# create a dataframe with the data from the sql query\n", 140 | "grid_df = pd.read_sql_query(sqla.text(data), engine)\n", 141 | "\n", 142 | "grid_df" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "# read in 2018 ERCOT emissions data\n", 152 | "ercot = pd.read_csv('ercot_emissions_2018.csv')\n", 153 | "\n", 154 | "# index by Energy, GWh \n", 155 | "ercot = ercot.set_index('Energy, GWh')\n", 156 | "\n", 157 | "# remove the commas from the numbers\n", 158 | "ercot.replace(',','', regex=True, inplace=True)\n", 159 | "\n", 160 | "# convert to a float from a string\n", 161 | "ercot = ercot.astype('float64')\n", 162 | "\n", 163 | "ercot" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | 
"execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "# Calc just one of them months and sources for a sanity check\n", 173 | "perc_coal_mar = ercot.loc['Coal','Mar'] / ercot.loc['Total','Mar']\n", 174 | "perc_coal_mar" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "# find the percentages for coal, gas, and gas-cc of the total blend of generation sources from ERCOT for our months and the emissions-producing sources\n", 184 | "sources = ['Coal', 'Gas', 'Gas-CC']\n", 185 | "months = ['Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug']\n", 186 | "percs = {}\n", 187 | "for source in sources:\n", 188 | " for month in months:\n", 189 | " percs[source + '' + month] = ercot.loc[source, month] / ercot.loc['Total', month]\n", 190 | "percs" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "# take the mean across the months for each source\n", 200 | "coal_ave = statistics.mean([percs['CoalMar'], percs['CoalApr'], percs['CoalMay'], percs['CoalJun'], percs['CoalJul'], percs['CoalAug']])\n", 201 | "gas_ave = statistics.mean([percs['GasMar'], percs['GasApr'], percs['GasMay'], percs['GasJun'], percs['GasJul'], percs['GasAug']])\n", 202 | "gascc_ave = statistics.mean([percs['Gas-CCMar'], percs['Gas-CCApr'], percs['Gas-CCMay'], percs['Gas-CCJun'], percs['Gas-CCJul'], percs['Gas-CCAug']])\n", 203 | "\n", 204 | "print ('Coal = {}%'.format(coal_ave * 100))\n", 205 | "print ('Gas = {}%'.format(gas_ave * 100))\n", 206 | "print ('Gas-CC = {}%'.format(gascc_ave * 100))" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": {}, 213 | "outputs": [], 214 | "source": [ 215 | "# complete the full percentage, fill with the rest of the sources that are largely non-emissions producing\n", 216 | "the_rest = 1.0 - coal_ave - gas_ave - gascc_ave\n", 217 | "\n", 218 | "# pie chart\n", 219 | "pie_data = [coal_ave, gas_ave, gascc_ave, the_rest]\n", 220 | "pie_labels = ['Coal', 'Gas', 'Gas-CC', 'Other']\n", 221 | "explode = [.05, .05, .05, .05] # separates the slices a little bit\n", 222 | "plt.pie(pie_data, labels=pie_labels, autopct='%1.1f%%', startangle=15, shadow = True, explode=explode)\n", 223 | "plt.title('ERCOT Generation Percentages')\n", 224 | "plt.axis('equal')\n", 225 | "plt.show()" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "# convert ercot table to percentages:\n", 235 | "def add_percentages(column):\n", 236 | " return column / column['Total']\n", 237 | "\n", 238 | "ercot_perc = ercot.apply(add_percentages)\n", 239 | "ercot_perc" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "ercot_perc.index.name = \"% of Generation\"\n", 249 | "ercot_perc" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": {}, 256 | "outputs": [], 257 | "source": [ 258 | "# clean up that percentage table\n", 259 | "ercot_perc = ercot_perc.drop(index=['Biomass', 'Hydro', 'Nuclear', 'Other', 'Solar', 'Wind', 'Total'], columns=['Jan', 'Feb', 'Sep', 'Oct', 'Nov', 'Dec', 'Total'])\n", 260 | "ercot_perc" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": {}, 267 | 
"outputs": [], 268 | "source": [ 269 | "# index by localminute \n", 270 | "grid_df = grid_df.set_index('localminute')\n", 271 | "\n", 272 | "# bring to central timezone\n", 273 | "grid_df = grid_df.tz_localize(tz='US/Central')\n", 274 | "grid_df" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [ 283 | "# drop any rows that have blank grid\n", 284 | "grid_df = grid_df.dropna(how='any')\n", 285 | "grid_df" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "# calculate the average grid usage of the homes over this time period\n", 295 | "grouped_grid = grid_df.groupby([pd.Grouper(freq='D')]).mean()\n", 296 | "grouped_grid['grid'] = grouped_grid['grid'] * 24 # converts daily average grid use/generation to kWh\n", 297 | "grouped_grid" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "# the above was using the monthly averages from Mar - Aug from ercot all averaged together for each source\n", 307 | "# let's use the actual monthy averages for each point instead\n", 308 | "grid_more = grouped_grid" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": null, 314 | "metadata": {}, 315 | "outputs": [], 316 | "source": [ 317 | "# extract and addd the month to the dataframe\n", 318 | "grid_more['Month'] = grid_more.index.strftime('%B')\n", 319 | "grid_more['Month'] = grid_more['Month'].astype(str)\n", 320 | "grid_more" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": null, 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "# convert the month to the same 3 letter abbreviation as in the ERCOT table\n", 330 | "def shorten_month(col):\n", 331 | " col['Month'] = col['Month'][0:3]\n", 332 | " return col\n", 333 | "\n", 334 | "grid_more = grid_more.apply(shorten_month, axis=1)\n", 335 | "grid_more" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": null, 341 | "metadata": {}, 342 | "outputs": [], 343 | "source": [ 344 | "# Assign based on the monthly percentage breakdown\n", 345 | "def assign_lbs(row):\n", 346 | " row['Gas-CC lbs'] = (ercot_perc.loc['Gas-CC', row.Month] * row['grid']) * _gas_cc_lbs\n", 347 | " row['Gas lbs'] = (ercot_perc.loc['Gas', row.Month] * row['grid']) * _gas_lbs\n", 348 | " row['Coal lbs'] = (ercot_perc.loc['Coal', row.Month] * row['grid']) * _coal_lbs\n", 349 | " return row\n", 350 | "\n", 351 | "\n", 352 | "grid_more = grid_more.apply(assign_lbs, axis=1)\n", 353 | "grid_more" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": null, 359 | "metadata": {}, 360 | "outputs": [], 361 | "source": [ 362 | "# don't need these anymore\n", 363 | "grid_more = grid_more.drop(columns=['dataid', 'Month'])\n", 364 | "grid_more" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": null, 370 | "metadata": {}, 371 | "outputs": [], 372 | "source": [ 373 | "# Add a total CO2 column\n", 374 | "grid_more['Total CO2'] = grid_more['Gas-CC lbs'] + grid_more['Gas lbs'] + grid_more['Coal lbs']\n", 375 | "grid_more" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": null, 381 | "metadata": {}, 382 | "outputs": [], 383 | "source": [ 384 | "grid_more = grid_more.rename({'grid':'Grid Use (kWh)'} , axis='columns')" 385 | ] 386 | }, 387 | { 388 | "cell_type": 
"code", 389 | "execution_count": null, 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "grid_more.plot(figsize=(25,15), title='Daily Grid (kWh) and Emissions in lbs of CO2', grid=True, xlabel='Day', ylabel='kWh or lbs CO2')" 394 | ] 395 | }, 396 | { 397 | "cell_type": "markdown", 398 | "metadata": {}, 399 | "source": [ 400 | "# Observations:\n", 401 | "\n", 402 | "- These all solar homes have the capacity to offset some of the neighbors' emissions in the \"shoulder months\" by putting energy back on the grid\n", 403 | "- Total CO2 as measured in lbs/kWh tracks at nearly 1-to-1\n" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": null, 409 | "metadata": {}, 410 | "outputs": [], 411 | "source": [] 412 | } 413 | ], 414 | "metadata": { 415 | "kernelspec": { 416 | "display_name": "Python 3", 417 | "language": "python", 418 | "name": "python3" 419 | }, 420 | "language_info": { 421 | "codemirror_mode": { 422 | "name": "ipython", 423 | "version": 3 424 | }, 425 | "file_extension": ".py", 426 | "mimetype": "text/x-python", 427 | "name": "python", 428 | "nbconvert_exporter": "python", 429 | "pygments_lexer": "ipython3", 430 | "version": "3.8.5" 431 | } 432 | }, 433 | "nbformat": 4, 434 | "nbformat_minor": 4 435 | } 436 | -------------------------------------------------------------------------------- /Efficiency/Efficiency-Percent-PV-Home-Usage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Efficiency: What percent of a home’s average daily energy use is provided by its on-site solar PV-generated power\n", 8 | "\n", 9 | "To prepare the data we selected 25 Texas homes with PV. We used 1 minute energy(from electricity.eg_realpower_1min) data for the selected homes for March 2018 through August 2018." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "#import packages\n", 19 | "import pandas as pd\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "import psycopg2\n", 22 | "import sqlalchemy as sqla\n", 23 | "import os\n", 24 | "import sys\n", 25 | "sys.path.insert(0,'..')\n", 26 | "from config.read_config import get_database_config\n", 27 | "from datetime import datetime as dt\n", 28 | "import numpy as np\n", 29 | "%matplotlib inline\n", 30 | "sys.executable # shows you your path to the python you're using" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "#Read pre-processed data \n", 40 | "data_df = pd.read_csv('/shared/JupyterHub-Examples-Data/efficiency/whole-use-by-solar.zip', compression='zip')\n", 41 | "homes_list = data_df.dataid.unique() \n", 42 | "homes_list" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "#Function to calculate how much energy use was powered by solar for each home for each interval\n", 52 | "def calc_use_powered_by_solar(row):\n", 53 | "\n", 54 | " if row['grid'] < 0 and row['solar'] > 0:\n", 55 | "\n", 56 | " val = row['total_use']\n", 57 | "\n", 58 | " elif row['solar'] > 0 and row['grid'] > 0:\n", 59 | "\n", 60 | " val = row['solar']\n", 61 | "\n", 62 | " else:\n", 63 | "\n", 64 | " val = 0\n", 65 | "\n", 66 | " return val" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "# replace nulls with 0s \n", 76 | "data_df = data_df.fillna(0)\n", 77 | "\n", 78 | "# create new column total_use\n", 79 | "data_df['total_use'] = data_df['grid'] + data_df['solar']\n", 80 | "\n", 81 | "data_df['use_powered_by_solar'] = data_df.apply(calc_use_powered_by_solar, axis=1)\n", 82 | "\n", 83 | "data_df.head(10)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "# convert localminute to date\n", 93 | "#data_df['dt'] = data_df['localminute'].dt.normalize()\n", 94 | "\n", 95 | "data_df['localminute'] = pd.to_datetime(data_df['localminute'])\n", 96 | "data_df['dt'] = data_df['localminute'].dt.date\n", 97 | "data_df.head(10)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "#calculate the homes' daily average use powered by solar\n", 107 | "avg_solar_use_by_day = pd.DataFrame(data_df, columns = ['dt','total_use','use_powered_by_solar'])\n", 108 | "avg_solar_use_by_day = avg_solar_use_by_day.groupby(['dt']).sum()\n", 109 | "avg_solar_use_by_day = avg_solar_use_by_day.reset_index()\n", 110 | "avg_solar_use_by_day['use_from_solar'] = (avg_solar_use_by_day['use_powered_by_solar']/avg_solar_use_by_day['total_use'])*100\n", 111 | "avg_solar_use_by_day.head(10)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "print(avg_solar_use_by_day['use_from_solar'].mean())" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "#calculate the homes' weekly average use powered by solar\n", 130 | "avg_solar_use_by_week = pd.DataFrame(data_df, columns = 
['dt','total_use','use_powered_by_solar'])\n", 131 | "avg_solar_use_by_week['dt'] = pd.to_datetime(avg_solar_use_by_week['dt'])\n", 132 | "avg_solar_use_by_week['week_num'] = avg_solar_use_by_week['dt'].dt.week\n", 133 | "avg_solar_use_by_week = avg_solar_use_by_week.groupby(['week_num']).sum()\n", 134 | "avg_solar_use_by_week = avg_solar_use_by_week.reset_index()\n", 135 | "avg_solar_use_by_week['use_from_solar'] = (avg_solar_use_by_week['use_powered_by_solar']/avg_solar_use_by_week['total_use'])*100\n", 136 | "avg_solar_use_by_week.head(10)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "# plot bar chart for average weekly use\n", 146 | "avg_solar_use_by_week.plot.bar(x='week_num', y='use_from_solar',figsize=(20,10),color='orange',title=\"Homes' average weekly use powered by solar\")\n", 147 | "plt.show()" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "avg_solar_use_by_week['use_from_solar'].mean()" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "## From the above analysis, we see that ~29% of a home's average daily energy use is provided by its on-site solar PV-generated power." 164 | ] 165 | } 166 | ], 167 | "metadata": { 168 | "kernelspec": { 169 | "display_name": "Python 3", 170 | "language": "python", 171 | "name": "python3" 172 | }, 173 | "language_info": { 174 | "codemirror_mode": { 175 | "name": "ipython", 176 | "version": 3 177 | }, 178 | "file_extension": ".py", 179 | "mimetype": "text/x-python", 180 | "name": "python", 181 | "nbconvert_exporter": "python", 182 | "pygments_lexer": "ipython3", 183 | "version": "3.8.5" 184 | } 185 | }, 186 | "nbformat": 4, 187 | "nbformat_minor": 4 188 | } 189 | -------------------------------------------------------------------------------- /Efficiency/Efficiency-Water_vs_Energy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Water and energy use correlation notebook:\n", 8 | "\n", 9 | "### Here we will explore whether there is a relationship between water use and electricity use in our home network.\n", 10 | "\n", 11 | "We've already exported the electricity and water data to a file on our JupyterHub's shared filesystem at `/shared/JupyterHub-Examples-Data/efficiency/elec_water_data.csv`. Our methodology for extracting the data is as follows:\n", 12 | "\n", 13 | "To find the correlation between a home's energy use and water use, Blucube water data from dataport (water_and_gas.blucube_water_data) and 1-minute interval energy (electricity.eg_realpower_1min) data was used. Blucube data consists of the cumulative device reading (in gallons), so the water usage has been calculated for each interval by subtracting the previous interval's reading from the current interval's. After calculating delta usage, only those time intervals with a delta greater than 0 were included in the dataset. 
This data has then been joined with the energy data to find how much electricity was used in those same time intervals when water was used in a home.\n" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "import pandas as pd\n", 23 | "import matplotlib.pyplot as plt\n", 24 | "import csv\n", 25 | "import os\n", 26 | "import sys\n", 27 | "import statistics\n", 28 | "%matplotlib inline\n", 29 | "print(sys.version) # prints the python version\n", 30 | "print(sys.executable) # prints the path to the python you're using" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "# Read processed data. Blucube water data from dataport has been processed to calculate delta \n", 40 | "# water usage for each minute interval. Only intervals with water usage > 0 have been kept.\n", 41 | "data = pd.read_csv('/shared/JupyterHub-Examples-Data/efficiency/elec_water_data.csv')\n", 42 | "homes_list = data.dataid.unique() \n", 43 | "homes_list" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "# Loop through the list of homes, compute the correlation between water and electricity usage, and plot the data points\n", 53 | "homes_cor = []\n", 54 | "for home in homes_list:\n", 55 | "    data_to_process = data.loc[(data['dataid'] == home)]\n", 56 | "    x = data_to_process[\"water_use\"]\n", 57 | "    y = data_to_process[\"elec_use\"]\n", 58 | "    correlation = round(x.corr(y),3)\n", 59 | "    homes_cor.append(correlation)\n", 60 | "    print(str(home) + ' -> ' + str(correlation))\n", 61 | "    plt.scatter(x, y, edgecolors='black')\n", 62 | "    plt.title('Correlation for home {}'.format(home))\n", 63 | "    plt.xlabel('Water Use')\n", 64 | "    plt.ylabel('Energy Use')\n", 65 | "    plt.show()" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "print(\"Average correlation for all homes: \", statistics.mean(homes_cor))" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "## Conclusion:\n", 82 | "### From the above plots and the calculated average correlation, we can say that water and electricity usage is *not* positively correlated."
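For reference, the cumulative-to-delta step described in the methodology above was done during extraction; a minimal pandas sketch of it might look like this (the `reading` column name and the toy values are assumptions, since Blucube readings are cumulative gallons per device):

```python
import pandas as pd

# toy stand-in for water_and_gas.blucube_water_data: cumulative gallons per home
water = pd.DataFrame({
    'dataid': [1, 1, 1, 2, 2],
    'localminute': pd.to_datetime(['2018-01-01 00:00', '2018-01-01 00:01',
                                   '2018-01-01 00:02', '2018-01-01 00:00',
                                   '2018-01-01 00:01']),
    'reading': [100.0, 100.0, 101.5, 50.0, 50.7],
})

# delta usage = current cumulative reading minus the previous interval's reading,
# computed within each home
water = water.sort_values(['dataid', 'localminute'])
water['water_use'] = water.groupby('dataid')['reading'].diff()

# keep only the intervals where water was actually used
water = water[water['water_use'] > 0]
print(water)
```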
83 | ] 84 | } 85 | ], 86 | "metadata": { 87 | "kernelspec": { 88 | "display_name": "Python 3", 89 | "language": "python", 90 | "name": "python3" 91 | }, 92 | "language_info": { 93 | "codemirror_mode": { 94 | "name": "ipython", 95 | "version": 3 96 | }, 97 | "file_extension": ".py", 98 | "mimetype": "text/x-python", 99 | "name": "python", 100 | "nbconvert_exporter": "python", 101 | "pygments_lexer": "ipython3", 102 | "version": "3.8.5" 103 | } 104 | }, 105 | "nbformat": 4, 106 | "nbformat_minor": 4 107 | } 108 | -------------------------------------------------------------------------------- /Efficiency/ercot_emissions_2018.csv: -------------------------------------------------------------------------------- 1 | "Energy, GWh",Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,Total 2 | Biomass,56,43,38,33,66,63,67,58,31,32,32,42,563 3 | Coal,"8,891","5,899","4,990","6,132","7,524","8,758","9,339","9,491","8,765","8,199","7,704","7,558","93,249" 4 | Gas,"1,411",661,884,951,"2,779","2,558","4,183","3,661","2,096","2,014","1,326",962,"23,487" 5 | Gas-CC,"10,690","10,134","9,923","8,801","12,136","14,854","17,438","16,202","13,775","10,404","8,767","10,594","143,719" 6 | Hydro,32,32,53,61,60,49,44,55,38,139,132,113,811 7 | Nuclear,"3,809","3,433","3,528","2,783","3,746","3,593","3,706","3,622","3,589","2,909","3,297","3,111","41,125" 8 | Other,3,1,2,2,3,2,4,1,2,3,3,2,29 9 | Solar,190,165,241,297,340,368,367,354,257,209,250,203,"3,240" 10 | Wind,"6,237","5,534","6,625","6,717","7,060","7,059","4,293","5,569","3,778","5,227","5,592","6,105","69,796" 11 | Total,"31,319","25,902","26,285","25,777","33,715","37,304","39,440","39,014","32,331","29,138","27,105","28,690","376,019" -------------------------------------------------------------------------------- /ElectricVehicle/Data-Extraction--EV_charging_alignment_with_State_of_Texas_critical_peak_power_events.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Data Extraction for Critical Peak Power Events Notebooks: Exploring how EV charging aligns with Texas's critical peak power events by homes\n", 8 | "\n", 9 | "## This notebook will connect to the database and extract the data live and put it into compressed zip files in this directory. \n", 10 | "\n", 11 | "
We will be using data from ERCOT's 4CP calculations to determine how residential homes' EV charging habits align with those peak power events.\n",
11 | "ERCOT 4CP data is pulled from http://mis.ercot.com/misapp/GetReports.do?reportTypeId=13037&reportTitle=Planned%20Service%20Four%20Coincident%20Peak%20Calculations&showHTMLView=&mimicKey\n",
12 | " \n",
13 | "Data from 24 homes with fairly complete data for the year 2018 is used to explore this question. Pecan Street's data can be obtained by applying for a Dataport account at https://www.dataport.pecanstreet.org.\n",
14 | "If you would like to use the files exported by this notebook, you'll need to modify the read_csv calls in the analysis notebook to point at these instead of the ones we've extracted and prepared for you in the /shared/JupyterHub-Examples-Data/ directory on the JupyterHub server.
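Once the minute-level data has been exported, checking charging alignment against a single 4CP window is just a timestamp mask. A rough sketch (note that the data pull below stops at 2018-06-01, so the date range would need extending to cover the summer 2018 events; the 2018-06-27 17:00-17:15 window is one of ERCOT's published 4CP intervals):

```python
# Sketch: average EV charging load during one 4CP window, assuming an export
# whose date range covers the event (extend the data pull below accordingly).
import pandas as pd

data_df = pd.read_csv('ev_overall_household_demand.zip', compression='zip')
data_df['localminute'] = pd.to_datetime(data_df['localminute'])  # tz-naive via ::timestamp

start = pd.Timestamp('2018-06-27 17:00:00')
end = pd.Timestamp('2018-06-27 17:15:00')
window = data_df[(data_df['localminute'] >= start) & (data_df['localminute'] <= end)]

# average EV charging (car1, in kW) per home during the event
print(window.groupby('dataid')['car1'].mean())
```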
" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# import packages\n", 24 | "import pandas as pd\n", 25 | "import psycopg2\n", 26 | "import sqlalchemy as sqla\n", 27 | "import os\n", 28 | "import sys\n", 29 | "sys.path.insert(0,'..')\n", 30 | "from config.read_config import get_database_config\n", 31 | "%matplotlib inline\n", 32 | "sys.executable # shows you your path to the python you're using" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "# read in db credentials from config/config.txt\n", 42 | "# * make sure you add those to the config/config.txt file! *\n", 43 | "\n", 44 | "database_config = get_database_config(\"../config/config.txt\")" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "# get our DB connection\n", 54 | "engine = sqla.create_engine('postgresql://{}:{}@{}:{}/{}'.format(database_config['username'],\n", 55 | " database_config['password'],\n", 56 | " database_config['hostname'],\n", 57 | " database_config['port'],\n", 58 | " database_config['database']\n", 59 | " ))" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "# Select a list of Texas homes from dataport metadata having an electrical vehicle (car1) and also has data for year 2018.\n", 69 | "query = \"\"\"select distinct dataid from other_datasets.metadata \n", 70 | " where car1='yes' and grid='yes'\n", 71 | " and egauge_1min_min_time < '2018-01-01' \n", 72 | " and egauge_1min_max_time > '2019-01-01'\n", 73 | " and state='Texas'\n", 74 | " and (egauge_1min_data_availability like '100%' \n", 75 | " or \n", 76 | " egauge_1min_data_availability like '99%')\n", 77 | " LIMIT 25;\n", 78 | " \"\"\"\n", 79 | "\n", 80 | "df = pd.read_sql_query(sqla.text(query), engine)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "# grab dataids and convert them to a string to put into the SQL query\n", 90 | "dataids_list = df['dataid'].tolist()\n", 91 | "print(\"{} dataids selected listed here:\".format(len(dataids_list)))\n", 92 | "dataids_str = ','.join(list(map(str, dataids_list)))\n", 93 | "dataids_str\n", 94 | "dataids_list" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "# Check data completeness for dataids selected from metadata above.\n", 104 | "## Warning: This query takes some time to run.\n", 105 | "query2 = \"\"\"select dataid,count(*) total_rec from electricity.eg_realpower_1min \n", 106 | " where dataid in ({})\"\"\".format(dataids_str)\n", 107 | "query2 = query2 + \"\"\" and localminute >= '2018-01-01' and localminute < '2019-01-01' group by 1\"\"\"\n", 108 | "\n", 109 | "df2 = pd.read_sql_query(sqla.text(query2), engine)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "# Select homes with atleast 99% data availability for year 2018.\n", 119 | "df2['perc'] = (df2['total_rec']/525600)*100\n", 120 | "final_dataids = df2[df2['perc'] >= 99]\n", 121 | "final_dataids['dataid'].count()" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | 
"metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "# assemble list of selected homes\n", 131 | "final_dataids_list = final_dataids['dataid'].tolist()\n", 132 | "print(\"{} dataids selected listed here:\".format(len(final_dataids_list)))\n", 133 | "final_dataids_str = ','.join(list(map(str, final_dataids_list)))\n", 134 | "final_dataids_str\n", 135 | "final_dataids_list" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "# now go pull the data for the selected homes\n", 145 | "data_pull = \"\"\"select localminute::timestamp,car1,grid,solar \n", 146 | " from electricity.eg_realpower_1min \n", 147 | " where localminute >= '2018-03-01' and localminute < '2018-06-01' \"\"\"\n", 148 | "data_pull = data_pull + \"\"\"AND dataid in ({})\"\"\".format(final_dataids_str)\n", 149 | "\n", 150 | "data_df = pd.read_sql_query(sqla.text(data_pull), engine)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "data_df" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "# export the data to a csv file\n", 169 | "compression_opts = dict(method='zip',\n", 170 | " archive_name='ev_overall_household_demand.zip')\n", 171 | "data_df.to_csv('ev_overall_household_demand.zip', index=False,\n", 172 | " compression=compression_opts)" 173 | ] 174 | } 175 | ], 176 | "metadata": { 177 | "kernelspec": { 178 | "display_name": "Python 3", 179 | "language": "python", 180 | "name": "python3" 181 | }, 182 | "language_info": { 183 | "codemirror_mode": { 184 | "name": "ipython", 185 | "version": 3 186 | }, 187 | "file_extension": ".py", 188 | "mimetype": "text/x-python", 189 | "name": "python", 190 | "nbconvert_exporter": "python", 191 | "pygments_lexer": "ipython3", 192 | "version": "3.8.5" 193 | } 194 | }, 195 | "nbformat": 4, 196 | "nbformat_minor": 4 197 | } 198 | -------------------------------------------------------------------------------- /ElectricVehicle/EV_charging_alignment_with_State_of_Texas_critical_peak_power_events.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Critical Peak Power Events Notebooks: Exploring how EV charging aligns with Texas's critical peak power events by homes\n", 8 | "\n", 9 | "We will be using data from ERCOT's 4CP calculations to determine how residential homes EV charging habits align with those Peak power events.
\n",
10 | "ERCOT 4CP data is pulled from http://mis.ercot.com/misapp/GetReports.do?reportTypeId=13037&reportTitle=Planned%20Service%20Four%20Coincident%20Peak%20Calculations&showHTMLView=&mimicKey
We will be using Pecan Street Inc. data from Dataport to determine how electric vehicle charging aligns with rooftop solar generation.\n",
10 | " \n",
11 | "
Data from 24 homes with fairly complete data for the year 2018 is used to explore this question.\n",
12 | " \n",
13 | "
\n",
14 | "Pecans Streets data can be obtained by applying for a dataport account at https://www.dataport.pecanstreet.org.
We will be using Pecan Street Inc. data from dataport to calculate how much overall energy demand is used in homes by electric vehicle charging.
\n",
10 | "Pecans Streets data can be obtained by applying for a dataport account at https://www.dataport.pecanstreet.org.
You'll need to modify the read_csv calls in that notebook to point at these instead of the ones we've extracted and prepared for you in the /shared/JupyterHub-Examples-Data/ directory on the JupyterHub server if you would like to use the ones exported by this notebook in the analysis notebook.
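With the event start and end lists defined in the code below, flagging which minute-level rows fall inside any 4CP window reduces to OR-ing timestamp masks. A small sketch of the pandas-side equivalent of the SQL assembled below (assuming a timezone-aware `localminute` column):

```python
import pandas as pd

def in_4cp_window(frame, starts, ends):
    # returns a boolean Series that is True wherever a row's timestamp
    # falls inside any of the (start, end) 4CP event intervals
    ts = pd.to_datetime(frame['localminute'], utc=True)
    mask = pd.Series(False, index=frame.index)
    for s, e in zip(starts, ends):
        mask |= (ts >= pd.Timestamp(s)) & (ts <= pd.Timestamp(e))
    return mask

# usage: df2[in_4cp_window(df2, event_start_dates, event_end_dates)]
```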
" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import pandas as pd\n", 27 | "import psycopg2\n", 28 | "import sqlalchemy as sqla\n", 29 | "import os\n", 30 | "import sys\n", 31 | "sys.path.insert(0,'..')\n", 32 | "from config.read_config import get_database_config\n", 33 | "%matplotlib inline\n", 34 | "sys.executable # shows you your path to the python you're using" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# read in db credentials from config/config.txt\n", 44 | "# * make sure you add those to the config/config.txt file! *\n", 45 | "\n", 46 | "database_config = get_database_config(\"../config/config.txt\")\n" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "# get our DB connection\n", 56 | "engine = sqla.create_engine('postgresql://{}:{}@{}:{}/{}'.format(database_config['username'],\n", 57 | " database_config['password'],\n", 58 | " database_config['hostname'],\n", 59 | " database_config['port'],\n", 60 | " database_config['database']\n", 61 | " ))\n" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "# These are the ERCOT 4CP events (start date/time and end date/time) for 2016 - 2019 acquired from\n", 71 | "# http://mis.ercot.com/misapp/GetReports.do?reportTypeId=13037&reportTitle=Planned%20Service%20Four%20Coincident%20Peak%20Calculations&showHTMLView=&mimicKey\n", 72 | "\n", 73 | "event_start_dates = ['2019-06-19 17:00:00-05', '2019-07-30 16:30:00-05', '2019-08-12 17:00:00-05', '2019-09-06 16:45:00-05',\n", 74 | " '2018-06-27 17:00:00-05', '2018-07-19 17:00:00-05', '2018-08-23 16:45:00-05', '2018-09-19 16:30:00-05',\n", 75 | " '2017-06-23 16:45:00-05', '2017-07-28 17:00:00-05', '2017-08-16 17:00:00-05', '2017-09-20 16:45:00-05',\n", 76 | " '2016-06-15 17:00:00-05', '2016-07-14 16:00:00-05', '2016-08-11 16:30:00-05', '2016-09-19 16:16:00-05'\n", 77 | " ]\n", 78 | "event_end_dates = ['2019-06-19 17:15:00-05', '2019-07-30 16:45:00-05', '2019-08-12 17:15:00-05', '2019-09-06 17:00:00-05',\n", 79 | " '2018-06-27 17:15:00-05', '2018-07-19 17:15:00-05', '2018-08-23 17:00:00-05', '2018-09-19 16:45:00-05',\n", 80 | " '2017-06-23 17:00:00-05', '2017-07-28 17:15:00-05', '2017-08-16 17:15:00-05', '2017-09-20 17:00:00-05',\n", 81 | " '2016-06-15 17:15:00-05', '2016-07-14 16:15:00-05', '2016-08-11 16:45:00-05', '2016-09-19 16:31:00-05']" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "# Select the dataids, pv direction, amount of PV of solar homes\n", 91 | "# we're selecting homes with just South and West facing PV that have data between the first event and the last event\n", 92 | "\n", 93 | "\n", 94 | "query = \"\"\"\n", 95 | "select dataid, pv, pv_panel_direction, total_amount_of_pv, amount_of_west_facing_pv, amount_of_south_facing_pv\n", 96 | "from other_datasets.metadata\n", 97 | "where pv is not null\n", 98 | "and total_amount_of_pv is not null\n", 99 | "and grid is not null \n", 100 | "and solar is not null\n", 101 | "and pv_panel_direction in ('South', 'West')\n", 102 | "and egauge_1min_min_time < '2016-06-15'\n", 103 | "and egauge_1min_max_time > '2019-09-06'\n", 104 | "LIMIT 32\n", 105 | "\"\"\"\n", 106 | "\n", 107 | "# 
create a Pandas dataframe with the data from the sql query\n", 108 | "df = pd.read_sql_query(sqla.text(query), engine)\n", 109 | "df" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "# export homes to csv file\n", 119 | "compression_opts = dict(method='zip',\n", 120 | " archive_name='pv_south_vs_west_homes.zip')\n", 121 | "df.to_csv('pv_south_vs_west_homes.zip', index=False,\n", 122 | " compression=compression_opts)" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "# grab dataids and convert them to a string to put into the SQL query\n", 132 | "dataids_list = df['dataid'].tolist()\n", 133 | "print(\"{} dataids selected listed here:\".format(len(dataids_list)))\n", 134 | "dataids_str = ','.join(list(map(str, dataids_list)))\n", 135 | "dataids_str\n", 136 | "dataids_list" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "# Assemble the SQL query to pull the data for the selected dataids\n", 146 | "# \n", 147 | "first_start = event_start_dates.pop(0)\n", 148 | "first_end = event_end_dates.pop(0)\n", 149 | "query_2 = \"\"\"\n", 150 | "select dataid, localminute, solar, grid from electricity.eg_realpower_1min \n", 151 | "where ((localminute >= '{}' and localminute <= '{}') \"\"\".format(first_start, first_end)\n", 152 | "\n", 153 | "for start, end in zip(event_start_dates, event_end_dates):\n", 154 | " query_2 = query_2 + \"OR (localminute >= '{}' and localminute <= '{}') \".format(start, end)\n", 155 | "\n", 156 | "query_2 = query_2 + \"\"\" ) AND dataid in ({})\"\"\".format(dataids_str)\n", 157 | "\n", 158 | "# here's what that query is\n", 159 | "print(\"sql query is \\n\" + query_2)\n", 160 | "\n", 161 | "# create a dataframe with the data from the sql query\n", 162 | "df2 = pd.read_sql_query(sqla.text(query_2), engine)\n", 163 | "\n", 164 | "# calculate usage as grid minus solar (which is actually grid + solar because solar is negative use)\n", 165 | "# Calculate the difference with a lambda function and add it as a new column called 'usage'\n", 166 | "df2['usage'] = df2.apply(lambda row: row.solar + row.grid, axis=1)\n", 167 | "df2.head(15)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "# Total number of records in the dataset\n", 177 | "df2['dataid'].count()" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "# export the data to a csv file\n", 187 | "compression_opts = dict(method='zip',\n", 188 | " archive_name='pv_south_vs_west.zip')\n", 189 | "df2.to_csv('pv_south_vs_west.zip', index=False,\n", 190 | " compression=compression_opts)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [] 199 | } 200 | ], 201 | "metadata": { 202 | "kernelspec": { 203 | "display_name": "Python 3", 204 | "language": "python", 205 | "name": "python3" 206 | }, 207 | "language_info": { 208 | "codemirror_mode": { 209 | "name": "ipython", 210 | "version": 3 211 | }, 212 | "file_extension": ".py", 213 | "mimetype": "text/x-python", 214 | "name": "python", 215 | "nbconvert_exporter": "python", 216 | "pygments_lexer": 
"ipython3", 217 | "version": "3.8.5" 218 | } 219 | }, 220 | "nbformat": 4, 221 | "nbformat_minor": 4 222 | } 223 | -------------------------------------------------------------------------------- /PV/PV-South-vs-West.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# PV: Solar production on south- vs. west-facing rooftop and how solar systems align with 4CP events in Texas\n", 8 | "\n", 9 | "This notebook will explore solar generation around the ERCOT 4CP events and compare West vs South facing solar generation during those events.\n", 10 | "\n", 11 | "The ERCOT 4CP events are the 15-minute ERCOT grid peak events for each month in June, July, August and September.\n", 12 | "\n", 13 | "ERCOT uses each large customer’s (including municipal utilities) total energy demand during the 4CP periods in the previous year as the basis for charges in the current year." 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "import pandas as pd\n", 23 | "import matplotlib.pyplot as plt\n", 24 | "import sys\n", 25 | "%matplotlib inline\n", 26 | "sys.executable # shows you your path to the python you're using" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "# Read pre-processed data for June 2018 through August 2018\n", 36 | "df2 = pd.read_csv('/shared/JupyterHub-Examples-Data/pv/pv_south_vs_west.zip',compression='zip')\n", 37 | "df2" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "# Checking completeness of data. 
We will consider only those homes that have 100% data for the 4CP dates\n", 47 | "data = df2.groupby(['dataid']).size().reset_index(name='counts')\n", 48 | "data['perc'] = (data['counts']/256)*100\n", 49 | "ndata = data[data['perc'] == 100]\n", 50 | "final_dataids = ndata['dataid']\n", 51 | "\n", 52 | "df3=df2.loc[df2['dataid'].isin(final_dataids)]\n", 53 | "df3.count()" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "dataids_list = df3['dataid'].unique()\n", 63 | "len(dataids_list)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "# drop any missing values\n", 73 | "df3 = df3.dropna()" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "# convert localminute to pandas datetime type\n", 83 | "df3['datetime'] = pd.to_datetime(df3['localminute'])\n", 84 | "\n", 85 | "# and set as index\n", 86 | "df3 = df3.set_index('datetime')\n", 87 | "df3" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "# group by month and dataid and take the mean of solar, grid, and usage within those groups\n", 97 | "grouped = df3.groupby([pd.Grouper(freq='M'), 'dataid']).mean()\n", 98 | "grouped" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "# read in the list of homes and direction that they face\n", 108 | "df = pd.read_csv('/shared/JupyterHub-Examples-Data/pv/pv_south_vs_west_homes.zip',compression='zip')\n", 109 | "df" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "# map pv direction onto dataids with a merge after resetting the index\n", 119 | "grouped = grouped.reset_index()\n", 120 | "grouped = grouped.merge(df, how='left', left_on='dataid', right_on='dataid')\n", 121 | "grouped" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "# reindex by the date\n", 131 | "grouped = grouped.set_index('datetime')\n", 132 | "grouped" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "# regroup by year and pv_panel_direction and average the values\n", 142 | "year_west_vs_south = (grouped.groupby([pd.Grouper(freq='Y'),'pv_panel_direction']).mean())\n", 143 | "\n", 144 | "# we don't need a mean of the dataids, so we can drop that column now\n", 145 | "year_west_vs_south = year_west_vs_south.drop(columns=['dataid'])\n", 146 | "year_west_vs_south" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "# now just drop unneeded columns and rearrange them and simplify the view\n", 161 | "year_west_vs_south = year_west_vs_south.reset_index()\n", 162 | "year_west_vs_south['year'] = pd.DatetimeIndex(year_west_vs_south['datetime']).year\n", 163 | "year_west_vs_south = year_west_vs_south[['year', 'pv_panel_direction','solar', 'grid', 'usage']]\n", 164 | "year_west_vs_south = 
year_west_vs_south.set_index('year')\n", 165 | "year_west_vs_south" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "# Result: West-Facing Solar Wins!\n", 173 | "## Despite higher in-home usage (green bars, 'usage') in the west-facing houses, solar production is so high (blue bars, 'solar') that west-facing homes still draw less power from the grid (orange bars, 'grid') than south-facing homes in each paired year. In most cases here, grid is even negative, meaning that the west-facing homes are putting power back on the grid." 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "from matplotlib.pyplot import figure\n", 183 | "plot = year_west_vs_south.plot(kind='bar',figsize=(25,15), title=\"Solar production, Net Grid Usage (home usage from the grid minus solar production), and Home Usage During ERCOT 4CP events\")\n", 184 | "labels = plot.set_xticklabels(['2016-South', '2016-West', '2017-South', '2017-West', '2018-South', '2018-West', '2019-South', '2019-West'])\n", 185 | "ylabel = plot.set_ylabel('Usage/Production in kW')\n", 186 | "xlabel = plot.set_xlabel('South vs West Facing Year')" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [] 195 | } 196 | ], 197 | "metadata": { 198 | "kernelspec": { 199 | "display_name": "Python 3", 200 | "language": "python", 201 | "name": "python3" 202 | }, 203 | "language_info": { 204 | "codemirror_mode": { 205 | "name": "ipython", 206 | "version": 3 207 | }, 208 | "file_extension": ".py", 209 | "mimetype": "text/x-python", 210 | "name": "python", 211 | "nbconvert_exporter": "python", 212 | "pygments_lexer": "ipython3", 213 | "version": "3.8.5" 214 | } 215 | }, 216 | "nbformat": 4, 217 | "nbformat_minor": 4 218 | } 219 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # JupyterHub-Examples 2 | Example Jupyter Notebooks demonstrating use of Pecan Street data on our JupyterHub 3 | 4 | ## Use 5 | Clone the repository with the command `git clone git@github.com:Pecan-Street/JupyterHub-Examples.git` either on your own computer, or from a terminal on the Pecan Street JupyterHub server. 6 | 7 | Some of the notebooks require a Dataport database account to perform database queries and extract data. 8 | 9 | If you have a Dataport database account, edit the config/config.txt file with the connection information from your https://dataport.pecanstreet.org/access page. 
10 | 11 | If you have questions or feedback, contact us at https://www.pecanstreet.org/contact/ 12 | -------------------------------------------------------------------------------- /config/config.txt: -------------------------------------------------------------------------------- 1 | [database_config] 2 | hostname = 3 | database = 4 | port = 5 | username = 6 | password = 7 | 8 | -------------------------------------------------------------------------------- /config/read_config.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | import traceback 3 | 4 | 5 | def get_database_config(config_file): 6 |     try: 7 |         config = configparser.ConfigParser() 8 |         config.read(config_file) 9 |         return { 10 |             "hostname": config.get("database_config", "hostname"), 11 |             "username": config.get("database_config", "username"), 12 |             "password": config.get("database_config", "password"), 13 |             "port": int(config.get("database_config", "port")), 14 |             "database": config.get("database_config", "database") 15 |         } 16 |     except Exception: 17 |         # configparser.Error (missing section/option), ValueError (non-numeric 18 |         # port), and any other failure all get the same treatment 19 |         traceback.print_exc() 20 |         print( 21 |             "Error reading database configuration. Does the config/config.txt file exist and have entries for " 22 |             "hostname, username, password, database, and port?") 23 | --------------------------------------------------------------------------------
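For reference, the notebooks in this repository consume this helper roughly as follows (a minimal sketch assuming config/config.txt has been filled in with your Dataport credentials):

```python
import sqlalchemy as sqla
from config.read_config import get_database_config

# read the credentials and build the same postgresql engine the notebooks use
database_config = get_database_config("config/config.txt")
engine = sqla.create_engine('postgresql://{}:{}@{}:{}/{}'.format(
    database_config['username'], database_config['password'],
    database_config['hostname'], database_config['port'],
    database_config['database']))
```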