├── .Rhistory ├── VERSION ├── test ├── 20161102_Barometer.xle ├── __init__.py ├── ag13a 2016-08-02.xle ├── ag13c 2016-08-02.xle ├── 20160919_LittleHobble.xle ├── ManualMeasurements.csv ├── usgsP.csv └── test.py ├── .idea ├── markdown-navigator │ └── profiles_settings.xml ├── libraries │ └── R_User_Library.xml ├── vcs.xml ├── preferred-vcs.xml ├── modules.xml ├── misc.xml ├── inspectionProfiles │ └── Project_Default.xml ├── WellApplication.iml └── markdown-navigator.xml ├── requirements.txt ├── wellapplication ├── __init__.py ├── MannKendall.py ├── hydropy.py ├── arcpy_functions.py ├── ros.py ├── chem.py ├── usgs.py ├── mesopy.py └── graphs.py ├── pyproject.toml ├── LICENSE.txt ├── .travis.yml ├── setup.py ├── CHANGES.txt ├── docs ├── Transport.ipynb └── UMAR_WL_Data.ipynb └── README.md /.Rhistory: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 0.5.10 2 | -------------------------------------------------------------------------------- /test/20161102_Barometer.xle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utah-geological-survey/WellApplication/HEAD/test/20161102_Barometer.xle -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __author__ = 'Paul Inkenbrandt' 4 | __email__ = 'paulinkenbrandt@utah.gov' 5 | -------------------------------------------------------------------------------- /test/ag13a 2016-08-02.xle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utah-geological-survey/WellApplication/HEAD/test/ag13a 2016-08-02.xle -------------------------------------------------------------------------------- /test/ag13c 2016-08-02.xle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utah-geological-survey/WellApplication/HEAD/test/ag13c 2016-08-02.xle -------------------------------------------------------------------------------- /test/20160919_LittleHobble.xle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/utah-geological-survey/WellApplication/HEAD/test/20160919_LittleHobble.xle -------------------------------------------------------------------------------- /.idea/markdown-navigator/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /.idea/libraries/R_User_Library.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Pandas >= 0.16.0 2 | Numpy >= 0.7.0 3 | Matplotlib >= 1.1 4 | xmltodict >= 0.6.2 5 | scipy >= 0.10.0 6 | pyproj >= 1.9.4 7 | requests >= 2.11.1 8 | xlrd >= 0.5.4 9 | statsmodels >= 0.6.0 10 | -------------------------------------------------------------------------------- /.idea/preferred-vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ApexVCS 5 | 6 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 7 | -------------------------------------------------------------------------------- /wellapplication/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, division, print_function, unicode_literals 3 | import os 4 | 5 | from .transport import * 6 | from .usgs import * 7 | from .chem import * 8 | from .mesopy import * 9 | from .graphs import * 10 | from .MannKendall import * 11 | from .ros import * 12 | from .arcpy_functions import * 13 | 14 | __version__ = '0.5.10' 15 | __author__ = 'Paul Inkenbrandt' 16 | __name__ = 'wellapplication' 17 | 18 | __all__ = ['usgs','chem','transport','ros','hydropy','graphs','MannKendall', 19 | 'mesopy','arcpy_functions'] 20 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 16 | -------------------------------------------------------------------------------- /test/ManualMeasurements.csv: -------------------------------------------------------------------------------- 1 | datetime,Fork,staff gage,CFS 2 | 8/16/2016 12:49,SpringCreek,0.0825,1.714458 3 | 8/16/2016 15:57,SpringCreek,,2 4 | 8/16/2016 13:55,UpperMain,0.35,0.84 5 | 8/17/2016 15:40,SpringCreek,0.79,1.84 6 | 8/18/2016 16:55,LittleHobble,0.15,0.47 7 | 8/25/2016 12:54,UpperMain,,0.24 8 | 8/25/2016 14:30,LowerMain,0.2,0.6 9 | 9/19/2016 10:30,LittleHobble,0.15,0.39 10 | 9/19/2016 11:00,UpperMain,0.25,1.14 11 | 9/19/2016 11:50,SpringCreek,0.775,1.81 12 | 9/19/2016 13:02,LowerMain,0.2,2.96 13 | 10/5/2016 11:24,LittleHobble,0.2,0.58 14 | 10/5/2016 12:45,SpringCreek,,1.81 15 | 10/5/2016 13:15,LowerMain,0.2,3 16 | 11/2/2016 9:00,LowerMain,0.25,5.45 17 | 11/2/2016 10:30,SpringCreek,0.5,1.61 18 | 11/2/2016 12:10,UpperMain,,1.15 19 | 11/2/2016 13:18,LittleHobble,0.23,0.77 20 | -------------------------------------------------------------------------------- /.idea/WellApplication.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 14 | 15 | 18 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools >= 61.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | dynamic = ["version"] 7 | name = "WellApplication" 8 | requires-python = ">= 3.8" 9 | authors = [{name = "Paul Inkenbrandt", email = "paulinkenbrandt@utah.gov"}] 10 | maintainers = [{name = "Paul Inkenbrandt", email = "paulinkenbrandt@utah.gov"}] 11 | description = "Tools used to manipulate hydrograph data; Solinst xles to pandas dataframe; hydrograph analysis; hydrology; Gannt Charts; Piper diagrams; hydrogeology; wells; groundwater; USGS data; WQP data;" 12 | readme = "README.md" 13 | license = {file = "LICENSE.txt"} 14 | keywords = ["well", "groundwater", "transducer"] 15 | 16 | [project.urls] 17 | Homepage = "https://github.com/utah-geological-survey/WellApplication" 18 | Issues = "https://github.com/utah-geological-survey/WellApplication/issues" 19 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Paul 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "2.7" 5 | - "3.5" 6 | 7 | sudo: required 8 | 9 | before_install: 10 | - pip install pytest 11 | - "export DISPLAY=:99.0" 12 | - "sh -e /etc/init.d/xvfb start" 13 | - sleep 3 # give xvfb some time to start 14 | 15 | env: 16 | - DEPS="numpy scipy matplotlib pandas statsmodels" 17 | 18 | install: 19 | - pip install codecov 20 | - pip install pytest-cov 21 | - pip install -r requirements.txt 22 | 23 | 24 | script: 25 | - pytest test/test.py --cov=./ 26 | 27 | after_success: 28 | - codecov 29 | 30 | deploy: 31 | provider: pypi 32 | distributions: sdist 33 | user: "paulinkenbrandt" 34 | password: 35 | secure: "TtR3oPiYSSb3/eUny7/jcZAnLhfpZs+52N/ieKekL7bCJc+N7EK2ViktOE0c3uoA3dqjC+AnP4VUcpISCvmvQx7D2l1udMhBwxg+/08/6b+Gb4leuNgEZ+gdRVRHvHHTaTwLRk4dRhOpfXnf8L7W4ZPCOjc+3jZSvObqDcbN5uW1fcpb4zNwhYEFf/seWsNXw8EBTZ4CgDvaBKexHAifrT53BmhWULs+ImmUqryzxdJFneq8lvkbnJFRnqsy8B2aLC7Ev+JfNiIVDSvYZAB9M614iKaBkPscful242UOc0mZBj4IoQMxBt9oduyoztJqTdhos/VHpB0PGtek/FL5EukRtUqkjgWf9h0Abp6zpxYTjOxlQpb0lNmKSHmz8+RbNUJ9WtGu4ybQip4nqSuWZ/Df1EGdZ6ndlVobmHyP172vxSUE619wf4zFAu3PqjCNDs9QApLtd7RnEcwt9kPnTH+TRXM/w6xjltKMHN4Y9dqses3I6AjlcSEB48XehBTuW7tOH0sd5ytHeqBC2/RWQHaiZFm042kgeK+m0A0P0dgcOx6KnDhkHyeNoc1Qh05CnuFSMMr1dQXyu3UaHqdJdEilbSxalWuFf4ISwjBrCIJgI8RFmEfdWbV1z8kqCgqvB5K5q3JUfh0wGtKVpMv6jA9edK+p6FQayvuSK0TPZOs=" 36 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function, unicode_literals 2 | import sys 3 | import os 4 | from setuptools import setup, find_packages 5 | 6 | if not sys.version_info[0] in [2,3]: 7 | print('Sorry, wellapplication not supported in your Python version') 8 | print(' Supported versions: 2 and 3') 9 | print(' Your version of Python: {}'.format(sys.version_info[0])) 10 | sys.exit(1) # return non-zero value for failure 11 | 12 | long_description = 'A tool for hydrogeologists to upload and display hydrographs and geochemical data' 13 | 14 | try: 15 | import pypandoc 16 | 17 | long_description = pypandoc.convert('README.md', 'rst') 18 | except: 19 | pass 20 | 21 | setup(name='wellapplication', 22 | description = 'Interface with xle files; analyze hydrographs; plot hydrographs; download USGS data', 23 | long_description = long_description, 24 | version = '0.5.10', 25 | author = 'Paul Inkenbrandt', 26 | author_email = 'paulinkenbrandt@utah.gov', 27 | url = 'https://github.com/inkenbrandt/WellApplication', 28 | license = 'LICENSE.txt', 29 | install_requires=["Pandas >= 0.16.0", 30 | "Numpy >= 0.7.0", 31 | "Matplotlib >= 1.1", 32 | "xmltodict >= 0.6.2", 33 | "scipy >= 0.10.0", 34 | "pyproj >= 1.9.4", 35 | "requests >= 2.11.1", 36 | "xlrd >= 0.5.4"], 37 | packages = find_packages(exclude=['contrib', 'docs', 'tests*'])) 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /test/usgsP.csv: -------------------------------------------------------------------------------- 1 | ,PO4,month,year 2 | 0,0.07,1,1972 3 | 1,0.33,1,1973 4 | 2,0.7,1,1974 5 | 3,0.08,1,1975 6 | 4,0.04,1,1976 7 | 5,0.05,1,1977 8 | 6,0.14,1,1978 9 | 7,0.08,1,1979 10 | 8,0.11,2,1972 11 | 9,0.24,2,1973 12 | 10,0.17,2,1974 13 | 11,0.11,2,1978 14 | 12,0.04,2,1979 15 | 13,0.6,3,1972 16 | 14,0.12,3,1973 17 | 15,0.16,3,1974 18 | 16,0.14,3,1976 19 | 17,0.05,3,1976 20 | 18,0.03,3,1977 21 | 19,0.02,3,1978 22 | 20,0.02,3,1979 23 | 21,0.1,4,1972 24 | 22,0.08,4,1973 25 | 23,1.2,4,1974 26 | 24,0.11,4,1975 27 | 25,0.06,4,1975 28 | 26,0.05,4,1976 29 | 27,0.04,4,1977 30 | 28,0.06,4,1978 31 | 29,0.01,4,1979 32 | 30,0.04,5,1972 33 | 31,0.03,5,1973 34 | 32,0.12,5,1974 35 | 33,0.09,5,1975 36 | 34,0.02,5,1976 37 | 35,0.04,5,1977 38 | 36,0.03,5,1978 39 | 37,0.03,5,1979 40 | 38,0.05,6,1972 41 | 39,0.01,6,1973 42 | 40,0.05,6,1974 43 | 41,0.05,6,1975 44 | 42,0.03,6,1978 45 | 43,0.01,6,1979 46 | 44,0.04,7,1972 47 | 45,0.04,7,1973 48 | 46,0.03,7,1974 49 | 47,0.02,7,1975 50 | 48,0.06,7,1977 51 | 49,0.02,7,1978 52 | 50,0.04,7,1979 53 | 51,0.05,8,1972 54 | 52,0.06,8,1973 55 | 53,0.03,8,1974 56 | 54,0.05,8,1975 57 | 55,0.07,8,1976 58 | 56,0.08,8,1977 59 | 57,0.06,8,1978 60 | 58,0.02,8,1979 61 | 59,0.1,9,1972 62 | 60,0.09,9,1973 63 | 61,0.06,9,1974 64 | 62,0.1,9,1975 65 | 63,0.08,9,1977 66 | 64,0.05,9,1978 67 | 65,0.06,9,1979 68 | 66,0.13,10,1972 69 | 67,0.13,10,1973 70 | 68,0.12,10,1974 71 | 69,0.13,10,1975 72 | 70,0.13,10,1976 73 | 71,0.09,10,1977 74 | 72,0.1,10,1978 75 | 73,0.33,10,1979 76 | 74,0.14,11,1972 77 | 75,0.42,11,1973 78 | 76,0.47,11,1977 79 | 77,0.14,11,1978 80 | 78,0.13,12,1972 81 | 79,0.15,12,1973 82 | 80,0.12,12,1974 83 | 81,0.09,12,1974 84 | 82,0.05,12,1975 85 | 83,0.3,12,1977 86 | 84,0.07,12,1978 87 | -------------------------------------------------------------------------------- /CHANGES.txt: -------------------------------------------------------------------------------- 1 | v0.2.4, 2016-01-23 -- added to readme 2 | v0.2.2, 2016-01-22 -- added tests 3 | v0.2.0, 2016-01-22 -- added to readme and docs; changed setup file 4 | v0.1.7, 2016-01-22 -- added to readme and docs 5 | v0.1.6, 2016-01-18 -- added to readme and docs 6 | v0.1.5, 2016-01-18 -- added features; fixed bugs 7 | v0.1.4, 2016-01-17 -- added features 8 | v0.1.3, 2016-01-17 -- added features 9 | v0.1.2, 2016-01-16 -- added features 10 | v0.1.1, 2016-01-16 -- reupload 11 | v0.1.0, 2016-01-16 -- added a ton of functions, sped up usgs functions 12 | v0.0.39, 2016-01-08 -- urlliberr 13 | v0.0.38, 2016-01-08 -- added usgs functions 14 | v0.0.36, 2016-01-08 -- added gantt function 15 | v0.0.35, 2016-01-07 -- added fdc function 16 | v0.0.28, 2016-01-07 -- added piper class 17 | v0.0.27, 2016-01-04 -- added functions 18 | v0.0.26, 2016-01-04 -- indentation 19 | v0.0.25, 2016-01-04 -- coerce to numeric 20 | v0.0.24, 2016-01-03 -- bug fix, added docs 21 | v0.0.23, 2016-01-03 -- bug fix, added chem.py, fixed WQP 22 | v0.0.22, 2016-01-03 -- bug fix, added chem.py 23 | v0.0.18, 2016-01-03 -- version 24 | v0.0.17, 2016-01-03 -- fixed class 25 | v0.0.16, 2016-01-03 -- fixed class 26 | v0.0.15, 2016-01-03 -- fixed class 27 | v0.0.14, 2016-01-03 -- fixed class 28 | v0.0.13, 2016-01-03 -- fixed variable 29 | v0.0.12, 2016-01-03 -- fixed variable 30 | v0.0.11, 2016-01-03 -- added class 31 | v0.0.10, 2016-01-03 -- fixed standard library dependency 32 | v0.0.9, 2016-01-03 -- fixed standard library dependency added docs 33 | v0.0.8, 2016-01-03 -- added class usgsGis in gis 34 | v0.0.7, 2016-01-03 -- improved readme (.rst) 35 | v0.0.6, 2016-01-01 -- connected object in init 36 | v0.0.5, 2016-01-01 -- renamed file, added class 37 | v0.0.4, 2016-01-01 -- Improved Setup file 38 | v0.0.3, 2016-01-01 -- Added to Readme 39 | v0.0.2, 2016-01-01 -- Added to Readme 40 | v0.0.1, 2016-01-01 -- Initial release. 41 | -------------------------------------------------------------------------------- /docs/Transport.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "import pandas as pd\n", 13 | "import platform\n", 14 | "import sys\n", 15 | "import numpy as np\n", 16 | "import matplotlib.pyplot as plt\n", 17 | "import matplotlib\n", 18 | "from pylab import rcParams\n", 19 | "rcParams['figure.figsize'] = 10, 10" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": { 26 | "collapsed": true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import wellapplication as wa" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": { 37 | "collapsed": false 38 | }, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "Operating System Linux 4.4.0-59-generic\n", 45 | "Python Version 2.7.11+ (default, Apr 17 2016, 14:00:29) \n", 46 | "[GCC 5.3.1 20160413]\n", 47 | "Pandas Version 0.19.1\n", 48 | "Numpy Version 1.11.2\n", 49 | "Matplotlib Version 1.5.3\n", 50 | "WellApplication Version 0.4.21\n" 51 | ] 52 | } 53 | ], 54 | "source": [ 55 | "print(\"Operating System \" + platform.system() + \" \" + platform.release())\n", 56 | "print(\"Python Version \" + str(sys.version))\n", 57 | "print(\"Pandas Version \" + str(pd.__version__))\n", 58 | "print(\"Numpy Version \" + str(np.__version__))\n", 59 | "print(\"Matplotlib Version \" + str(matplotlib.__version__))\n", 60 | "print(\"WellApplication Version \" + str(wa.__version__))" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": { 67 | "collapsed": true 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "'/home/pi/PycharmProjects/WellApplication/test'" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": { 78 | "collapsed": true 79 | }, 80 | "outputs": [], 81 | "source": [] 82 | } 83 | ], 84 | "metadata": { 85 | "kernelspec": { 86 | "display_name": "Python 2", 87 | "language": "python", 88 | "name": "python2" 89 | }, 90 | "language_info": { 91 | "codemirror_mode": { 92 | "name": "ipython", 93 | "version": 2 94 | }, 95 | "file_extension": ".py", 96 | "mimetype": "text/x-python", 97 | "name": "python", 98 | "nbconvert_exporter": "python", 99 | "pygments_lexer": "ipython2", 100 | "version": "2.7.11+" 101 | } 102 | }, 103 | "nbformat": 4, 104 | "nbformat_minor": 0 105 | } 106 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![DOI](https://zenodo.org/badge/48931715.svg)](https://zenodo.org/badge/latestdoi/48931715) 2 | [![Build Status](https://travis-ci.org/inkenbrandt/WellApplication.svg?branch=master)](https://travis-ci.org/inkenbrandt/WellApplication) 3 | [![PyPI version](https://badge.fury.io/py/WellApplication.svg)](https://badge.fury.io/py/WellApplication) 4 | [![codecov](https://codecov.io/gh/inkenbrandt/WellApplication/branch/master/graph/badge.svg)](https://codecov.io/gh/inkenbrandt/WellApplication) 5 | [![Binder](http://mybinder.org/badge.svg)](http://mybinder.org:/repo/inkenbrandt/wellapplication) 6 | ![Coolness](https://img.shields.io/badge/Coolness-very-brightgreen.svg) 7 | 8 | 9 | Developed with funding from the U.S. EPA Exchange Network 10 | 11 | # Well Application 12 | 13 | Set of tools for groundwater level and water chemistry analysis. Allows for rapid download and graphing of data from the USGS NWIS database and the Water Quality Portal. 14 | 15 | ## Installation 16 | Wellapplication should be compatible with both Python 2.7 and 3.5. It has been tested most rigously on Python 2.7. It should work on both 32 and 64-bit platforms. I have used it on Linux and Windows machines. 17 | 18 | To install the most recent version, use pip. 19 | ```Bash 20 | pip install wellapplication 21 | ``` 22 | ## Modules 23 | 24 | ### transport 25 | 26 | This module: 27 | 28 | * allows a user to upload data from an .xle file common with some water well transducers. 29 | 30 | * matches well and barometric data to same sample intervals 31 | 32 | * adjust with manual measurements 33 | 34 | * removes skips and jumps from data 35 | 36 | This class has functions used to import transducer data and condition it for analysis. 37 | 38 | The most important function in this library is `new_xle_imp`, which uses the path and filename of an xle file, commonly produced by pressure transducers, to convert that file into a Pandas DataFrame. 39 | 40 | A Jupyter Notebook using some of the transport functions can be found here. 41 | 42 | ### usgs 43 | 44 | This module has functions used to apply the USGS's rest-based api to download USGS data by leveraging `requests` package and Pandas. 45 | 46 | The most powerful class in this module is `nwis`. It is called by `nwis(service, location value, location type)`. 47 | The main USGS services are `dv` for daily values, `iv` for instantaneous values, `gwlevels` for groundwater levels, and `site` for site information. The `nwis` class allows for rapid download of NWIS data. 48 | 49 | ```Python 50 | >>> import wellapplication as wa 51 | >>> discharge = wa.nwis('dv','10109000','sites') 52 | >>> site_data = discharge.sites 53 | >>> flow_data = discharge.data 54 | ``` 55 | 56 | A Jupyter Notebook using some of the usgs functions can be found here. 57 | 58 | -------------------------------------------------------------------------------- /.idea/markdown-navigator.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 33 | 34 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /wellapplication/MannKendall.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Jan 8 19:55:22 2016 4 | 5 | @author: p 6 | """ 7 | from __future__ import absolute_import, division, print_function, unicode_literals 8 | import numpy as np 9 | import pandas as pd 10 | from scipy.stats import norm 11 | 12 | import sys 13 | 14 | if sys.version_info >= (3, 0): 15 | def xrange(*args, **kwargs): 16 | return iter(range(*args, **kwargs)) 17 | 18 | def mk_test(x, alpha = 0.05): 19 | """This perform the MK (Mann-Kendall) test to check if there is any trend present in 20 | data or not 21 | 22 | Args: 23 | x: a vector of data 24 | alpha: significance level 25 | 26 | Returns: 27 | trend: tells the trend (increasing, decreasing or no trend) 28 | h: True (if trend is present) or False (if trend is absence) 29 | p: p value of the sifnificance test 30 | z: normalized test statistics 31 | 32 | Examples:: 33 | >>> x = np.random.rand(100) 34 | >>> trend = mk_test(x,0.05) 35 | >>> print(trend.trend) 36 | increasing 37 | 38 | Credit: http://pydoc.net/Python/ambhas/0.4.0/ambhas.stats/ 39 | """ 40 | n = len(x) 41 | ta = n*(n-1)/2 42 | # calculate S 43 | s = 0 44 | for k in xrange(n-1): 45 | for j in xrange(k+1,n): 46 | s += np.sign(x[j] - x[k]) 47 | 48 | # calculate the unique data 49 | unique_x = np.unique(x) 50 | g = len(unique_x) 51 | 52 | # calculate the var(s) 53 | if n == g: # there is no tie 54 | var_s = (n*(n-1)*(2*n+5))/18 55 | else: # there are some ties in data 56 | tp = np.zeros(unique_x.shape) 57 | for i in xrange(len(unique_x)): 58 | tp[i] = sum(unique_x[i] == x) 59 | var_s = (n*(n-1)*(2*n+5) - np.sum(tp*(tp-1)*(2*tp+5)))/18 60 | 61 | if s>0: 62 | z = (s - 1)/np.sqrt(var_s) 63 | elif s == 0: 64 | z = 0 65 | elif s<0: 66 | z = (s + 1)/np.sqrt(var_s) 67 | else: 68 | z = 0 69 | 70 | # calculate the p_value 71 | p = 2*(1- norm.cdf(abs(z))) # two tail test 72 | h = abs(z) > norm.ppf(1-alpha/2) 73 | 74 | if (z<0) and h: 75 | trend = 'decreasing' 76 | elif (z>0) and h: 77 | trend = 'increasing' 78 | else: 79 | trend = 'no trend' 80 | 81 | return pd.Series({'trend':trend, 'varS':round(var_s,3), 'p':round(p,3), 'z':round(z,3), 's':round(s,3), 'n':n, 'ta':ta}) 82 | 83 | def mk_ts(df, const, group1, orderby = 'year', alpha = 0.05): 84 | """ 85 | df = dataframe 86 | const = variable tested for trend 87 | group1 = variable to group by 88 | orderby = variable to order by (typically a date) 89 | """ 90 | 91 | def zcalc(Sp, Varp): 92 | if Sp > 0: 93 | return (Sp - 1)/Varp**0.5 94 | elif Sp < 0: 95 | return (Sp + 1)/Varp**0.5 96 | else: 97 | return 0 98 | 99 | df.is_copy = False 100 | 101 | df[const] = pd.to_numeric(df.ix[:,const]) 102 | # remove null values 103 | df[const].dropna(inplace=True) 104 | # remove index 105 | df.reset_index(inplace=True, drop=True) 106 | # sort by groups, then time 107 | df.sort_values(by=[group1,orderby],axis=0, inplace=True) 108 | 109 | # group by group and apply mk_test 110 | dg = df.groupby(group1).apply(lambda x: mk_test(x.loc[:,const].dropna().values, alpha)) 111 | Var_S = dg.loc[:,'varS'].sum() 112 | S = dg.loc[:,'s'].sum() 113 | N = dg.loc[:,'n'].sum() 114 | Z = zcalc(S,Var_S) 115 | P = 2*(1-norm.cdf(abs(Z))) 116 | group_n = len(dg) 117 | h = abs(Z) > norm.ppf(1-alpha/2) 118 | tau = S/dg.loc[:,'ta'].sum() 119 | 120 | if (Z<0) and h: 121 | trend = 'decreasing' 122 | elif (Z>0) and h: 123 | trend = 'increasing' 124 | else: 125 | trend = 'no trend' 126 | 127 | 128 | return pd.Series({'S':S, 'Z':round(Z,2), 'p':P, 'trend':trend, 'group_n':group_n, 'sample_n':N, 'Var_S':Var_S, 'tau':round(tau,2)}) 129 | 130 | 131 | -------------------------------------------------------------------------------- /wellapplication/hydropy.py: -------------------------------------------------------------------------------- 1 | """ 2 | Hydropy package 3 | @author: Stijn Van Hoey 4 | from: https://github.com/stijnvanhoey/hydropy/tree/master/hydropy 5 | for a better and more up to date copy of this script go to the original repo. 6 | """ 7 | from __future__ import absolute_import, division, print_function, unicode_literals 8 | import pandas as pd 9 | import numpy as np 10 | from scipy.optimize import curve_fit 11 | 12 | 13 | def get_baseflow_chapman(flowserie, recession_time): 14 | """ 15 | Parameters 16 | ---------- 17 | flowserie : pd.TimeSeries 18 | River discharge flowserie 19 | recession_time : float [0-1] 20 | recession constant 21 | Notes 22 | ------ 23 | $$Q_b(i) = \frac{k}{2-k}Q_b(i-1) + \frac{1-k}{2-k}Q(i)$$ 24 | """ 25 | 26 | secterm = (1.-recession_time)*flowserie/(2.-recession_time) 27 | 28 | baseflow = np.empty(flowserie.shape[0]) 29 | for i, timestep in enumerate(baseflow): 30 | if i == 0: 31 | baseflow[i] = 0.0 32 | else: 33 | baseflow[i] = recession_time*baseflow[i-1]/(2.-recession_time) + \ 34 | secterm.values[i] 35 | baseflow = pd.DataFrame(baseflow, index=flowserie.index) 36 | return baseflow 37 | 38 | 39 | def get_baseflow_boughton(flowserie, recession_time, baseflow_index): 40 | """ 41 | Parameters 42 | ---------- 43 | flowserie : pd.TimeSeries 44 | River discharge flowserie 45 | recession_time : float [0-1] 46 | recession constant 47 | baseflow_index : float 48 | Notes 49 | ------ 50 | $$Q_b(i) = \frac{k}{1+C}Q_b(i-1) + \frac{C}{1+C}Q(i)$$ 51 | """ 52 | 53 | parC = baseflow_index 54 | 55 | secterm = parC*flowserie/(1 + parC) 56 | 57 | baseflow = np.empty(flowserie.shape[0]) 58 | for i, timestep in enumerate(baseflow): 59 | if i == 0: 60 | baseflow[i] = 0.0 61 | else: 62 | baseflow[i] = recession_time*baseflow[i-1]/(1 + parC) + \ 63 | secterm.values[i] 64 | return pd.DataFrame(baseflow, index=flowserie.index) 65 | 66 | 67 | def get_baseflow_ihacres(flowserie, recession_time, baseflow_index, alfa): 68 | """ 69 | Parameters 70 | ---------- 71 | flowserie : pd.TimeSeries 72 | River discharge flowserie 73 | recession_time : float [0-1] 74 | recession constant 75 | Notes 76 | ------ 77 | $$Q_b(i) = \frac{k}{1+C}Q_b(i-1) + \frac{C}{1+C}[Q(i)+\alpha Q(i-1)]$$ 78 | $\alpha$ < 0. 79 | """ 80 | 81 | parC = baseflow_index 82 | 83 | secterm = parC/(1 + parC) 84 | 85 | baseflow = np.empty(flowserie.shape[0]) 86 | for i, timestep in enumerate(baseflow): 87 | if i == 0: 88 | baseflow[i] = 0.0 89 | else: 90 | baseflow[i] = recession_time * baseflow[i-1]/(1 + parC) + \ 91 | secterm * (flowserie.values[i] + 92 | alfa * flowserie.values[i-1]) 93 | return pd.DataFrame(baseflow, index=flowserie.index) 94 | 95 | def exp_curve(x, a, b): 96 | """Exponential curve used for rating curves""" 97 | return (a * x**b) 98 | 99 | def ratingCurve(discharge, stage): 100 | """Computes rating curve based on discharge measurements coupled with stage 101 | readings. 102 | discharge = array of measured discharges; 103 | stage = array of corresponding stage readings; 104 | Returns coefficients a, b for the rating curve in the form y = a * x**b 105 | """ 106 | 107 | popt, pcov = curve_fit(exp_curve, stage, discharge) 108 | 109 | def r_squ(): 110 | a = 0.0 111 | b = 0.0 112 | for i, j in zip(discharge, stage): 113 | a += (i - exp_curve(j, popt[0], popt[1]))**2 114 | b += (i - np.mean(discharge))**2 115 | return 1 - a / b 116 | 117 | return popt, r_squ() 118 | 119 | def RB_Flashiness(series): 120 | """Richards-Baker Flashiness Index for a series of daily mean discharges. 121 | https://github.com/hydrogeog/hydro""" 122 | Qsum = np.sum(series) # sum of daily mean discharges 123 | Qpath = 0.0 124 | for i in range(len(series)): 125 | if i == 0: 126 | Qpath = series[i] # first entry only 127 | else: 128 | Qpath += np.abs(series[i] - series[i-1]) # sum the absolute differences of the mean discharges 129 | return Qpath/Qsum 130 | 131 | 132 | def flow_duration(series): 133 | """Creates the flow duration curve for a discharge dataset. Returns a pandas 134 | series whose index is the discharge values and series is exceedance probability. 135 | https://github.com/hydrogeog/hydro""" 136 | fd = pd.Series(series).value_counts() # frequency of unique values 137 | fd.sort_index(inplace=True) # sort in order of increasing discharges 138 | fd = fd.cumsum() # cumulative sum of frequencies 139 | fd = fd.apply(lambda x: 100 - x/fd.max() * 100) # normalize 140 | return fd 141 | 142 | def Lyne_Hollick(series, alpha=.925, direction='f'): 143 | """Recursive digital filter for baseflow separation. Based on Lyne and Hollick, 1979. 144 | series = array of discharge measurements 145 | alpha = filter parameter 146 | direction = (f)orward or (r)everse calculation 147 | https://github.com/hydrogeog/hydro 148 | """ 149 | series = np.array(series) 150 | f = np.zeros(len(series)) 151 | if direction == 'f': 152 | for t in np.arange(1,len(series)): 153 | f[t] = alpha * f[t-1] + (1 + alpha)/2 * (series[t] - series[t-1]) 154 | if series[t] - f[t] > series[t]: 155 | f[t] = 0 156 | elif direction == 'r': 157 | for t in np.arange(len(series)-2, 1, -1): 158 | f[t] = alpha * f[t+1] + (1 + alpha)/2 * (series[t] - series[t+1]) 159 | if series[t] - f[t] > series[t]: 160 | f[t] = 0 161 | return np.array(series - f) 162 | 163 | def Eckhardt(series, alpha=.98, BFI=.80): 164 | """Recursive digital filter for baseflow separation. Based on Eckhardt, 2004. 165 | series = array of discharge measurements 166 | alpha = filter parameter 167 | BFI = BFI_max (maximum baseflow index) 168 | https://github.com/hydrogeog/hydro 169 | """ 170 | series = np.array(series) 171 | f = np.zeros(len(series)) 172 | f[0] = series[0] 173 | for t in np.arange(1,len(series)): 174 | f[t] = ((1 - BFI) * alpha * f[t-1] + (1 - alpha) * BFI * series[t]) / (1 - alpha * BFI) 175 | if f[t] > series[t]: 176 | f[t] = series[t] 177 | return f 178 | -------------------------------------------------------------------------------- /test/test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Jan 23 13:03:00 2016 4 | 5 | @author: p 6 | """ 7 | from __future__ import absolute_import, division, print_function, unicode_literals 8 | import wellapplication as wa 9 | import pandas as pd 10 | import matplotlib 11 | import sys 12 | sys.path.append('../') 13 | import numpy as np 14 | 15 | m = wa.Meso(token='demotoken') 16 | 17 | def test_getelev(): 18 | print('Testing getelev') 19 | x = [-111.21, 41.4] 20 | m = wa.get_elev(x) 21 | assert m > 100.0 22 | 23 | def test_gethuc(): 24 | print('Testing gethuc') 25 | x = [-111.21, 41.4] 26 | huc_data = wa.get_huc(x) 27 | assert len(huc_data[0])>0 28 | 29 | def test_USGSID(): 30 | print('Testing USGSID') 31 | x = [-111.21, 41.4] 32 | usgs_id = wa.USGSID(x) 33 | assert usgs_id == '412400111123601' 34 | 35 | def test_nwis(): 36 | nw = wa.nwis('dv', '01585200', 'sites') 37 | assert len(nw.sites) == 1 38 | 39 | def test_nwis_gw(): 40 | nw = wa.nwis('gwlevels','16010204','huc',siteStatus='all') 41 | df = nw.avg_wl() 42 | assert len(df) > 5 43 | 44 | def test_fdc(): 45 | d16 = wa.nwis('dv','01659500','sites') 46 | ci = wa.fdc(d16.data,'value',1900,2016) 47 | assert type(ci[0]) == list 48 | 49 | def test_mktest(): 50 | x = range(0,100) 51 | trend = wa.MannKendall.mk_test(x,0.05) 52 | assert trend.trend == 'increasing' 53 | 54 | #def test_pipe(): 55 | # Chem = {'Type':[1,2,2,3], 'Cl':[1.72,0.90,4.09,1.52], 'HCO3':[4.02,1.28,4.29,3.04], 56 | # 'SO4':[0.58,0.54,0.38,0.46], 'NaK':[1.40,0.90,3.38,2.86], 'Ca':[4.53,None,4.74,1.90], 57 | # 'Mg':[0.79,0.74,0.72,0.66], 'EC':[672.0,308.0,884.0,542.0], 'NO3':[0.4,0.36,0.08,0.40], 58 | # 'Sicc':[0.21,0.56,None,-0.41]} 59 | # chem = pd.DataFrame(Chem) 60 | # pipr = wa.piper(chem) 61 | # assert type(pipr.plot) == matplotlib.figure.Figure 62 | 63 | def test_new_xle_imp(): 64 | xle = 'test/20160919_LittleHobble.xle' 65 | xle_df = wa.new_xle_imp(xle) 66 | assert len(xle_df) > 0 67 | 68 | def test_xle_head_table(): 69 | xle_dir = 'test/' 70 | dir_df = wa.xle_head_table(xle_dir) 71 | assert len(xle_dir) > 0 72 | 73 | def test_dataendclean(): 74 | xle = 'test/20160919_LittleHobble.xle' 75 | df = wa.new_xle_imp(xle) 76 | x = 'Level' 77 | xle1 = wa.dataendclean(df, x) 78 | assert len(xle1) > 1 79 | 80 | def test_smoother(): 81 | xle = 'test/20160919_LittleHobble.xle' 82 | df = wa.new_xle_imp(xle) 83 | x = 'Level' 84 | xle1 = wa.smoother(df, x, sd=1) 85 | assert len(xle1) > 1 86 | 87 | def test_hourly_resample(): 88 | xle = 'test/20160919_LittleHobble.xle' 89 | df = wa.new_xle_imp(xle) 90 | xle1 = wa.hourly_resample(df, minutes=30) 91 | 92 | # Basic Function Tests 93 | def testvars(): 94 | var_list = m.variables() 95 | 96 | def testmetadata(): 97 | stations = m.metadata(radius=['wbb', 5]) 98 | 99 | def test_WQP(): 100 | wqq = wa.WQP('16010204','huc') 101 | wqq.results = wqq.massage_results() 102 | pivchem = wqq.piv_chem() 103 | assert 'Alk' in pivchem.columns 104 | 105 | def test_WQ2(): 106 | wqq = wa.WQP('16010204','huc') 107 | wqq.stations = wqq.massage_stations() 108 | df = wqq.stations 109 | assert "OrgId" in list(df.columns) 110 | 111 | #def test_imp_new_well(): 112 | # inputfile = "test/ag13c 2016-08-02.xle" 113 | # manualwls = "test/All tape measurements.csv" 114 | # manual = pd.read_csv(manualwls, index_col="DateTime", engine="python") 115 | # barofile = "test/baro.csv" 116 | # baro = pd.read_csv(barofile,index_col=0, parse_dates=True) 117 | # wellinfo = pd.read_csv("test/wellinfo4.csv") 118 | # g, drift, wellname = wa.fix_well(wellinfo,inputfile, manual, baro) 119 | # assert wellname == 'ag13c' 120 | 121 | def test_well_baro_merge(): 122 | xle = "test/ag13c 2016-08-02.xle" 123 | xle_df = wa.new_xle_imp(xle) 124 | barofile = "test/baro.csv" 125 | baro = pd.read_csv(barofile,index_col=0, parse_dates=True) 126 | baro['Level'] = baro['pw03'] 127 | assert len(wa.well_baro_merge(xle_df, baro, sampint=60)) > 10 128 | 129 | #def test_fix_drift(): 130 | # xle = "test/ag13c 2016-08-02.xle" 131 | # xle_df = wa.new_xle_imp(xle) 132 | # manualwls = "test/All tape measurements.csv" 133 | # manual = pd.read_csv(manualwls, index_col="DateTime", engine="python") 134 | # manual35 = manual[manual['WellID']==35] 135 | # manual35['dt'] = pd.to_datetime(manual35.index) 136 | # manual_35 = manual35.reset_index() 137 | # manual_35.set_index('dt',inplace=True) 138 | # fd = wa.fix_drift(xle_df, manual_35, meas='Level', corrwl='Level', 139 | # manmeas='MeasuredDTW', outcolname='DriftCorrection') 140 | # assert 'DriftCorrection' in list(fd[0].columns) 141 | 142 | def test_getwellid(): 143 | inputfile = "test/ag13c 2016-08-02.xle" 144 | wellinfo = pd.read_csv("test/wellinfo4.csv") 145 | wid = wa.getwellid(inputfile, wellinfo) 146 | assert wid[1] == 35 147 | 148 | def test_barodistance(): 149 | wellinfo = pd.read_csv("test/wellinfo4.csv") 150 | bd = wa.barodistance(wellinfo) 151 | assert 'closest_baro' in list(bd.columns) 152 | 153 | #def test_imp_new_well_csv(): 154 | # inputfile = "test/ag14a 2016-08-02.csv" 155 | # manualwls = "test/All tape measurements.csv" 156 | # manual = pd.read_csv(manualwls, index_col="DateTime", engine="python") 157 | # barofile = "test/baro.csv" 158 | # baro = pd.read_csv(barofile,index_col=0, parse_dates=True) 159 | # wellinfo = pd.read_csv("test/wellinfo4.csv") 160 | # g, drift, wellname = wa.imp_new_well(inputfile, wellinfo, manual, baro) 161 | # assert wellname == 'ag14a' 162 | 163 | def test_jumpfix(): 164 | xle = "test/ag13c 2016-08-02.xle" 165 | df = wa.new_xle_imp(xle) 166 | jf = wa.jumpfix(df, 'Level', threashold=0.005) 167 | assert jf['newVal'][-1] > 10 168 | 169 | def test_gantt(): 170 | ashley = wa.nwis('dv', '09265500', 'sites') 171 | gn = wa.gantt(ashley.data, stations=['value']) 172 | assert type(gn.gantt()[2]) == matplotlib.figure.Figure 173 | 174 | #def test_scatterColor(): 175 | # x = np.arange(1, 100, 1) 176 | # y = np.arange(0.1, 10.0, 0.1) 177 | # w = np.arange(5, 500, 5) 178 | # out = wa.scatterColor(x, y, w) 179 | # assert round(out[0], 1) == 0.1 180 | 181 | def test_get_info(): 182 | nw = wa.nwis('gwlevels', '16010204', 'huc', siteStatus='all') 183 | df = nw.get_info(siteStatus='all') 184 | assert 'site_no' in list(df.columns) 185 | 186 | #def test_recess(): 187 | # ashley = wa.nwis('dv', '09265500', 'sites', startDT='2015-06-02', endDT='2015-06-14') 188 | # rec = wa.graphs.recess(ashley.data, 'value', st=[2015, 6, 2]) 189 | # assert round(rec.rec_results[0], 2) == 0.04 190 | 191 | #def test_get_recess_int(): 192 | # ashley = wa.nwis('dv', '09265500','sites', startDT='2015-01-02' ,endDT='2015-10-14') 193 | # assert type(wa.get_recess_int(ashley.data, 'value')[0]) == pd.DataFrame 194 | 195 | def test_mk_ts(): 196 | usgsP = pd.read_csv('test/usgsP.csv') 197 | var = wa.MannKendall.mk_ts(usgsP, 'PO4', 'month', 'year',0.05) 198 | assert var[0] == -87.0 199 | -------------------------------------------------------------------------------- /wellapplication/arcpy_functions.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function, unicode_literals 2 | 3 | import pandas as pd 4 | 5 | 6 | from .transport import * 7 | 8 | try: 9 | import arcpy 10 | 11 | arcpy.env.overwriteOutput = True 12 | 13 | except ImportError: 14 | pass 15 | 16 | 17 | def imp_one_well(well_file, baro_file, man_startdate, man_start_level, man_endate, man_end_level, 18 | conn_file_root, 19 | wellid, be=None, well_table="UGGP.UGGPADMIN.UGS_NGWMN_Monitoring_Locations", 20 | gw_reading_table="UGGP.UGGPADMIN.UGS_GW_reading", drift_tol=0.3, override=False): 21 | import arcpy 22 | arcpy.env.workspace = conn_file_root 23 | 24 | if os.path.splitext(well_file)[1] == '.xle': 25 | trans_type = 'Solinst' 26 | else: 27 | trans_type = 'Global Water' 28 | 29 | printmes('Trans type for well is {:}.'.format(trans_type)) 30 | 31 | welltable = table_to_pandas_dataframe(well_table, query="AlternateID is not Null") 32 | 33 | well = new_trans_imp(well_file) 34 | baro = new_trans_imp(baro_file) 35 | 36 | 37 | corrwl = well_baro_merge(well, baro, vented=(trans_type != 'Solinst')) 38 | 39 | if be: 40 | corrwl = correct_be(wellid, welltable, corrwl, be=be) 41 | corrwl['corrwl'] = corrwl['BAROEFFICIENCYLEVEL'] 42 | 43 | stickup, well_elev = get_stickup_elev(wellid, well_table) 44 | 45 | man = pd.DataFrame( 46 | {'DateTime': [man_startdate, man_endate], 'MeasuredDTW': [man_start_level, man_end_level]}).set_index( 47 | 'DateTime') 48 | printmes(man) 49 | man['Meas_GW_Elev'] = well_elev - (man['MeasuredDTW'] - stickup) 50 | 51 | man['MeasuredDTW'] = man['MeasuredDTW'] * -1 52 | 53 | dft = fix_drift(corrwl, man, meas='corrwl', manmeas='MeasuredDTW') 54 | drift = round(float(dft[1]['drift'].values[0]), 3) 55 | printmes('Drift for well {:} is {:}.'.format(wellid, drift)) 56 | df = dft[0] 57 | 58 | rowlist, fieldnames = prepare_fieldnames(df, wellid, stickup, well_elev) 59 | 60 | if drift <= drift_tol: 61 | edit_table(rowlist, gw_reading_table, fieldnames) 62 | printmes('Well {:} successfully imported!'.format(wellid)) 63 | elif override == 1: 64 | edit_table(rowlist, gw_reading_table, fieldnames) 65 | printmes('Override initiated. Well {:} successfully imported!'.format(wellid)) 66 | else: 67 | printmes('Well {:} drift greater than tolerance!'.format(wellid)) 68 | return df, man, be, drift 69 | 70 | 71 | def find_extreme(site_number, gw_table="UGGP.UGGPADMIN.UGS_GW_reading", extma='max'): 72 | """ 73 | Find date extrema from a SDE table using query parameters 74 | :param site_number: LocationID of the site of interest 75 | :param gw_table: SDE table to be queried 76 | :param extma: options are 'max' (default) or 'min' 77 | :return: date of extrema, depth to water of extrema, water elevation of extrema 78 | """ 79 | import arcpy 80 | from arcpy import env 81 | env.overwriteOutput = True 82 | 83 | if extma == 'max': 84 | sort = 'DESC' 85 | else: 86 | sort = 'ASC' 87 | query = "LOCATIONID = '{:}'".format(site_number) 88 | field_names = ['READINGDATE', 'LOCATIONID', 'DTWBELOWGROUNDSURFACE', 'WATERELEVATION'] 89 | sql_sn = ('TOP 1', 'ORDER BY READINGDATE {:}'.format(sort)) 90 | # use a search cursor to iterate rows 91 | dateval, dtw, wlelev = [], [], [] 92 | 93 | envtable = os.path.join(env.workspace, gw_table) 94 | 95 | with arcpy.da.SearchCursor(envtable, field_names, query, sql_clause=sql_sn) as search_cursor: 96 | # iterate the rows 97 | for row in search_cursor: 98 | dateval.append(row[0]) 99 | dtw.append(row[1]) 100 | wlelev.append(row[2]) 101 | if len(dateval) < 1: 102 | return None, 0, 0 103 | else: 104 | return dateval[0], dtw[0], wlelev[0] 105 | 106 | 107 | def get_field_names(table): 108 | read_descr = arcpy.Describe(table) 109 | field_names = [] 110 | for field in read_descr.fields: 111 | field_names.append(field.name) 112 | field_names.remove('OBJECTID') 113 | return field_names 114 | 115 | def get_gap_data(site_number, enviro, gap_tol = 0.5, 116 | gw_reading_table="UGGP.UGGPADMIN.UGS_GW_reading"): 117 | arcpy.env.workspace = enviro 118 | first_date = datetime.datetime(1900, 1, 1) 119 | last_date = datetime.datetime.now() 120 | 121 | query_txt = "LOCATIONID = '{:}' AND TAPE = 0" 122 | query = query_txt.format(site_number) 123 | 124 | sql_sn = (None, 'ORDER BY READINGDATE ASC') 125 | 126 | fieldnames = ['READINGDATE'] 127 | 128 | #readings = wa.table_to_pandas_dataframe(gw_reading_table, fieldnames, query, sql_sn) 129 | 130 | dt = [] 131 | 132 | # use a search cursor to iterate rows 133 | with arcpy.da.SearchCursor(gw_reading_table, 'READINGDATE', query, sql_clause=sql_sn) as search_cursor: 134 | # iterate the rows 135 | for row in search_cursor: 136 | # combine the field names and row items together, and append them 137 | dt.append(row[0]) 138 | 139 | df = pd.Series(dt,name='DateTime') 140 | df = df.to_frame() 141 | df['hr_diff'] = df['DateTime'].diff() 142 | df.set_index('DateTime',inplace=True) 143 | df['julian'] = df.index.to_julian_date() 144 | df['diff'] = df['julian'].diff() 145 | df['is_gap'] = df['diff'] > gap_tol 146 | def rowIndex(row): 147 | return row.name 148 | df['gap_end'] = df.apply(lambda x: rowIndex(x) if x['is_gap'] else pd.NaT, axis=1) 149 | df['gap_start'] = df.apply(lambda x: rowIndex(x) - x['hr_diff'] if x['is_gap'] else pd.NaT, axis=1) 150 | df = df[df['is_gap'] == True] 151 | return df 152 | 153 | 154 | 155 | def table_to_pandas_dataframe(table, field_names=None, query=None, sql_sn=(None, None)): 156 | """ 157 | Load data into a Pandas Data Frame for subsequent analysis. 158 | :param table: Table readable by ArcGIS. 159 | :param field_names: List of fields. 160 | :param query: SQL query to limit results 161 | :param sql_sn: sort fields for sql; see http://pro.arcgis.com/en/pro-app/arcpy/functions/searchcursor.htm 162 | :return: Pandas DataFrame object. 163 | """ 164 | 165 | # if field names are not specified 166 | if not field_names: 167 | field_names = get_field_names(table) 168 | # create a pandas data frame 169 | df = pd.DataFrame(columns=field_names) 170 | 171 | # use a search cursor to iterate rows 172 | with arcpy.da.SearchCursor(table, field_names, query, sql_clause=sql_sn) as search_cursor: 173 | # iterate the rows 174 | for row in search_cursor: 175 | # combine the field names and row items together, and append them 176 | df = df.append(dict(zip(field_names, row)), ignore_index=True) 177 | 178 | # return the pandas data frame 179 | return df 180 | 181 | 182 | def edit_table(df, gw_reading_table, fieldnames): 183 | """ 184 | Edits SDE table by inserting new rows 185 | :param df: pandas DataFrame 186 | :param gw_reading_table: sde table to edit 187 | :param fieldnames: field names that are being appended in order of appearance in dataframe or list row 188 | :return: 189 | """ 190 | 191 | table_names = get_field_names(gw_reading_table) 192 | 193 | for name in fieldnames: 194 | if name not in table_names: 195 | fieldnames.remove(name) 196 | printmes("{:} not in {:} fieldnames!".format(name, gw_reading_table)) 197 | 198 | if len(fieldnames) > 0: 199 | subset = df[fieldnames] 200 | rowlist = subset.values.tolist() 201 | 202 | arcpy.env.overwriteOutput = True 203 | edit = arcpy.da.Editor(arcpy.env.workspace) 204 | edit.startEditing(False, False) 205 | edit.startOperation() 206 | 207 | cursor = arcpy.da.InsertCursor(gw_reading_table, fieldnames) 208 | for j in range(len(rowlist)): 209 | cursor.insertRow(rowlist[j]) 210 | 211 | del cursor 212 | edit.stopOperation() 213 | edit.stopEditing(True) 214 | else: 215 | printmes('No data imported!') 216 | 217 | 218 | def simp_imp_well(well_table, file, baro_out, wellid, manual, stbl_elev=True, 219 | gw_reading_table="UGGP.UGGPADMIN.UGS_GW_reading", drift_tol=0.3, override=False): 220 | """ 221 | Imports single well 222 | :param well_table: pandas dataframe of well data with ALternateID as index; needs altitude, be, stickup, and barolooger 223 | :param file: raw well file (xle, csv, or lev) 224 | :param baro_out: dictionary with barometer ID defining dataframe names 225 | :param wellid: unique ID of well field 226 | :param manual: manual data dataframe indexed by measure datetime 227 | :param stbl_elev: 228 | :param gw_reading_table: 229 | :param drift_tol: 230 | :param override: 231 | :return: 232 | """ 233 | # import well file 234 | well = new_trans_imp(file) 235 | 236 | file_ext = os.path.splitext(file)[1] 237 | if file_ext == '.xle': 238 | trans_type = 'Solinst' 239 | else: 240 | trans_type = 'Global Water' 241 | try: 242 | baroid = well_table.loc[wellid, 'BaroLoggerType'] 243 | printmes('{:}'.format(baroid)) 244 | corrwl = well_baro_merge(well, baro_out[str(baroid)], barocolumn='MEASUREDLEVEL', 245 | vented=(trans_type != 'Solinst')) 246 | except: 247 | corrwl = well_baro_merge(well, baro_out['9003'], barocolumn='MEASUREDLEVEL', 248 | vented=(trans_type != 'Solinst')) 249 | 250 | # be, intercept, r = clarks(corrwl, 'barometer', 'corrwl') 251 | # correct barometric efficiency 252 | wls, be = correct_be(wellid, well_table, corrwl) 253 | 254 | # get manual groundwater elevations 255 | # man, stickup, well_elev = self.get_gw_elevs(wellid, well_table, manual, stable_elev = stbl_elev) 256 | stdata = well_table[well_table['WellID'] == str(wellid)] 257 | man_sub = manual[manual['LOCATIONID'] == int(wellid)] 258 | well_elev = float(stdata['Altitude'].values[0]) # Should be in feet 259 | 260 | if stbl_elev: 261 | if stdata['Offset'].values[0] is None: 262 | stickup = 0 263 | printmes('Well ID {:} missing stickup!'.format(wellid)) 264 | else: 265 | stickup = float(stdata['Offset'].values[0]) 266 | else: 267 | 268 | stickup = man_sub.loc[man_sub.last_valid_index(), 'Current Stickup Height'] 269 | 270 | # manual = manual['MeasuredDTW'].to_frame() 271 | man_sub.loc[:, 'MeasuredDTW'] = man_sub['DTWBELOWCASING'] * -1 272 | man_sub.loc[:, 'Meas_GW_Elev'] = man_sub.loc[:, 'WATERELEVATION'] 273 | #man_sub.loc[:, 'Meas_GW_Elev'] = man_sub['MeasuredDTW'].apply(lambda x: float(well_elev) + (x + float(stickup)),1) 274 | printmes('Stickup: {:}, Well Elev: {:}'.format(stickup, well_elev)) 275 | 276 | # fix transducer drift 277 | 278 | dft = fix_drift(wls, man_sub, meas='BAROEFFICIENCYLEVEL', manmeas='MeasuredDTW') 279 | drift = np.round(float(dft[1]['drift'].values[0]), 3) 280 | 281 | df = dft[0] 282 | df.sort_index(inplace=True) 283 | first_index = df.first_valid_index() 284 | 285 | # Get last reading at the specified location 286 | read_max, dtw, wlelev = find_extreme(wellid) 287 | 288 | printmes("Last database date is {:}. First transducer reading is on {:}.".format(read_max, first_index)) 289 | 290 | rowlist, fieldnames = prepare_fieldnames(df, wellid, stickup, well_elev) 291 | 292 | if (read_max is None or read_max < first_index) and (drift < drift_tol): 293 | edit_table(rowlist, gw_reading_table, fieldnames) 294 | printmes(arcpy.GetMessages()) 295 | printmes("Well {:} imported.".format(wellid)) 296 | elif override and (drift < drift_tol): 297 | edit_table(rowlist, gw_reading_table, fieldnames) 298 | printmes(arcpy.GetMessages()) 299 | printmes("Override Activated. Well {:} imported.".format(wellid)) 300 | elif drift > drift_tol: 301 | printmes('Drift for well {:} exceeds tolerance!'.format(wellid)) 302 | else: 303 | printmes('Dates later than import data for well {:} already exist!'.format(wellid)) 304 | pass 305 | 306 | # except (ValueError, ZeroDivisionError): 307 | 308 | # drift = -9999 309 | # df = corrwl 310 | # pass 311 | return rowlist, man_sub, be, drift 312 | 313 | 314 | 315 | 316 | 317 | def upload_bp_data(df, site_number, return_df=False, gw_reading_table="UGGP.UGGPADMIN.UGS_GW_reading"): 318 | import arcpy 319 | 320 | df.sort_index(inplace=True) 321 | first_index = df.first_valid_index() 322 | 323 | # Get last reading at the specified location 324 | read_max, dtw, wlelev = find_extreme(site_number) 325 | 326 | if read_max is None or read_max < first_index: 327 | 328 | df['MEASUREDLEVEL'] = df['Level'] 329 | df['TAPE'] = 0 330 | df['LOCATIONID'] = site_number 331 | 332 | df.sort_index(inplace=True) 333 | 334 | fieldnames = ['READINGDATE', 'MEASUREDLEVEL', 'TEMP', 'LOCATIONID', 'TAPE'] 335 | 336 | if 'Temperature' in df.columns: 337 | df.rename(columns={'Temperature': 'TEMP'}, inplace=True) 338 | 339 | if 'TEMP' in df.columns: 340 | df['TEMP'] = df['TEMP'].apply(lambda x: np.round(x, 4), 1) 341 | else: 342 | df['TEMP'] = None 343 | 344 | df.index.name = 'READINGDATE' 345 | 346 | subset = df.reset_index() 347 | 348 | edit_table(subset, gw_reading_table, fieldnames) 349 | 350 | if return_df: 351 | return df 352 | 353 | else: 354 | printmes('Dates later than import data for this station already exist!') 355 | pass 356 | 357 | 358 | def get_location_data(site_number, enviro, first_date=None, last_date=None, limit=None, 359 | gw_reading_table="UGGP.UGGPADMIN.UGS_GW_reading"): 360 | arcpy.env.workspace = enviro 361 | if not first_date: 362 | first_date = datetime.datetime(1900, 1, 1) 363 | elif type(first_date) == str: 364 | try: 365 | datetime.datetime.strptime(first_date, '%m/%d/%Y') 366 | except: 367 | first_date = datetime.datetime(1900, 1, 1) 368 | # Get last reading at the specified location 369 | if not last_date or last_date > datetime.datetime.now(): 370 | last_date = datetime.datetime.now() 371 | 372 | query_txt = "LOCATIONID = '{:}' and (READINGDATE >= '{:%m/%d/%Y}' and READINGDATE <= '{:%m/%d/%Y}')" 373 | query = query_txt.format(site_number, first_date, last_date + datetime.timedelta(days=1)) 374 | printmes(query) 375 | sql_sn = (limit, 'ORDER BY READINGDATE ASC') 376 | 377 | fieldnames = get_field_names(gw_reading_table) 378 | 379 | readings = table_to_pandas_dataframe(gw_reading_table, fieldnames, query, sql_sn) 380 | readings.set_index('READINGDATE', inplace=True) 381 | if len(readings) == 0: 382 | printmes('No Records for location {:}'.format(site_number)) 383 | return readings 384 | -------------------------------------------------------------------------------- /docs/UMAR_WL_Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "This notebook provides Python scripts to import, compile, modify, graph, and export Solinst transducer data. " 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "%matplotlib inline\n", 19 | "import pandas as pd\n", 20 | "import numpy as np\n", 21 | "import os\n", 22 | "import sys\n", 23 | "import platform\n", 24 | "import glob\n", 25 | "import re\n", 26 | "import xmltodict\n", 27 | "import matplotlib\n", 28 | "import matplotlib.pyplot as plt\n", 29 | "import matplotlib.dates as dates\n", 30 | "import matplotlib.ticker as tick\n", 31 | "from matplotlib.backends.backend_pdf import PdfPages\n", 32 | "import statsmodels.tsa.tsatools as tools\n", 33 | "from pandas.stats.api import ols\n", 34 | "from datetime import datetime\n", 35 | "from pylab import rcParams\n", 36 | "rcParams['figure.figsize'] = 15, 10" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": { 43 | "collapsed": false 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "print(\"Operating System \" + platform.system() + \" \" + platform.release())\n", 48 | "print(\"Python Version \" + str(sys.version))\n", 49 | "print(\"Pandas Version \" + str(pd.__version__))\n", 50 | "print(\"Numpy Version \" + str(np.__version__))\n", 51 | "print(\"Matplotlib Version \" + str(matplotlib.__version__))" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "collapsed": true 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "#rootname = '/media/p/Transcend/PROJECTS/UMAR/Phase_II/Data/RAW/'\n", 63 | "rootname = 'E:/PROJECTS/UMAR/Data/RAW/'" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "## Scat" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "collapsed": false 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "def Scat(data,bp,wl):\n", 82 | " data['dwl'] = data[wl].diff()\n", 83 | " data['dbp'] = data[bp].diff()\n", 84 | "\n", 85 | " regression = ols(y=data['dwl'], x=data['dbp'])\n", 86 | " m = regression.beta.x\n", 87 | " b = regression.beta.intercept\n", 88 | " r = regression.r2\n", 89 | " #r = (regression.beta.r)**2\n", 90 | " plt.scatter(y=data['dwl'], x=data['dbp'])\n", 91 | "\n", 92 | " y_reg = [data['dbp'][i]*m+b for i in range(len(data['dbp']))]\n", 93 | "\n", 94 | " plt.plot(data['dbp'],y_reg, \n", 95 | " label='Regression: Y = {m:.4f}X + {b:.5}\\nr^2 = {r:.4f}\\n BE = {be:.2f} '.format(m=m,b=b,r=r,be=m))\n", 96 | " plt.legend()\n", 97 | " plt.xlabel('Sum of Barometric Pressure Changes (ft)')\n", 98 | " plt.ylabel('Sum of Water-Level Changes (ft)')" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "## clarks" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | "collapsed": false 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "# clark's method\n", 117 | "def clarks(data,bp,wl):\n", 118 | " '''\n", 119 | " clarks method\n", 120 | " Input dataframe (data) with barometric pressure (bp) and water level (wl) data\n", 121 | " Returns slope, intercept, and r squared value'''\n", 122 | " data['dwl'] = data[wl].diff()\n", 123 | " data['dbp'] = data[bp].diff()\n", 124 | " \n", 125 | " data['beta'] = data['dbp']*data['dwl']\n", 126 | " data['Sbp'] = np.abs(data['dbp']).cumsum()\n", 127 | " data['Swl'] = data[['dwl','beta']].apply(lambda x: -1*np.abs(x[0]) if x[1]>0 else np.abs(x[0]), axis=1).cumsum()\n", 128 | " plt.figure()\n", 129 | " plt.plot(data['Sbp'],data['Swl'])\n", 130 | " regression = ols(y=data['Swl'], x=data['Sbp'])\n", 131 | " \n", 132 | " m = regression.beta.x\n", 133 | " b = regression.beta.intercept\n", 134 | " r = regression.r2\n", 135 | " \n", 136 | " y_reg = [data.ix[i,'Sbp']*m+b for i in range(len(data['Sbp']))]\n", 137 | "\n", 138 | " plt.plot(data['Sbp'],y_reg,\n", 139 | " label='Regression: Y = {m:.4f}X + {b:.5}\\nr^2 = {r:.4f}\\n BE = {be:.2f} '.format(m=m,b=b,r=r,be=m))\n", 140 | " plt.legend()\n", 141 | " plt.xlabel('Sum of Barometric Pressure Changes (ft)')\n", 142 | " plt.ylabel('Sum of Water-Level Changes (ft)')\n", 143 | " data.drop(['dwl','dbp','Sbp','Swl'], axis=1, inplace=True)\n", 144 | " return m,b,r" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "# Setting Up the Solinst Barologger and Levelogger" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "I always set my transducers to `future start` to make the tranducer start on the hour. I also allow the Levelogger to take an instantaneous measurement out of water, and zero the transducer out to accomodate for elevation." 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "# Import Relevant Files" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "First, we must import all of the relevant data. To properly import transducer data, we need:\n", 173 | "* Transducer (Levelogger) data\n", 174 | "* Barometric (Barologger) data\n", 175 | "* Manual Depth to Water Measurements\n", 176 | " \n", 177 | "If we want to calculate water-level elevation, we also need:\n", 178 | "* Well stickup length (ground to measure point distance)\n", 179 | "* Ground surface elevation at well\n", 180 | "
OR
\n", 181 | "* Elevation of measure point" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": { 188 | "collapsed": false 189 | }, 190 | "outputs": [], 191 | "source": [ 192 | "barofile = new_xle_imp(rootname + \"baro_2015-07-16.xle\")\n", 193 | "barofile2 = pd.read_csv(rootname + \"UCC.csv\",parse_dates=True,index_col='Day',skiprows=14, na_values=['M','S'])\n", 194 | "wellfile = new_xle_imp(rootname +\"arnold_well_2015-07-16.xle\")\n", 195 | "wellfile2 = new_xle_imp(rootname +\"arnold_well_2015-04-01.xle\")\n", 196 | "manualfile = pd.read_excel(rootname +\"Manual_Readings.xlsx\",\"Arn_Well\",index_col=\"datetime\")" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": { 203 | "collapsed": false 204 | }, 205 | "outputs": [], 206 | "source": [ 207 | "barofile2['ft_water_bp']= barofile2['Sea Level Pressure']*0.0335 - (31.17 - 4806/826 + 7.8) # convert hPa to ft water\n", 208 | "barofile2 = barofile2.interpolate(method='time') # fill NA spots" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "## Compile Files if Necessary" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": {}, 221 | "source": [ 222 | "Concatonate the well files so that they are one seamless file." 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": { 229 | "collapsed": true 230 | }, 231 | "outputs": [], 232 | "source": [ 233 | "wellfile = pd.concat([wellfile,wellfile2])\n", 234 | "wellfile.sort_index(inplace=True)" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": null, 240 | "metadata": { 241 | "collapsed": false 242 | }, 243 | "outputs": [], 244 | "source": [ 245 | "wellfile.columns" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "## Graph Raw Data" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "You should always graph raw data to see if there are any tares in the data from users moving the tranducer placement. Sometimes, the transducer is out of the water when it takes a measurement. These points should be removed or adjusted." 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": { 266 | "collapsed": false 267 | }, 268 | "outputs": [], 269 | "source": [ 270 | "#http://stackoverflow.com/questions/7733693/matplotlib-overlay-plots-with-different-scales\n", 271 | "x1 = wellfile.index.to_datetime() #converts pandas dataframe index into datetime format for graph\n", 272 | "x2 = barofile.index.to_datetime()\n", 273 | "x3 = manualfile.index.to_datetime()\n", 274 | "\n", 275 | "y1 = wellfile['Level']\n", 276 | "y2 = barofile['Level']\n", 277 | "y3 = manualfile['dtw_ft']\n", 278 | "\n", 279 | "data = [(x1,y1),(x2,y2),(x3,y3)]\n", 280 | "\n", 281 | "fig, ax = plt.subplots()\n", 282 | "\n", 283 | "# Twin the x-axis twice to make independent y-axes.\n", 284 | "axes = [ax, ax.twinx(), ax.twinx()]\n", 285 | "\n", 286 | "# Make some space on the right side for the extra y-axis.\n", 287 | "fig.subplots_adjust(right=0.75)\n", 288 | "\n", 289 | "# Move the last y-axis spine over to the right by 20% of the width of the axes\n", 290 | "axes[-1].spines['right'].set_position(('axes', 1.2))\n", 291 | "\n", 292 | "# To make the border of the right-most axis visible, we need to turn the frame\n", 293 | "# on. This hides the other plots, however, so we need to turn its fill off.\n", 294 | "axes[-1].set_frame_on(True)\n", 295 | "axes[-1].patch.set_visible(False)\n", 296 | "\n", 297 | "# And finally we get to plot things...\n", 298 | "colors = ['Green', 'Red', 'Blue']\n", 299 | "labels = ['Levelogger Pressure (ft)','Barologger Pressure (ft)','Manual Readings (ft to water)' ]\n", 300 | "marks = ['','','o']\n", 301 | "linetypes = ['solid','solid','none']\n", 302 | "\n", 303 | "for ax, color, datum, label, mark, linety in zip(axes, colors, data, labels, marks, linetypes):\n", 304 | " ax.plot(datum[0],datum[1], marker=mark, linestyle=linety, color=color, label=label)\n", 305 | " ax.set_ylabel(label, color=color)\n", 306 | " ax.tick_params(axis='y', colors=color)\n", 307 | " \n", 308 | "h1, l1 = axes[0].get_legend_handles_labels()\n", 309 | "h2, l2 = axes[1].get_legend_handles_labels()\n", 310 | "h3, l3 = axes[2].get_legend_handles_labels()\n", 311 | "axes[0].legend(h1+h2+h3, l1+l2+l3, loc=4)\n", 312 | "\n", 313 | "\n", 314 | "plt.show()" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": null, 320 | "metadata": { 321 | "collapsed": false 322 | }, 323 | "outputs": [], 324 | "source": [ 325 | "print range(-10,10)" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "# Fix Jumps" 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "metadata": {}, 338 | "source": [ 339 | "This tranducer has a jump in the middle of the data caused by adjustments during manual recordings, as well as a jump at the beginning due to the transducer being out of water at the time of measurement." 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": null, 345 | "metadata": { 346 | "collapsed": false 347 | }, 348 | "outputs": [], 349 | "source": [ 350 | "wellfile = smoother(wellfile, 'Level', 30, 3)\n", 351 | "wellfile = smoother(wellfile, 'Conductivity', 30, 3)" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": null, 357 | "metadata": { 358 | "collapsed": false 359 | }, 360 | "outputs": [], 361 | "source": [ 362 | "wellfile = jumpfix(wellfile,'Level',0.1)\n", 363 | "wellfile = jumpfix(wellfile,'Conductivity',0.005)\n", 364 | "wellfile['Level'].plot()" 365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": {}, 370 | "source": [ 371 | "# Remove Barometric Pressure" 372 | ] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "metadata": {}, 377 | "source": [ 378 | "Solinst transducers are nonvented, meaning that they measure absolute pressure. When they are submerged in a well, they are measuring the pressure of the water and the atmosphere. In most cases, we are only interested in the pressure that the water exerts, so we have to subtract the pressure that the atmosphere is exerting." 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": null, 384 | "metadata": { 385 | "collapsed": false 386 | }, 387 | "outputs": [], 388 | "source": [ 389 | "wellbaro = baro_drift_correct(wellfile,barofile,manualfile)" 390 | ] 391 | }, 392 | { 393 | "cell_type": "code", 394 | "execution_count": null, 395 | "metadata": { 396 | "collapsed": false 397 | }, 398 | "outputs": [], 399 | "source": [ 400 | "wellbaro.columns" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": null, 406 | "metadata": { 407 | "collapsed": false 408 | }, 409 | "outputs": [], 410 | "source": [ 411 | "wellbaro['WaterElevation'].plot()\n", 412 | "plt.vlines('11/4/2014 11:16',wellbaro['WaterElevation'].min(),wellbaro['WaterElevation'].max(),color='green')" 413 | ] 414 | }, 415 | { 416 | "cell_type": "code", 417 | "execution_count": null, 418 | "metadata": { 419 | "collapsed": false 420 | }, 421 | "outputs": [], 422 | "source": [ 423 | "Scat(wellbaro,'abs_feet_above_barologger','WaterElevation')" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": { 430 | "collapsed": false 431 | }, 432 | "outputs": [], 433 | "source": [ 434 | "s, m, r = clarks(wellbaro,'abs_feet_above_barologger','WaterElevation')" 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": null, 440 | "metadata": { 441 | "collapsed": false 442 | }, 443 | "outputs": [], 444 | "source": [ 445 | "negcumls, cumls, ymod, resid, lag_time, dwl, dbp = baro_eff(wellbaro,'abs_feet_above_barologger','WaterElevation',100)\n", 446 | "plt.figure()\n", 447 | "lag_trim = lag_time[0:len(negcumls)]\n", 448 | "plt.scatter(lag_trim*24,negcumls, label='b.p. alone')\n", 449 | "plt.xlabel('lag (hours)')\n", 450 | "plt.ylabel('barometric response')\n", 451 | "\n", 452 | "ymin = wellbaro['WaterElevation'].min()\n", 453 | "\n", 454 | "fig, ax = plt.subplots()\n", 455 | "plt.plot(wellbaro.index[1:-1], resid)\n", 456 | "plt.text(x='11/3/2014 1:00',y=ymin+2,s='Injection Began',rotation=90,color='green',fontsize=12)\n", 457 | "y_formatter = tick.ScalarFormatter(useOffset=False)\n", 458 | "ax.yaxis.set_major_formatter(y_formatter)\n", 459 | "plt.vlines('11/4/2014 11:16',ymin+3,wellbaro['WaterElevation'].max(),color='green')\n", 460 | "\n", 461 | "print len(resid)\n", 462 | "print len(wellbaro.index[1:-1])" 463 | ] 464 | }, 465 | { 466 | "cell_type": "code", 467 | "execution_count": null, 468 | "metadata": { 469 | "collapsed": false 470 | }, 471 | "outputs": [], 472 | "source": [ 473 | "wellbaro['corrwl'] = wellbaro['WaterElevation'] - wellbaro['abs_feet_above_barologger']*1\n", 474 | "manualfile['wlelev'] = 4800-manualfile['dtw_ft']\n", 475 | "\n", 476 | "x1 = wellbaro.index.to_datetime()[1:-1] #converts pandas dataframe index into datetime format for graph\n", 477 | "x2 = barofile.index.to_datetime()\n", 478 | "x3 = manualfile.index.to_datetime()\n", 479 | "\n", 480 | "y1 = resid\n", 481 | "y2 = barofile['Level']\n", 482 | "y3 = manualfile['wlelev']\n", 483 | "\n", 484 | "data = [(x1,y1),(x2,y2),(x3,y3)]\n", 485 | "\n", 486 | "fig, ax = plt.subplots()\n", 487 | "\n", 488 | "# Twin the x-axis twice to make independent y-axes.\n", 489 | "axes = [ax, ax.twinx(), ax.twinx()]\n", 490 | "\n", 491 | "# Make some space on the right side for the extra y-axis.\n", 492 | "fig.subplots_adjust(right=0.75)\n", 493 | "\n", 494 | "# Move the last y-axis spine over to the right by 20% of the width of the axes\n", 495 | "axes[-1].spines['right'].set_position(('axes', 1.2))\n", 496 | "\n", 497 | "# To make the border of the right-most axis visible, we need to turn the frame\n", 498 | "# on. This hides the other plots, however, so we need to turn its fill off.\n", 499 | "axes[-1].set_frame_on(True)\n", 500 | "axes[-1].patch.set_visible(False)\n", 501 | "\n", 502 | "# And finally we get to plot things...\n", 503 | "colors = ['Green', 'Red', 'Blue']\n", 504 | "labels = ['Levelogger Pressure (ft)','Barologger Pressure (ft)','Manual Readings (ft to water)' ]\n", 505 | "marks = ['','','o']\n", 506 | "linetypes = ['solid','solid','none']\n", 507 | "\n", 508 | "y_formatter = tick.ScalarFormatter(useOffset=False)\n", 509 | "\n", 510 | "for ax, color, datum, label, mark, linety in zip(axes, colors, data, labels, marks, linetypes):\n", 511 | " ax.plot(datum[0],datum[1], marker=mark, linestyle=linety, color=color, label=label)\n", 512 | " ax.set_ylabel(label, color=color)\n", 513 | " ax.tick_params(axis='y', colors=color)\n", 514 | " ax.yaxis.set_major_formatter(y_formatter)\n", 515 | "\n", 516 | "h1, l1 = axes[0].get_legend_handles_labels()\n", 517 | "h2, l2 = axes[1].get_legend_handles_labels()\n", 518 | "h3, l3 = axes[2].get_legend_handles_labels()\n", 519 | "axes[0].legend(h1+h2+h3, l1+l2+l3, loc=4)\n", 520 | "axes[2].set_ylim(4485,4493)\n", 521 | "\n", 522 | "\n", 523 | "plt.show()" 524 | ] 525 | }, 526 | { 527 | "cell_type": "markdown", 528 | "metadata": {}, 529 | "source": [ 530 | "## Match Measurement Interval of Barometer (Barologger) and Transducer " 531 | ] 532 | }, 533 | { 534 | "cell_type": "markdown", 535 | "metadata": {}, 536 | "source": [ 537 | "It is best to set Solinst transducers (Leveloggers) to start at the same time and to measure at the same frequency as your Barologger. Sometimes, this does not happen. To solve mismatches in sampling interval, we can resample the barometer data to same base (start time) and frequency as the transducer." 538 | ] 539 | }, 540 | { 541 | "cell_type": "markdown", 542 | "metadata": {}, 543 | "source": [ 544 | "Using the `hourly_resample` function above, we can resample each transducer dataset." 545 | ] 546 | } 547 | ], 548 | "metadata": { 549 | "kernelspec": { 550 | "display_name": "Python 2", 551 | "language": "python", 552 | "name": "python2" 553 | }, 554 | "language_info": { 555 | "codemirror_mode": { 556 | "name": "ipython", 557 | "version": 2 558 | }, 559 | "file_extension": ".py", 560 | "mimetype": "text/x-python", 561 | "name": "python", 562 | "nbconvert_exporter": "python", 563 | "pygments_lexer": "ipython2", 564 | "version": "2.7.10" 565 | } 566 | }, 567 | "nbformat": 4, 568 | "nbformat_minor": 0 569 | } 570 | -------------------------------------------------------------------------------- /wellapplication/ros.py: -------------------------------------------------------------------------------- 1 | # directly copied from https://github.com/Geosyntec/wqio/blob/master/wqio/ros.py 2 | # Copyright (c) 2015, Geosyntec Consultants 3 | # All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # * Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # * Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # * Neither the name of wqio nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | import warnings 31 | 32 | import numpy 33 | from scipy import stats 34 | import pandas 35 | 36 | 37 | def _ros_sort(df, result, censorship): 38 | """ 39 | This function prepares a dataframe for ROS. It sorts ascending with 40 | left-censored observations on top. Censored results larger than the 41 | maximum uncensored results are removed from the dataframe. 42 | Parameters 43 | ---------- 44 | df : pandas.DataFrame 45 | result : str 46 | Name of the column in the dataframe that contains observed 47 | values. Censored values should be set to the detection (upper) 48 | limit. 49 | censorship : str 50 | Name of the column in the dataframe that indicates that a 51 | result is left-censored. (i.e., True -> censored, 52 | False -> uncensored) 53 | Returns 54 | ------ 55 | sorted_df : pandas.DataFrame 56 | The sorted dataframe with all columns dropped except the 57 | result and censorship columns. 58 | """ 59 | 60 | # separate uncensored data from censored data 61 | max_uncensored = df.loc[~df[censorship], result].max() 62 | if (df.loc[df[censorship], result] > max_uncensored).any(): 63 | msg = ( 64 | "Dropping censored results greater than " 65 | "the max uncensored result." 66 | ) 67 | warnings.warn(msg) 68 | 69 | df_sorted = ( 70 | df[[censorship, result]] 71 | .sort_values(by=[censorship, result], ascending=[False, True]) 72 | .where(lambda df: 73 | (~df[censorship]) | # uncensored values 74 | ((df[result] < max_uncensored) & df[censorship]) # censored values < max_uncen 75 | ) 76 | .dropna(how='all') 77 | .reset_index(drop=True) 78 | .assign(**{censorship: lambda df: df[censorship].astype(bool)}) 79 | ) 80 | return df_sorted[[result, censorship]] 81 | 82 | 83 | def cohn_numbers(df, result, censorship): 84 | """ 85 | Computes the Cohn numbers for the detection limits in the dataset. 86 | The Cohn Numbers are: 87 | - :math:`A_j =` the number of uncensored obs above the 88 | :math:`j^\mathrm{th}` threshold. 89 | - :math:`B_j =` the number of observations (cen & uncen) below 90 | the :math:`j^\mathrm{th}` threshold. 91 | - :math:`C_j =` the number of censored observations at the 92 | :math:`j^\mathrm{th}` threshold. 93 | - :math:`\mathrm{PE}_j =` the probability of exceeding the 94 | :math:`j^\mathrm{th}` threshold 95 | - :math:`\mathrm{DL}_j =` the unique, sorted detection limits 96 | - :math:`\mathrm{DL}_{j+1} = \mathrm{DL}_j` shifted down a 97 | single index (row) 98 | Parameters 99 | ---------- 100 | dataframe : pandas.DataFrame 101 | result : str 102 | Name of the column in the dataframe that contains observed 103 | values. Censored values should be set to the detection (upper) 104 | limit. 105 | censorship : str 106 | Name of the column in the dataframe that indicates that a 107 | result is left-censored. (i.e., True -> censored, 108 | False -> uncensored) 109 | Returns 110 | ------- 111 | cohn : pandas.DataFrame 112 | """ 113 | 114 | def nuncen_above(row): 115 | """ A, the number of uncensored obs above the given threshold. 116 | """ 117 | 118 | # index of results above the lower_dl DL 119 | above = df[result] >= row['lower_dl'] 120 | 121 | # index of results below the upper_dl DL 122 | below = df[result] < row['upper_dl'] 123 | 124 | # index of non-detect results 125 | detect = df[censorship].eq(False) 126 | 127 | # return the number of results where all conditions are True 128 | return df[above & below & detect].shape[0] 129 | 130 | def nobs_below(row): 131 | """ B, the number of observations (cen & uncen) below the given 132 | threshold 133 | """ 134 | 135 | # index of data less than the lower_dl DL 136 | less_than = df[result] < row['lower_dl'] 137 | 138 | # index of data less than or equal to the lower_dl DL 139 | less_thanequal = df[result] <= row['lower_dl'] 140 | 141 | # index of detects, non-detects 142 | uncensored = df[censorship].eq(False) 143 | censored = df[censorship].eq(True) 144 | 145 | # number results less than or equal to lower_dl DL and non-detect 146 | LTE_censored = df[less_thanequal & censored].shape[0] 147 | 148 | # number of results less than lower_dl DL and detected 149 | LT_uncensored = df[less_than & uncensored].shape[0] 150 | 151 | # return the sum 152 | return LTE_censored + LT_uncensored 153 | 154 | def ncen_equal(row): 155 | """ C, the number of censored observations at the given 156 | threshold. 157 | """ 158 | 159 | censored_index = df[censorship] 160 | censored_data = df[result][censored_index] 161 | censored_below = censored_data == row['lower_dl'] 162 | return censored_below.sum() 163 | 164 | def set_upper_limit(cohn): 165 | """ Sets the upper_dl DL for each row of the Cohn dataframe. """ 166 | if cohn.shape[0] > 1: 167 | return cohn['lower_dl'].shift(-1).fillna(value=numpy.inf) 168 | else: 169 | return [numpy.inf] 170 | 171 | def compute_PE(A, B): 172 | """ Computes the probability of excedance for each row of the 173 | Cohn dataframe. """ 174 | N = len(A) 175 | PE = numpy.empty(N, dtype='float64') 176 | PE[-1] = 0.0 177 | for j in range(N - 2, -1, -1): 178 | PE[j] = PE[j + 1] + (1 - PE[j + 1]) * A[j] / (A[j] + B[j]) 179 | 180 | return PE 181 | 182 | # unique, sorted detection limts 183 | censored_data = df[censorship] 184 | DLs = pandas.unique(df.loc[censored_data, result]) 185 | DLs.sort() 186 | 187 | # if there is a results smaller than the minimum detection limit, 188 | # add that value to the array 189 | if DLs.shape[0] > 0: 190 | if df[result].min() < DLs.min(): 191 | DLs = numpy.hstack([df[result].min(), DLs]) 192 | 193 | # create a dataframe 194 | cohn = ( 195 | pandas.DataFrame(DLs, columns=['lower_dl']) 196 | .assign(upper_dl=lambda df: set_upper_limit(df)) 197 | .assign(nuncen_above=lambda df: df.apply(nuncen_above, axis=1)) 198 | .assign(nobs_below=lambda df: df.apply(nobs_below, axis=1)) 199 | .assign(ncen_equal=lambda df: df.apply(ncen_equal, axis=1)) 200 | .reindex(range(DLs.shape[0] + 1)) 201 | .assign(prob_exceedance=lambda df: compute_PE(df['nuncen_above'], df['nobs_below'])) 202 | ) 203 | 204 | else: 205 | dl_cols = ['lower_dl', 'upper_dl', 'nuncen_above', 206 | 'nobs_below', 'ncen_equal', 'prob_exceedance'] 207 | cohn = pandas.DataFrame(numpy.empty((0, len(dl_cols))), columns=dl_cols) 208 | 209 | return cohn 210 | 211 | 212 | def _detection_limit_index(res, cohn): 213 | """ Helper function to create an array of indices for the detection 214 | limits (cohn) corresponding to each data point. 215 | Parameters 216 | ---------- 217 | res : float 218 | A single observed result from the larger dataset. 219 | cohn : pandas.DataFrame 220 | Dataframe of Cohn numbers. 221 | Returns 222 | ------- 223 | det_limit_index : int 224 | The index of the corresponding detection limit in `cohn` 225 | See also 226 | -------- 227 | cohn_numbers 228 | """ 229 | 230 | if cohn.shape[0] > 0: 231 | index, = numpy.where(cohn['lower_dl'] <= res) 232 | det_limit_index = index[-1] 233 | else: 234 | det_limit_index = 0 235 | 236 | return det_limit_index 237 | 238 | 239 | def _ros_group_rank(df, dl_idx, censorship): 240 | """ 241 | Ranks each result within the groups defined by the record's 242 | detection limit index and censorship. 243 | Parameters 244 | ---------- 245 | df : pandas.DataFrame 246 | dl_idx : str 247 | Name of the column in the dataframe the index of the result's 248 | corresponding detection limit in the `cohn` dataframe. 249 | censorship : str 250 | Name of the column in the dataframe that indicates that a 251 | result is left-censored. (i.e., True -> censored, 252 | False -> uncensored) 253 | Returns 254 | ------- 255 | ranks : numpy.array 256 | Array of ranks for the dataset. 257 | """ 258 | 259 | ranks = ( 260 | df.assign(rank=1) 261 | .groupby(by=[dl_idx, censorship])['rank'] 262 | .transform(lambda g: g.cumsum()) 263 | ) 264 | return ranks 265 | 266 | 267 | def _ros_plot_pos(row, censorship, cohn): 268 | """ 269 | Compute the ROS plotting position for a result based on its rank, 270 | censorship, detection limit index. 271 | Parameters 272 | ---------- 273 | row : pandas.Series or dict-like 274 | Full observation (row) from a censored dataset. Requires a 275 | 'rank', 'detection_limit', and `censorship` column. 276 | censorship : str 277 | Name of the column in the dataframe that indicates that a 278 | result is left-censored. (i.e., True -> censored, 279 | False -> uncensored) 280 | cohn : pandas.DataFrame 281 | Dataframe of Cohn numbers. 282 | Returns 283 | ------- 284 | plotting_position : float 285 | See also 286 | -------- 287 | cohn_numbers 288 | """ 289 | 290 | DL_index = row['det_limit_index'] 291 | rank = row['rank'] 292 | censored = row[censorship] 293 | 294 | dl_1 = cohn.iloc[DL_index] 295 | dl_2 = cohn.iloc[DL_index + 1] 296 | if censored: 297 | return (1 - dl_1['prob_exceedance']) * rank / (dl_1['ncen_equal'] + 1) 298 | else: 299 | return (1 - dl_1['prob_exceedance']) + (dl_1['prob_exceedance'] - dl_2['prob_exceedance']) * rank / (dl_1['nuncen_above'] + 1) 300 | 301 | 302 | def _norm_plot_pos(results): 303 | """ 304 | Computes standard normal (Gaussian) plotting positions using scipy. 305 | Parameters 306 | ---------- 307 | results : array-like 308 | Sequence of observed quantities. 309 | Returns 310 | ------- 311 | plotting_position : array of floats 312 | """ 313 | ppos, sorted_res = stats.probplot(results, fit=False) 314 | return stats.norm.cdf(ppos) 315 | 316 | 317 | def plotting_positions(df, censorship, cohn): 318 | """ 319 | Compute the ROS plotting positions for results based on their rank, 320 | censorship, detection limit index. 321 | Parameters 322 | ---------- 323 | df : pandas.DataFrame. 324 | censorship : str 325 | Name of the column in the dataframe that indicates that a 326 | result is left-censored. (i.e., True -> censored, 327 | False -> uncensored) 328 | cohn : pandas.DataFrame 329 | Dataframe of Cohn numbers. 330 | Returns 331 | ------- 332 | plotting_position : array of float 333 | See also 334 | -------- 335 | cohn_numbers 336 | """ 337 | 338 | plot_pos = df.apply(lambda r: _ros_plot_pos(r, censorship, cohn), axis=1) 339 | 340 | # correctly sort the plotting positions of the ND data: 341 | ND_plotpos = plot_pos[df[censorship]] 342 | ND_plotpos.values.sort() 343 | plot_pos.loc[df[censorship]] = ND_plotpos 344 | 345 | return plot_pos 346 | 347 | 348 | def _ros_estimate(df, result, censorship, transform_in, transform_out): 349 | """ Computed the estimated censored from the best-fit line of a 350 | probability plot of the uncensored values. 351 | Parameters 352 | ---------- 353 | df : pandas.DataFrame 354 | result : str 355 | Name of the column in the dataframe that contains observed 356 | values. Censored values should be set to the detection (upper) 357 | limit. 358 | censorship : str 359 | Name of the column in the dataframe that indicates that a 360 | result is left-censored. (i.e., True -> censored, 361 | False -> uncensored) 362 | transform_in, transform_out : callable 363 | Transformations to be applied to the data prior to fitting 364 | the line and after estimated values from that line. Typically, 365 | `numpy.log` and `numpy.exp` are used, respectively. 366 | Returns 367 | ------- 368 | estimated : pandas.DataFrame 369 | A new dataframe with two new columns: "estimated" and "final". 370 | The "estimated" column contains of the values inferred from the 371 | best-fit line. The "final" column contains the estimated values 372 | only where the original results were censored, and the original 373 | results everwhere else. 374 | """ 375 | 376 | # detect/non-detect selectors 377 | uncensored_mask = df[censorship].eq(False) 378 | censored_mask = df[censorship].eq(True) 379 | 380 | # fit a line to the logs of the detected data 381 | fit_params = stats.linregress( 382 | df['Zprelim'][uncensored_mask], 383 | transform_in(df[result][uncensored_mask]) 384 | ) 385 | 386 | # pull out the slope and intercept for use later 387 | slope, intercept = fit_params[:2] 388 | 389 | # model the data based on the best-fit curve 390 | df = ( 391 | df.assign(estimated=transform_out(slope * df['Zprelim'][censored_mask] + intercept)) 392 | .assign(final=lambda df: numpy.where(df[censorship], df['estimated'], df[result])) 393 | ) 394 | 395 | return df 396 | 397 | 398 | def _do_ros(df, result, censorship, transform_in, transform_out): 399 | """ 400 | Prepares a dataframe for, and then esimates the values of a censored 401 | dataset using Regression on Order Statistics 402 | Parameters 403 | ---------- 404 | df : pandas.DataFrame 405 | result : str 406 | Name of the column in the dataframe that contains observed 407 | values. Censored values should be set to the detection (upper) 408 | limit. 409 | censorship : str 410 | Name of the column in the dataframe that indicates that a 411 | result is left-censored. (i.e., True -> censored, 412 | False -> uncensored) 413 | transform_in, transform_out : callable 414 | Transformations to be applied to the data prior to fitting 415 | the line and after estimated values from that line. Typically, 416 | `numpy.log` and `numpy.exp` are used, respectively. 417 | Returns 418 | ------- 419 | estimated : pandas.DataFrame 420 | A new dataframe with two new columns: "estimated" and "final". 421 | The "estimated" column contains of the values inferred from the 422 | best-fit line. The "final" column contains the estimated values 423 | only where the original results were censored, and the original 424 | results everwhere else. 425 | """ 426 | 427 | # compute the Cohn numbers 428 | cohn = cohn_numbers(df, result=result, censorship=censorship) 429 | 430 | modeled = ( 431 | df.pipe(_ros_sort, result=result, censorship=censorship) 432 | .assign(det_limit_index=lambda df: df[result].apply(_detection_limit_index, args=(cohn,))) 433 | .assign(rank=lambda df: _ros_group_rank(df, 'det_limit_index', censorship)) 434 | .assign(plot_pos=lambda df: plotting_positions(df, censorship, cohn)) 435 | .assign(Zprelim=lambda df: stats.norm.ppf(df['plot_pos'])) 436 | .pipe(_ros_estimate, result, censorship, transform_in, transform_out) 437 | ) 438 | 439 | return modeled 440 | 441 | 442 | def ROS(result, censorship, df=None, min_uncensored=2, 443 | max_fraction_censored=0.8, substitution_fraction=0.5, 444 | transform_in=numpy.log, transform_out=numpy.exp, 445 | as_array=False): 446 | """ 447 | Impute censored dataset using Regression on Order Statistics (ROS) 448 | or simple substitution if insufficient uncensored data exists. 449 | Method described in *Nondetects and Data Analysis* by Dennis R. 450 | Helsel (John Wiley, 2005) to estimate the left-censored (non-detect) 451 | values of a dataset. 452 | Parameters 453 | ---------- 454 | result : str or array-like 455 | Label of the column or the float array of censored results 456 | censorship : str 457 | Label of the column or the bool array of the censorship 458 | status of the results. 459 | * True if censored, 460 | * False if uncensored 461 | df : pandas.DataFrame, optional 462 | If `result` and `censorship` are labels, this is the DataFrame 463 | that contains those columns. 464 | min_uncensored : int (default is 2) 465 | The minimum number of uncensored values required before ROS 466 | can be used to impute the censored results. When this criterion 467 | is not met, simple substituion is used instead. 468 | max_fraction_censored : float (default is 0.8) 469 | The maximum fraction of censored data below which ROS can be 470 | used to impute the censored results. When this fraction is 471 | exceeded, simple substituion is used instead. 472 | substitution_fraction : float (default is 0.5) 473 | The fraction of the detection limit to be used during simple 474 | substitution of the censored values. 475 | transform_in : callable (default is numpy.log) 476 | Transformation to be applied to the values prior to fitting a 477 | line to the plotting positions vs. uncensored values. 478 | transform_out : callable (default is numpy.exp) 479 | Transformation to be applied to the imputed censored values 480 | estimated from the previously computed best-fit line. 481 | as_array : bool (default is True) 482 | When True, a numpy array of the imputed results is returned. 483 | Otherwise, a modified copy of the original dataframe with all 484 | of the intermediate calculations is returned. 485 | Returns 486 | ------- 487 | imputed : numpy.array (default) or pandas.DataFrame 488 | The final results where the censored values have either been 489 | imputed through ROS or substituted as a fraction of the 490 | detection limit. 491 | """ 492 | 493 | # process arrays into a dataframe, if necessary 494 | if df is None: 495 | df = pandas.DataFrame({'res': result, 'cen': censorship}) 496 | result = 'res' 497 | censorship = 'cen' 498 | 499 | # basic counts/metrics of the dataset 500 | N_observations = df.shape[0] 501 | N_censored = df[censorship].astype(int).sum() 502 | N_uncensored = N_observations - N_censored 503 | fraction_censored = N_censored / N_observations 504 | print(N_censored,N_uncensored,fraction_censored) 505 | 506 | # add plotting positions if there are no censored values 507 | if N_censored == 0: 508 | output = df[[result, censorship]].assign(final=df[result]) 509 | 510 | # substitute w/ fraction of the DLs if there's insufficient 511 | # uncensored data 512 | elif (N_uncensored < min_uncensored) or (fraction_censored > max_fraction_censored): 513 | final = numpy.where(df[censorship], df[result] * substitution_fraction, df[result]) 514 | output = df.assign(final=final)[[result, censorship, 'final']] 515 | 516 | # normal ROS stuff 517 | else: 518 | output = _do_ros(df, result, censorship, transform_in, transform_out) 519 | 520 | # convert to an array if necessary 521 | if as_array: 522 | output = output['final'].values 523 | 524 | 525 | return output -------------------------------------------------------------------------------- /wellapplication/chem.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Jan 05 09:50:51 2016 4 | 5 | @author: paulinkenbrandt 6 | """ 7 | from __future__ import absolute_import, division, print_function, unicode_literals 8 | import pandas as pd 9 | from datetime import datetime 10 | import numpy as np 11 | import requests 12 | 13 | class WQP(object): 14 | """Downloads Water Quality Data from thw Water Quality Portal based on parameters entered 15 | :param values: query parameter designating location to select site; this is the Argument for the REST parameter in 16 | table 1 of https://www.waterqualitydata.us/webservices_documentation/ 17 | :param loc_type: type of query to perform; valid inputs include 'huc', 'bBox', 'countycode', 'siteid'; 18 | this is the REST parameter of table 1 of https://www.waterqualitydata.us/webservices_documentation/ 19 | :type loc_type: str 20 | :type values: str 21 | :param **kwargs: additional Rest Parameters 22 | 23 | :Example: 24 | >>> wq = WQP('-111.54,40.28,-111.29,40.48','bBox') 25 | https://www.waterqualitydata.us/Result/search?mimeType=csv&zip=no&siteType=Spring&siteType=Well&characteristicType=Inorganics%2C+Major%2C+Metals&characteristicType=Inorganics%2C+Major%2C+Non-metals&characteristicType=Nutrient&characteristicType=Physical&bBox=-111.54%2C40.28%2C-111.29%2C40.48&sorted=no&sampleMedia=Water 26 | 27 | """ 28 | 29 | def __init__(self, values, loc_type, **kwargs): 30 | r"""Downloads Water Quality Data from thw Water Quality Portal based on parameters entered 31 | """ 32 | self.loc_type = loc_type 33 | self.values = values 34 | self.url = 'https://www.waterqualitydata.us/' 35 | self.geo_criteria = ['sites', 'stateCd', 'huc', 'countyCd', 'bBox'] 36 | self.cTgroups = ['Inorganics, Major, Metals', 'Inorganics, Major, Non-metals', 'Nutrient', 'Physical'] 37 | self.results = self.get_wqp_results('Result', **kwargs) 38 | self.stations = self.get_wqp_stations('Station', **kwargs) 39 | 40 | def get_response(self, service, **kwargs): 41 | """ Returns a dictionary of data requested by each function. 42 | :param service: options include 'Station' or 'Results' 43 | table 1 of https://www.waterqualitydata.us/webservices_documentation/ 44 | """ 45 | http_error = 'Could not connect to the API. This could be because you have no internet connection, a parameter' \ 46 | ' was input incorrectly, or the API is currently down. Please try again.' 47 | # For python 3.4 48 | # try: 49 | kwargs[self.loc_type] = self.values 50 | kwargs['mimeType'] = 'csv' 51 | kwargs['zip'] = 'no' 52 | kwargs['sorted'] = 'no' 53 | 54 | if 'siteType' not in kwargs: 55 | kwargs['sampleMedia'] = 'Water' 56 | 57 | if 'siteType' not in kwargs: 58 | kwargs['siteType'] = ['Spring', 'Well'] 59 | print('This function is biased towards groundwater. For all sites, use') 60 | 61 | if 'characteristicType' not in kwargs: 62 | kwargs['characteristicType'] = self.cTgroups 63 | 64 | total_url = self.url + service + '/search?' 65 | response_ob = requests.get(total_url, params=kwargs) 66 | 67 | return response_ob 68 | 69 | def get_wqp_stations(self, service, **kwargs): 70 | nwis_dict = self.get_response(service, **kwargs).url 71 | 72 | stations = pd.read_csv(nwis_dict) 73 | return stations 74 | 75 | def get_wqp_results(self, service, **kwargs): 76 | """Bring data from WQP site into a Pandas DataFrame for analysis""" 77 | 78 | # set data types 79 | Rdtypes = {"OrganizationIdentifier": np.str_, "OrganizationFormalName": np.str_, "ActivityIdentifier": np.str_, 80 | "ActivityStartTime/Time": np.str_, 81 | "ActivityTypeCode": np.str_, "ActivityMediaName": np.str_, "ActivityMediaSubdivisionName": np.str_, 82 | "ActivityStartDate": np.str_, "ActivityStartTime/TimeZoneCode": np.str_, 83 | "ActivityEndDate": np.str_, "ActivityEndTime/Time": np.str_, "ActivityEndTime/TimeZoneCode": np.str_, 84 | "ActivityDepthHeightMeasure/MeasureValue": np.float16, 85 | "ActivityDepthHeightMeasure/MeasureUnitCode": np.str_, 86 | "ActivityDepthAltitudeReferencePointText": np.str_, 87 | "ActivityTopDepthHeightMeasure/MeasureValue": np.float16, 88 | "ActivityTopDepthHeightMeasure/MeasureUnitCode": np.str_, 89 | "ActivityBottomDepthHeightMeasure/MeasureValue": np.float16, 90 | "ActivityBottomDepthHeightMeasure/MeasureUnitCode": np.str_, 91 | "ProjectIdentifier": np.str_, "ActivityConductingOrganizationText": np.str_, 92 | "MonitoringLocationIdentifier": np.str_, "ActivityCommentText": np.str_, 93 | "SampleAquifer": np.str_, "HydrologicCondition": np.str_, "HydrologicEvent": np.str_, 94 | "SampleCollectionMethod/MethodIdentifier": np.str_, 95 | "SampleCollectionMethod/MethodIdentifierContext": np.str_, 96 | "SampleCollectionMethod/MethodName": np.str_, "SampleCollectionEquipmentName": np.str_, 97 | "ResultDetectionConditionText": np.str_, "CharacteristicName": np.str_, 98 | "ResultSampleFractionText": np.str_, 99 | "ResultMeasureValue": np.str_, "ResultMeasure/MeasureUnitCode": np.str_, 100 | "MeasureQualifierCode": np.str_, 101 | "ResultStatusIdentifier": np.str_, "StatisticalBaseCode": np.str_, "ResultValueTypeName": np.str_, 102 | "ResultWeightBasisText": np.str_, "ResultTimeBasisText": np.str_, 103 | "ResultTemperatureBasisText": np.str_, 104 | "ResultParticleSizeBasisText": np.str_, "PrecisionValue": np.str_, "ResultCommentText": np.str_, 105 | "USGSPCode": np.str_, "ResultDepthHeightMeasure/MeasureValue": np.float16, 106 | "ResultDepthHeightMeasure/MeasureUnitCode": np.str_, 107 | "ResultDepthAltitudeReferencePointText": np.str_, 108 | "SubjectTaxonomicName": np.str_, "SampleTissueAnatomyName": np.str_, 109 | "ResultAnalyticalMethod/MethodIdentifier": np.str_, 110 | "ResultAnalyticalMethod/MethodIdentifierContext": np.str_, 111 | "ResultAnalyticalMethod/MethodName": np.str_, "MethodDescriptionText": np.str_, 112 | "LaboratoryName": np.str_, 113 | "AnalysisStartDate": np.str_, "ResultLaboratoryCommentText": np.str_, 114 | "DetectionQuantitationLimitTypeName": np.str_, 115 | "DetectionQuantitationLimitMeasure/MeasureValue": np.str_, 116 | "DetectionQuantitationLimitMeasure/MeasureUnitCode": np.str_, "PreparationStartDate": np.str_, 117 | "ProviderName": np.str_} 118 | 119 | # define date field indices 120 | dt = [6, 56, 61] 121 | csv = self.get_response(service, **kwargs).url 122 | print(csv) 123 | # read csv into DataFrame 124 | df = pd.read_csv(csv, dtype=Rdtypes, parse_dates=dt) 125 | return df 126 | 127 | def massage_results(self, df = ''): 128 | """Massage WQP result data for analysis 129 | 130 | When called, this function: 131 | - renames all of the results fields, abbreviating the fields and eliminating slashes and spaces. 132 | - parses the datetime fields, fixing errors when possible (see :func:`datetimefix`) 133 | - standardizes units to mg/L 134 | - normalizes nutrient species(See :func:`parnorm`) 135 | 136 | 137 | """ 138 | if df == '': 139 | df = self.results 140 | 141 | # Map new names for columns 142 | ResFieldDict = {"AnalysisStartDate": "AnalysisDate", "ResultAnalyticalMethod/MethodIdentifier": "AnalytMeth", 143 | "ResultAnalyticalMethod/MethodName": "AnalytMethId", 144 | "ResultDetectionConditionText": "DetectCond", 145 | "ResultLaboratoryCommentText": "LabComments", "LaboratoryName": "LabName", 146 | "DetectionQuantitationLimitTypeName": "LimitType", 147 | "DetectionQuantitationLimitMeasure/MeasureValue": "MDL", 148 | "DetectionQuantitationLimitMeasure/MeasureUnitCode": "MDLUnit", 149 | "MethodDescriptionText": "MethodDescript", 150 | "OrganizationIdentifier": "OrgId", "OrganizationFormalName": "OrgName", 151 | "CharacteristicName": "Param", 152 | "ProjectIdentifier": "ProjectId", "MeasureQualifierCode": "QualCode", 153 | "ResultCommentText": "ResultComment", 154 | "ResultStatusIdentifier": "ResultStatus", "ResultMeasureValue": "ResultValue", 155 | "ActivityCommentText": "SampComment", "ActivityDepthHeightMeasure/MeasureValue": "SampDepth", 156 | "ActivityDepthAltitudeReferencePointText": "SampDepthRef", 157 | "ActivityDepthHeightMeasure/MeasureUnitCode": "SampDepthU", 158 | "SampleCollectionEquipmentName": "SampEquip", 159 | "ResultSampleFractionText": "SampFrac", "ActivityStartDate": "SampleDate", 160 | "ActivityIdentifier": "SampleId", 161 | "ActivityStartTime/Time": "SampleTime", "ActivityMediaSubdivisionName": "SampMedia", 162 | "SampleCollectionMethod/MethodIdentifier": "SampMeth", 163 | "SampleCollectionMethod/MethodName": "SampMethName", 164 | "ActivityTypeCode": "SampType", "MonitoringLocationIdentifier": "StationId", 165 | "ResultMeasure/MeasureUnitCode": "Unit", "USGSPCode": "USGSPCode"} 166 | 167 | # Rename Data 168 | df = self.results 169 | df1 = df.rename(columns=ResFieldDict) 170 | 171 | # Remove unwanted and bad times 172 | df1["SampleDate"] = df1[["SampleDate", "SampleTime"]].apply(lambda x: self.datetimefix(x, "%Y-%m-%d %H:%M"), 1) 173 | 174 | # Define unneeded fields to drop 175 | resdroplist = ["ActivityBottomDepthHeightMeasure/MeasureUnitCode", 176 | "ActivityBottomDepthHeightMeasure/MeasureValue", 177 | "ActivityConductingOrganizationText", "ActivityEndDate", "ActivityEndTime/Time", 178 | "ActivityEndTime/TimeZoneCode", "ActivityMediaName", "ActivityStartTime/TimeZoneCode", 179 | "ActivityTopDepthHeightMeasure/MeasureUnitCode", "ActivityTopDepthHeightMeasure/MeasureValue", 180 | "HydrologicCondition", "HydrologicEvent", "PrecisionValue", "PreparationStartDate", 181 | "ProviderName", 182 | "ResultAnalyticalMethod/MethodIdentifierContext", "ResultDepthAltitudeReferencePointText", 183 | "ResultDepthHeightMeasure/MeasureUnitCode", "ResultDepthHeightMeasure/MeasureValue", 184 | "ResultParticleSizeBasisText", "ResultTemperatureBasisText", 185 | "ResultTimeBasisText", "ResultValueTypeName", "ResultWeightBasisText", "SampleAquifer", 186 | "SampleCollectionMethod/MethodIdentifierContext", "SampleTissueAnatomyName", 187 | "StatisticalBaseCode", 188 | "SubjectTaxonomicName", "SampleTime"] 189 | 190 | # Drop fields 191 | df1 = df1.drop(resdroplist, axis=1) 192 | 193 | # convert results and mdl to float 194 | df1['ResultValue'] = pd.to_numeric(df1['ResultValue'], errors='coerce') 195 | df1['MDL'] = pd.to_numeric(df1['MDL'], errors='coerce') 196 | 197 | # match old and new station ids 198 | df1['StationId'] = df1['StationId'].str.replace('_WQX-', '-') 199 | 200 | # standardize all ug/l data to mg/l 201 | df1.Unit = df1.Unit.apply(lambda x: str(x).rstrip(), 1) 202 | df1.ResultValue = df1[["ResultValue", "Unit"]].apply( 203 | lambda x: x[0] / 1000 if str(x[1]).lower() == "ug/l" else x[0], 1) 204 | df1.Unit = df1.Unit.apply(lambda x: self.unitfix(x), 1) 205 | 206 | df1['Param'], df1['ResultValue'], df1['Unit'] = zip( 207 | *df1[['Param', 'ResultValue', 'Unit']].apply(lambda x: self.parnorm(x), 1)) 208 | 209 | #self.results = df1 210 | 211 | return df1 212 | 213 | def datetimefix(self, x, form): 214 | """This script cleans date-time errors 215 | 216 | :param x: date-time string 217 | :param form: format of date-time string 218 | 219 | :returns: formatted datetime type 220 | """ 221 | d = str(x[0]).lstrip().rstrip()[0:10] 222 | t = str(x[1]).lstrip().rstrip()[0:5].zfill(5) 223 | try: 224 | int(d[0:2]) 225 | except(ValueError, TypeError, NameError): 226 | return np.nan 227 | try: 228 | int(t[0:2]) 229 | int(t[3:5]) 230 | except(ValueError, TypeError, NameError): 231 | t = "00:00" 232 | 233 | if int(t[0:2]) > 23: 234 | t = "00:00" 235 | elif int(t[3:5]) > 59: 236 | t = "00:00" 237 | else: 238 | t = t[0:2].zfill(2) + ":" + t[3:5] 239 | return datetime.strptime(d + " " + t, form) 240 | 241 | def parnorm(self, x): 242 | """Standardizes nutrient species 243 | 244 | - Nitrate as N to Nitrate 245 | - Nitrite as N to Nitrite 246 | - Sulfate as s to Sulfate 247 | """ 248 | p = str(x[0]).rstrip().lstrip().lower() 249 | u = str(x[2]).rstrip().lstrip().lower() 250 | if p == 'nitrate' and u == 'mg/l as n': 251 | return 'Nitrate', x[1] * 4.427, 'mg/l' 252 | elif p == 'nitrite' and u == 'mg/l as n': 253 | return 'Nitrite', x[1] * 3.285, 'mg/l' 254 | elif p == 'ammonia-nitrogen' or p == 'ammonia-nitrogen as n' or p == 'ammonia and ammonium': 255 | return 'Ammonium', x[1] * 1.288, 'mg/l' 256 | elif p == 'ammonium' and u == 'mg/l as n': 257 | return 'Ammonium', x[1] * 1.288, 'mg/l' 258 | elif p == 'sulfate as s': 259 | return 'Sulfate', x[1] * 2.996, 'mg/l' 260 | elif p in ('phosphate-phosphorus', 'phosphate-phosphorus as p', 'orthophosphate as p'): 261 | return 'Phosphate', x[1] * 3.066, 'mg/l' 262 | elif (p == 'phosphate' or p == 'orthophosphate') and u == 'mg/l as p': 263 | return 'Phosphate', x[1] * 3.066, 'mg/l' 264 | elif u == 'ug/l': 265 | return x[0], x[1] / 1000, 'mg/l' 266 | else: 267 | return x[0], x[1], str(x[2]).rstrip() 268 | 269 | def unitfix(self, x): 270 | """Standardizes unit labels from ug/l to mg/l 271 | 272 | :param x: unit label to convert 273 | :type x: str 274 | 275 | :returns: unit string as mg/l 276 | .. warning:: must be used with a value conversion tool 277 | """ 278 | z = str(x).lower() 279 | if z == "ug/l": 280 | return "mg/l" 281 | elif z == "mg/l": 282 | return "mg/l" 283 | else: 284 | return x 285 | 286 | def massage_stations(self): 287 | """Massage WQP station data for analysis 288 | """ 289 | StatFieldDict = {"MonitoringLocationIdentifier": "StationId", "AquiferName": "Aquifer", 290 | "AquiferTypeName": "AquiferType", 291 | "ConstructionDateText": "ConstDate", "CountyCode": "CountyCode", 292 | "WellDepthMeasure/MeasureValue": "Depth", 293 | "WellDepthMeasure/MeasureUnitCode": "DepthUnit", "VerticalMeasure/MeasureValue": "Elev", 294 | "VerticalAccuracyMeasure/MeasureValue": "ElevAcc", 295 | "VerticalAccuracyMeasure/MeasureUnitCode": "ElevAccUnit", 296 | "VerticalCollectionMethodName": "ElevMeth", 297 | "VerticalCoordinateReferenceSystemDatumName": "ElevRef", 298 | "VerticalMeasure/MeasureUnitCode": "ElevUnit", "FormationTypeText": "FmType", 299 | "WellHoleDepthMeasure/MeasureValue": "HoleDepth", 300 | "WellHoleDepthMeasure/MeasureUnitCode": "HoleDUnit", 301 | "HorizontalAccuracyMeasure/MeasureValue": "HorAcc", 302 | "HorizontalAccuracyMeasure/MeasureUnitCode": "HorAccUnit", 303 | "HorizontalCollectionMethodName": "HorCollMeth", 304 | "HorizontalCoordinateReferenceSystemDatumName": "HorRef", 305 | "HUCEightDigitCode": "HUC8", "LatitudeMeasure": "Lat_Y", "LongitudeMeasure": "Lon_X", 306 | "OrganizationIdentifier": "OrgId", "OrganizationFormalName": "OrgName", 307 | "StateCode": "StateCode", 308 | "MonitoringLocationDescriptionText": "StationComment", "MonitoringLocationName": "StationName", 309 | "MonitoringLocationTypeName": "StationType"} 310 | 311 | df = self.stations 312 | df.rename(columns=StatFieldDict, inplace=True) 313 | 314 | statdroplist = ["ContributingDrainageAreaMeasure/MeasureUnitCode", 315 | "ContributingDrainageAreaMeasure/MeasureValue", 316 | "DrainageAreaMeasure/MeasureUnitCode", "DrainageAreaMeasure/MeasureValue", "CountryCode", 317 | "ProviderName", 318 | "SourceMapScaleNumeric"] 319 | 320 | df.drop(statdroplist, inplace=True, axis=1) 321 | 322 | TypeDict = {"River/Stream": "Stream", "Stream: Canal": "Stream", 323 | "Well: Test hole not completed as a well": "Well"} 324 | 325 | # Make station types in the StationType field consistent for easier summary and compilation later on. 326 | df.StationType = df["StationType"].apply(lambda x: TypeDict.get(x, x), 1) 327 | df.Elev = df.Elev.apply(lambda x: np.nan if x == 0.0 else round(x, 1), 1) 328 | 329 | # Remove preceding WQX from StationId field to remove duplicate station data created by legacy database. 330 | df['StationId'] = df['StationId'].str.replace('_WQX-', '-') 331 | df.drop_duplicates(subset=['StationId'], inplace=True) 332 | #self.stations = df 333 | return df 334 | 335 | def piv_chem(self, results='', chems='piper'): 336 | """pivots results DataFrame for input into piper class 337 | 338 | :param results: DataFrame of results data from WQP; default is return from call of :class:`WQP` 339 | :param chems: set of chemistry that must be present to retain row; default are the major ions for a piper plot 340 | :return: pivoted table of result values 341 | 342 | .. warnings:: this method drops < and > signs from values; do not use it for statistics 343 | """ 344 | 345 | if results == '': 346 | results = self.results 347 | 348 | ParAbb = {"Alkalinity": "Alk", "Alkalinity, Carbonate as CaCO3": "Alk", "Alkalinity, total": "Alk", 349 | "Arsenic": "As", "Calcium": "Ca", "Chloride": "Cl", "Carbon dioxide": "CO2", "Carbonate": "CO3", 350 | "Carbonate (CO3)": "CO3", "Specific conductance": "Cond", "Conductivity": "Cond", "Copper": "Cu", 351 | "Depth": "Depth", "Dissolved oxygen (DO)": "DO", "Iron": "Fe", 352 | "Hardness, Ca, Mg": "Hard", "Total hardness -- SDWA NPDWR": "Hard", 353 | "Bicarbonate": "HCO3", "Potassium": "K", "Magnesium": "Mg", "Kjeldahl nitrogen": "N", 354 | "Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)": "N", "Nitrogen": "N", "Sodium": "Na", 355 | "Sodium plus potassium": "NaK", "Ammonia-nitrogen": "NH3_N", "Ammonia-nitrogen as N": "N", 356 | "Nitrite": "NO2", 357 | "Nitrate": "NO3", "Nitrate as N": "N", "pH, lab": "pH", "pH": "pH", "Phosphate-phosphorus": "PO4", 358 | "Orthophosphate": "PO4", "Phosphate": "PO4", "Stream flow, instantaneous": "Q", "Flow": "Q", 359 | "Flow rate, instantaneous": "Q", "Silica": "Si", "Sulfate": "SO4", "Sulfate as SO4": "SO4", 360 | "Boron": "B", "Barium": "Ba", "Bromine": "Br", "Lithium": "Li", "Manganese": "Mn", "Strontium": "Sr", 361 | "Total dissolved solids": "TDS", "Temperature, water": "Temp", 362 | "Total Organic Carbon": "TOC", "delta Dueterium": "d2H", "delta Oxygen 18": "d18O", 363 | "delta Carbon 13 from Bicarbonate": "d13CHCO3", "delta Oxygen 18 from Bicarbonate": "d18OHCO3", 364 | "Total suspended solids": "TSS", "Turbidity": "Turb"} 365 | 366 | results['ParAbb'] = results['Param'].apply(lambda x: ParAbb.get(x, ''), 1) 367 | results.dropna(subset=['SampleId'], how='any', inplace=True) 368 | results = results[pd.isnull(results['DetectCond'])] 369 | results.drop_duplicates(subset=['SampleId', 'ParAbb'], inplace=True) 370 | datap = results.pivot(index='SampleId', columns='ParAbb', values='ResultValue') 371 | if chems == '': 372 | pass 373 | elif chems == 'piper': 374 | datap.dropna(subset=['SO4', 'Cl', 'Ca', 'HCO3', 'pH'], how='any', inplace=True) 375 | else: 376 | datap.dropna(subset=chems, how='any', inplace=True) 377 | return datap 378 | -------------------------------------------------------------------------------- /wellapplication/usgs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Jan 3 00:30:36 2016 4 | 5 | @author: p 6 | """ 7 | from __future__ import absolute_import, division, print_function, unicode_literals 8 | import pandas as pd 9 | from datetime import datetime 10 | from pylab import rcParams 11 | 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | import requests 15 | 16 | 17 | class nwisError(Exception): 18 | def __init__(self, error_message): 19 | self.error_message = error_message 20 | 21 | def __str__(self): 22 | r""" This just returns one of the error messages listed in the checkresponse() function""" 23 | return repr(self.error_message) 24 | 25 | 26 | class nwis(object): 27 | """Class to quickly download NWIS data using NWIS_ services 28 | .. _NWIS: https://waterservices.usgs.gov/ 29 | 30 | :param service: name of web service to use; options are daily values ('dv'), instantaneous values ('iv'), 31 | site ('site'), and groundwater levels ('gwlevels') 32 | :param values: values for REST query; valid site is '01646500'; valid huc is '02070010'; valid bBox is 33 | '-83.000000,36.500000,-81.000000,38.500000' 34 | :param loc_type: filter type; valid values are 'huc', 'bBox', 'sites', and 'countyCd'; 35 | see https://waterservices.usgs.gov/rest/IV-Service.html#MajorFilters for details 36 | :param **kwargs: other query parameters; optional 37 | 38 | """ 39 | def __init__(self, service, values, loc_type, **kwargs): 40 | r""" Instantiates an instance of nwis""" 41 | self.service = service 42 | self.loc_type = loc_type 43 | self.values = self.parsesitelist(values) 44 | self.header = {'Accept-encoding': 'gzip'} 45 | self.url = 'https://waterservices.usgs.gov/nwis/' 46 | self.geo_criteria = ['sites', 'stateCd', 'huc', 'countyCd', 'bBox'] 47 | self.out_format = 'json' 48 | self.start_date = '1800-01-01' 49 | self.input = kwargs 50 | self.end_date = str(datetime.today().year) + '-' + str(datetime.today().month).zfill(2) + '-' + str( 51 | datetime.today().day).zfill(2) 52 | self.sites, self.data = self.get_nwis(**kwargs) 53 | 54 | @staticmethod 55 | def _checkresponse(response): 56 | r""" Returns the data requested by the other methods assuming the response from the API is ok. If not, provides 57 | error handling for all possible API errors. HTTP errors are handled in the get_response() function. 58 | 59 | :param response: The response from the API as a dictionary if the API code is 200. 60 | 61 | :returns: The response from the API as a dictionary if the API code is 200. 62 | 63 | .. raises:: nwisError; Gives different response messages depending on returned code from API. 64 | .. notes:: https://waterservices.usgs.gov/docs/portable_code.html 65 | """ 66 | 67 | if response.status_code == 200: 68 | print('connection successful') 69 | return response 70 | elif response.status_code == 403: 71 | raise nwisError('The USGS has blocked your Internet Protocol (IP) address') 72 | elif response.status_code == 400: 73 | raise nwisError('URL arguments are inconsistent') 74 | elif response.status_code == 404: 75 | raise nwisError('The query expresses a combination of elements where data do not exist.') 76 | elif response.status_code == 500: 77 | raise nwisError('There is a problem with the web service') 78 | elif response.status_code == 503: 79 | raise nwisError('This application is down at the moment') 80 | else: 81 | raise nwisError('Something went wrong.') 82 | 83 | def get_response(self, **kwargs): 84 | """ Returns a dictionary of data requested by each function. 85 | 86 | :returns: response - A dictionary that has been dumped from JSON. '01585200' 87 | 88 | .. raises:: nwisError - Overrides the exceptions given in the requests library to give more custom error messages. 89 | Connection_error occurs if no internet connection exists. Timeout_error occurs if the request takes too 90 | long and redirect_error is shown if the url is formatted incorrectly. 91 | """ 92 | http_error = 'Could not connect to the API. This could be because you have no internet connection, a parameter' \ 93 | ' was input incorrectly, or the API is currently down. Please try again.' 94 | 95 | kwargs[self.loc_type] = self.values 96 | kwargs['format'] = self.out_format 97 | 98 | if 'startDT' not in kwargs: 99 | kwargs['startDT'] = self.start_date 100 | if 'endDT' not in kwargs: 101 | kwargs['endDT'] = self.end_date 102 | 103 | total_url = self.url + self.service + '/?' 104 | response_ob = requests.get(total_url, params=kwargs, headers=self.header) 105 | if self.service != 'site': 106 | try: 107 | response_ob.json() 108 | except: 109 | raise nwisError("Could not decode response from {:} ".format(response_ob.url)) 110 | 111 | return self._checkresponse(response_ob) 112 | 113 | def get_nwis(self, **kwargs): 114 | jsn_dict = self.get_response(**kwargs) 115 | nwis_dict = jsn_dict.json() 116 | # dictionary from json object; each value in this dictionary is a station timeseries 117 | dt = nwis_dict['value']['timeSeries'] 118 | 119 | station_id, lat, lon, srs, station_type, station_nm = [], [], [], [], [], [] 120 | f = {} 121 | for i in range(len(dt)): 122 | station_id.append(dt[i]['sourceInfo']['siteCode'][0]['value']) 123 | lat.append(dt[i]['sourceInfo']['geoLocation'][u'geogLocation']['latitude']) 124 | lon.append(dt[i]['sourceInfo']['geoLocation'][u'geogLocation']['longitude']) 125 | srs.append(dt[i]['sourceInfo']['geoLocation'][u'geogLocation']['srs']) 126 | station_type.append(dt[i]['sourceInfo']['siteProperty'][0]['value']) 127 | station_nm.append(dt[i]['sourceInfo'][u'siteName']) 128 | 129 | df = pd.DataFrame(dt[i]['values'][0]['value']) 130 | if 'dateTime' in df.columns and 'Gage height, feet' not in dt[i]['variable']['variableDescription']: 131 | df.index = pd.to_datetime(df.pop('dateTime')) 132 | df.value = df.value.astype(float) 133 | df.value = df.value.where(df.value > -999, np.nan) 134 | df.index.name = 'datetime' 135 | 136 | f[dt[i]['sourceInfo']['siteCode'][0]['value']] = df 137 | else: 138 | print(dt[i]['variable']['variableDescription'] + " skipped!") 139 | pass 140 | 141 | stat_dict = {'site_no': station_id, 'dec_lat_va': lat, 'dec_long_va': lon, 'dec_coord_datum_cd': srs, 142 | 'station_nm': station_nm, 'data_type_cd': station_type} 143 | stations = pd.DataFrame(stat_dict) 144 | if len(dt) > 1 and len(f) >= 1: 145 | data = pd.concat(f) 146 | data.index.set_names('site_no', level=0, inplace=True) 147 | elif len(dt) == 1 and len(f) >= 1: 148 | data = f[dt[0]['sourceInfo']['siteCode'][0]['value']] 149 | data['site_no'] = dt[0]['sourceInfo']['siteCode'][0]['value'] 150 | else: 151 | data = None 152 | print('No Data!') 153 | return stations, data 154 | 155 | def parsesitelist(self, values): 156 | """Takes a list and turns it into a string format that can be used in the html REST format 157 | 158 | :param values: 159 | :param type: list 160 | :returns: sitno (str); string with commas separating values 161 | 162 | :Example: 163 | >>>parsesitelist([123,576,241]) 164 | '123,576,241' 165 | """ 166 | siteno = str(values).replace(" ", "") 167 | siteno = siteno.replace("]", "") 168 | siteno = siteno.replace("[", "") 169 | siteno = siteno.replace("','", ",") 170 | siteno = siteno.replace("'", "") 171 | siteno = siteno.replace('"', "") 172 | return siteno 173 | 174 | def get_info(self, **kwargs): 175 | """Downloads data from usgs service as text file; converted to Pandas DataFrame. 176 | 177 | :param kwargs: response of request 178 | :type kwargs: str 179 | 180 | .. returns:: df; Pandas DataFrame containing data downloaded from USGS 181 | """ 182 | self.service = 'site' 183 | self.out_format = 'rdb' 184 | kwargs['siteOutput'] = 'expanded' 185 | resp = self.get_response(**kwargs) 186 | print(resp.url) 187 | linefile = resp.iter_lines() 188 | numlist = [] 189 | num = 0 190 | for line in linefile: 191 | if line.startswith(b"#"): 192 | numlist.append(num) 193 | num += 1 194 | numlist.append(numlist[-1] + 2) 195 | df = pd.read_table(resp.url, sep="\t", skiprows=numlist) 196 | return df 197 | 198 | @staticmethod 199 | def get_first_string(lst): 200 | """Function to get the first string from each list""" 201 | return lst[0] if isinstance(lst, list) and lst and all(isinstance(item, str) for item in lst) else None 202 | 203 | def cleanGWL(self, df, colm='qualifiers',inplace=False): 204 | """Drops water level data of suspect quality based on lev_status_cd 205 | 206 | :param df: (pandas dataframe) groundwater dataframe 207 | :param colm: column to parse; defaults to 'qualifiers' 208 | 209 | :type colm: str 210 | :returns: sitno (str) - subset of input dataframe as new dataframe 211 | """ 212 | if inplace: 213 | data = df 214 | else: 215 | data = df.copy(deep=True) 216 | data[colm] = data[colm].apply(get_first_string) 217 | CleanData = data[~data[colm].isin(['Z', 'R', 'V', 'P', 'O', 'F', 'W', 'G', 'S', 'C', 'E', 'N'])] 218 | return CleanData 219 | 220 | def my_agg(self, x): 221 | 222 | names = { 223 | 'mean': x[self.avgtype].mean(numeric_only=True), 224 | 'std': x[self.avgtype].std(numeric_only=True), 225 | 'min': x[self.avgtype].min(numeric_only=True), 226 | 'max': x[self.avgtype].max(numeric_only=True), 227 | 'median': x[self.avgtype].median(numeric_only=True), 228 | 'cnt': (np.count_nonzero(~np.isnan(x[self.avgtype]))), 229 | 'err_pls': (np.mean(x[self.avgtype]) + (np.std(x[self.avgtype]) * 1.96)), 230 | 'err_min': (np.mean(x[self.avgtype]) - (np.std(x[self.avgtype]) * 1.96)) 231 | #'5 percent': np.percentile(x[self.avgtype], 5), 232 | #'95 percent': np.percentile(x[self.avgtype], 95) 233 | } 234 | 235 | return pd.Series(names, index=list(names.keys())) 236 | 237 | def avg_wl(self, numObs=50, avgtype='stdWL', grptype='bytime', grper='12ME'): 238 | """Calculates standardized statistics for a list of stations or a huc from the USGS 239 | avgDiffWL = average difference from mean WL for each station 240 | 241 | 242 | 243 | :param numObs: minimum observations per site required to include site in analysis; default is 50 244 | :param avgtype: averaging technique for site data; options are 'avgDiffWL','stdWL','cdm','avgDiff_dWL', and 'std_dWWL'; default is 'stWL' 245 | :param grptype: way to group the averaged data; options are 'bytime' or 'monthly' or user input; default 'bytime' 246 | :param grper: only used if 'bytime' called; defaults to '12M'; other times can be put in 247 | :return: 248 | """ 249 | self.avgtype = avgtype 250 | data = self.cleanGWL(self.data) 251 | # stationWL = pd.merge(siteinfo, data, on = 'site_no') 252 | data.reset_index(inplace=True) 253 | data.set_index(['datetime'], inplace=True) 254 | # get averages by year, month, and site number 255 | site_size = data.groupby('site_no').size() 256 | wl_long = data[data['site_no'].isin(list(site_size[site_size >= numObs].index.values))] 257 | # eliminate any duplicate site numbers 258 | siteList = list(wl_long.site_no.unique()) 259 | for site in siteList: 260 | mean = wl_long.loc[wl_long.site_no == site, 'value'].mean() 261 | std = wl_long.loc[wl_long.site_no == site, 'value'].std() 262 | meandiff = wl_long.loc[wl_long.site_no == site, 'value'].diff().mean() 263 | stddiff = wl_long.loc[wl_long.site_no == site, 'value'].diff().std() 264 | wl_long.loc[wl_long.site_no == site, 'diff'] = wl_long.loc[wl_long.site_no == site, 'value'].diff() 265 | wl_long.loc[wl_long.site_no == site, 'avgDiffWL'] = wl_long.loc[wl_long.site_no == site, 'value'] - mean 266 | wl_long.loc[wl_long.site_no == site, 'stdWL'] = wl_long.loc[wl_long.site_no == site, 'avgDiffWL'] / std 267 | wl_long.loc[wl_long.site_no == site, 'cdm'] = wl_long.loc[wl_long.site_no == site, 'avgDiffWL'].cumsum() 268 | wl_long.loc[wl_long.site_no == site, 'avgDiff_dWL'] = wl_long.loc[ 269 | wl_long.site_no == site, 'diff'] - meandiff 270 | wl_long.loc[wl_long.site_no == site, 'std_dWL'] = wl_long.loc[ 271 | wl_long.site_no == site, 'avgDiff_dWL'] / stddiff 272 | 273 | if grptype == 'bytime': 274 | grp = pd.Grouper(freq=grper) 275 | elif grptype == 'monthly': 276 | grp = wl_long.index.month 277 | else: 278 | grp = grptype 279 | 280 | # this statement reduces bias from one station 281 | wllong = wl_long.groupby(['site_no',grp]).mean(numeric_only=True) 282 | wllong.index = wllong.index.droplevel(level=0) 283 | # this statement gets the statistics 284 | wl_stats = wllong.groupby([grp]).apply(self.my_agg) 285 | 286 | self.wl_stats = wl_stats 287 | 288 | return wl_stats 289 | 290 | def pltavgwl(self, maxdate = [0,0,0], mindate=[1950,1,1],): 291 | 292 | if maxdate[0] == 0: 293 | maxdate = [datetime.today().year,1,1] 294 | 295 | grpd = self.wl_stats 296 | x2 = grpd.index 297 | y3 = grpd['mean'] 298 | y2 = grpd['median'] 299 | 300 | fig = plt.figure() 301 | ax = fig.add_subplot(111) 302 | 303 | rcParams['figure.figsize'] = 15, 10 304 | rcParams['legend.numpoints'] = 1 305 | plt.plot(x2, y3, '+-', color='green', label='Median') 306 | ax.plot(x2, y2, '+-', color='red', label='Average') 307 | ax.fill_between(x2, grpd['err_min'], grpd['err_pls'], alpha=0.2, label='2 Standard Deviations', linewidth=0) 308 | 309 | ax.set_ylabel(self.avgtype, color='red') 310 | ax.invert_yaxis() 311 | ax.grid() 312 | ax2 = ax.twinx() 313 | ax2.plot(x2, grpd['cnt'], label='Number of Wells Observed') 314 | ax2.set_ylim(0, int(grpd['cnt'].max()) * 3) 315 | ax2.set_yticks(range(0, int(grpd['cnt'].max()), int(grpd['cnt'].max() / 10))) 316 | ax2.set_ylabel('Number of Wells Observed', color='blue') 317 | ax2.yaxis.set_label_coords(1.03, 0.2) 318 | ax.set_xlim(datetime(*mindate), datetime(*maxdate)) 319 | date_range = pd.date_range('{:}-{:}-{:}'.format(*mindate), '{:}-{:}-{:}'.format(*maxdate), freq='36ME') 320 | date_range = date_range.map(lambda t: t.strftime('%Y-%m-%d')) 321 | ax.set_xticks(date_range) 322 | ax.set_xlabel('date') 323 | # ask matplotlib for the plotted objects and their labels 324 | lines, labels = ax.get_legend_handles_labels() 325 | lines2, labels2 = ax2.get_legend_handles_labels() 326 | ax2.legend(lines + lines2, labels + labels2, loc=0) 327 | 328 | return fig,ax,ax2 329 | 330 | def xcheck(self, x): 331 | """Converts empty list to empty string and filled list into string of first value""" 332 | if type(x) == list: 333 | if len(x) == 0: 334 | return '' 335 | else: 336 | return str(x[0]) 337 | else: 338 | return x 339 | 340 | def nwis_heat_map(self): 341 | from scipy.interpolate import griddata 342 | import matplotlib.cm as cm 343 | import matplotlib as mpl 344 | 345 | meth = 'linear' # 'nearest' 346 | 347 | data = self.data 348 | 349 | if isinstance(data.index, pd.core.index.MultiIndex): 350 | data.index = data.index.droplevel(0) 351 | 352 | x = data.index.dayofyear 353 | y = data.index.year 354 | z = data.value.values 355 | 356 | xi = np.linspace(x.min(), x.max(), 1000) 357 | yi = np.linspace(y.min(), y.max(), 1000) 358 | zi = griddata((x, y), z, (xi[None, :], yi[:, None]), method=meth) 359 | 360 | cmap = plt.cm.get_cmap('RdYlBu') 361 | norm = mpl.colors.Normalize(vmin=z.min(), vmax=z.max()) 362 | #norm = mpl.colors.LogNorm(vmin=0.1, vmax=100000) 363 | m = cm.ScalarMappable(norm=norm, cmap=cmap) 364 | m.set_array(z) 365 | 366 | br = plt.contourf(xi, yi, zi, color=m.to_rgba(z), cmap=cmap) 367 | # setup the colorbar 368 | 369 | 370 | cbar = plt.colorbar(m) 371 | cbar.set_label('Discharge (cfs)') 372 | 373 | plt.xlabel('Month') 374 | plt.ylabel('Year') 375 | plt.yticks(range(y.min(), y.max())) 376 | 377 | mons = {'Apr': 90.25, 'Aug': 212.25, 'Dec': 334.25, 'Feb': 31, 'Jan': 1, 'Jul': 181.25, 'Jun': 151.25, 378 | 'Mar': 59.25, 'May': 120.25, 379 | 'Nov': 304.25, 'Oct': 273.25, 'Sep': 243.25} 380 | monnms = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] 381 | 382 | plt.title(self.sites.station_nm[0].title()) 383 | tickplc = [] 384 | plt.xticks([mons[i] for i in monnms], monnms) 385 | plt.grid() 386 | 387 | def get_elev(x, units='Meters'): 388 | """Uses USGS elevation service to retrieve elevation 389 | :param x: longitude and latitude of point where elevation is desired 390 | :type x: list 391 | :param units: units for returned value; defaults to Meters; options are 'Meters' or 'Feet' 392 | :type units: str 393 | 394 | :returns: ned float elevation of location in meters 395 | 396 | :Example: 397 | >>> get_elev([-111.21,41.4]) 398 | 1951.99 399 | """ 400 | 401 | values = { 402 | 'x': x[0], 403 | 'y': x[1], 404 | 'units': units, 405 | 'output': 'json' 406 | } 407 | 408 | elev_url = 'https://nationalmap.gov/epqs/pqs.php?' 409 | 410 | attempts = 0 411 | while attempts < 4: 412 | try: 413 | response = requests.get(elev_url, params=values).json() 414 | g = float(response['USGS_Elevation_Point_Query_Service']['Elevation_Query']['Elevation']) 415 | break 416 | except: 417 | print("Connection attempt {:} of 3 failed.".format(attempts)) 418 | attempts += 1 419 | g = 0 420 | return g 421 | 422 | def get_huc(x): 423 | """Receive the content of ``url``, parse it as JSON and return the object. 424 | 425 | :param x: [longitude, latitude] 426 | 427 | :returns: HUC12, HUC12_Name - 12 digit hydrologic unit code of location and the name associated with that code 428 | """ 429 | values = { 430 | 'geometry': '{:},{:}'.format(x[0], x[1]), 431 | 'geometryType': 'esriGeometryPoint', 432 | 'inSR': '4326', 433 | 'spatialRel': 'esriSpatialRelIntersects', 434 | 'returnGeometry': 'false', 435 | 'outFields': 'huc12,name', 436 | 'returnDistinctValues': 'true', 437 | 'f': 'pjson'} 438 | 439 | huc_url = 'https://hydro.nationalmap.gov/arcgis/rest/services/wbd/MapServer/6/query?' 440 | # huc_url = 'https://services.nationalmap.gov/arcgis/rest/services/USGSHydroNHDLarge/MapServer/10/query?' 441 | # huc_url2 = 'https://services.nationalmap.gov/arcgis/rest/services/nhd/mapserver/8/query?' 442 | response = requests.get(huc_url, params=values).json() 443 | return response['features'][0]['attributes']['huc12'], response['features'][0]['attributes']['name'] 444 | 445 | def get_fips(x): 446 | """Receive the content of ``url``, parse it as JSON and return the object. 447 | :param x: [longitude, latitude] 448 | :returns: tuple containing five digit county fips and county name 449 | """ 450 | values = { 451 | 'latitude': '{:}'.format(x[1]), 452 | 'longitude': '{:}'.format(x[0]), 453 | 'showall': 'true', 454 | 'format': 'json'} 455 | 456 | huc_url = "http://data.fcc.gov/api/block/find?" 457 | response = requests.get(huc_url, params=values).json() 458 | return response['County']['FIPS'], response['County']['name'] 459 | 460 | def USGSID(x): 461 | """Parses decimal latitude and longitude values into DDMMSSDDDMMSS01 USGS site id. 462 | See https://help.waterdata.usgs.gov/faq/sites/do-station-numbers-have-any-particular-meaning for documentation. 463 | 464 | :param x: [longitude,latitude] 465 | :type x: str 466 | :returns: USGS-style site id (groundwater) DDMMSSDDDMMSS01 467 | """ 468 | return dms(x[1]) + dms(x[0]) + '01' 469 | 470 | def dms(dec): 471 | """converts decimal degree coordinates to a usgs station id 472 | :param dec: latitude or longitude value in decimal degrees 473 | :return: usgs id value 474 | 475 | .. note:: https://help.waterdata.usgs.gov/faq/sites/do-station-numbers-have-any-particular-meaning 476 | """ 477 | DD = str(int(abs(dec))) 478 | MM = str(int((abs(dec) - int(DD)) * 60)).zfill(2) 479 | SS = str(int(round((((abs(dec) - int(DD)) * 60) - int(MM)) * 60, 0))).zfill(2) 480 | if SS == '60': 481 | MM = str(int(MM) + 1) 482 | SS = '00' 483 | if MM == '60': 484 | DD = str(int(DD) + 1) 485 | MM = '00' 486 | return DD + MM + SS 487 | 488 | 489 | def get_recess(df, Q, freq='1D', inplace=False): 490 | """ Select the data when values are decreasing compared to previous time step 491 | 492 | :param df: DataFrame of hydro data 493 | :param Q: DataFrame field with discharge or water level data 494 | :param freq: Frequency of measurement of data; default is 1D 495 | :param inplace: If True, replace input DataFrame; default is false 496 | :return: DataFrame of all of the decreasing segments of the input DataFrame 497 | 498 | .. note:: from https://github.com/stijnvanhoey/hydropy 499 | """ 500 | recess = df[Q].diff() < 0.0 501 | if inplace: 502 | df = df 503 | else: 504 | df = df[recess].copy() 505 | df = df.resample(freq).mean() 506 | return df 507 | 508 | 509 | def RB_Flashiness(series): 510 | """Richards-Baker Flashiness Index for a series of daily mean discharges. 511 | https://github.com/hydrogeog/hydro/blob/master/hydro/core.py 512 | """ 513 | Qsum = np.sum(series) # sum of daily mean discharges 514 | Qpath = 0.0 515 | for i in range(len(series)): 516 | if i == 0: 517 | Qpath = series[i] # first entry only 518 | else: 519 | Qpath += np.abs(series[i] - series[i-1]) # sum the absolute differences of the mean discharges 520 | return Qpath/Qsum 521 | 522 | 523 | def flow_duration(series): 524 | """Creates the flow duration curve for a discharge dataset. Returns a pandas 525 | series whose index is the discharge values and series is exceedance probability. 526 | https://github.com/hydrogeog/hydro/blob/master/hydro/core.py 527 | """ 528 | fd = pd.Series(series).value_counts() # frequency of unique values 529 | fd.sort_index(inplace=True) # sort in order of increasing discharges 530 | fd = fd.cumsum() # cumulative sum of frequencies 531 | fd = fd.apply(lambda x: 100 - x/fd.max() * 100) # normalize 532 | return fd 533 | 534 | def Lyne_Hollick(series, alpha=.925, direction='f'): 535 | """Recursive digital filter for baseflow separation. Based on Lyne and Hollick, 1979. 536 | series = array of discharge measurements 537 | alpha = filter parameter 538 | direction = (f)orward or (r)everse calculation 539 | https://github.com/hydrogeog/hydro/blob/master/hydro/core.py 540 | """ 541 | series = np.array(series) 542 | f = np.zeros(len(series)) 543 | if direction == 'f': 544 | for t in np.arange(1,len(series)): 545 | f[t] = alpha * f[t-1] + (1 + alpha)/2 * (series[t] - series[t-1]) 546 | if series[t] - f[t] > series[t]: 547 | f[t] = 0 548 | elif direction == 'r': 549 | for t in np.arange(len(series)-2, 1, -1): 550 | f[t] = alpha * f[t+1] + (1 + alpha)/2 * (series[t] - series[t+1]) 551 | if series[t] - f[t] > series[t]: 552 | f[t] = 0 553 | return np.array(series - f) 554 | 555 | def Eckhardt(series, alpha=.98, BFI=.80): 556 | """Recursive digital filter for baseflow separation. Based on Eckhardt, 2004. 557 | series = array of discharge measurements 558 | alpha = filter parameter 559 | BFI = BFI_max (maximum baseflow index) 560 | https://github.com/hydrogeog/hydro/blob/master/hydro/core.py 561 | """ 562 | series = np.array(series) 563 | f = np.zeros(len(series)) 564 | f[0] = series[0] 565 | for t in np.arange(1,len(series)): 566 | f[t] = ((1 - BFI) * alpha * f[t-1] + (1 - alpha) * BFI * series[t]) / (1 - alpha * BFI) 567 | if f[t] > series[t]: 568 | f[t] = series[t] 569 | return f 570 | 571 | def ratingCurve(discharge, stage): 572 | """Computes rating curve based on discharge measurements coupled with stage 573 | readings. 574 | discharge = array of measured discharges; 575 | stage = array of corresponding stage readings; 576 | Returns coefficients a, b for the rating curve in the form y = a * x**b 577 | https://github.com/hydrogeog/hydro/blob/master/hydro/core.py 578 | """ 579 | from scipy.optimize import curve_fit 580 | 581 | exp_curve = lambda x, a, b: (a * x ** b) 582 | popt, pcov = curve_fit(exp_curve, stage, discharge) 583 | 584 | 585 | a = 0.0 586 | b = 0.0 587 | 588 | for i, j in zip(discharge, stage): 589 | a += (i - exp_curve(j, popt[0], popt[1]))**2 590 | b += (i - np.mean(discharge))**2 591 | r_squ = 1 - a / b 592 | 593 | 594 | return popt, r_squ 595 | -------------------------------------------------------------------------------- /wellapplication/mesopy.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function, unicode_literals 2 | 3 | # ==================================================================================================================== # 4 | # MesoPy # 5 | # Version: 2.0.0 # 6 | # Copyright (c) 2015 MesoWest Developers # 7 | # # 8 | # LICENSE: # 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated # 10 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the # 11 | # rights to use,copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to # 12 | # permit persons to whom the Software is furnished to do so, subject to the following conditions: # 13 | # # 14 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the # 15 | # Software. # 16 | # ==================================================================================================================== # 17 | 18 | try: 19 | import urllib.parse 20 | import urllib.request 21 | import urllib.error 22 | except ImportError: 23 | import urllib2 24 | import urllib 25 | 26 | import json 27 | 28 | 29 | # ==================================================================================================================== # 30 | # MesoPyError class # 31 | # Type: Exception # 32 | # Description: This class is simply the means for error handling when an exception is raised. # 33 | # ==================================================================================================================== # 34 | 35 | 36 | class MesoPyError(Exception): 37 | def __init__(self, error_message): 38 | self.error_message = error_message 39 | 40 | def __str__(self): 41 | r""" This just returns one of the error messages listed in the checkresponse() function""" 42 | return repr(self.error_message) 43 | 44 | 45 | # ==================================================================================================================== # 46 | # Meso class # 47 | # Type: Main # 48 | # Description: This class defines an instance of MesoPy and takes in the user's token # 49 | # ==================================================================================================================== # 50 | 51 | 52 | class Meso(object): 53 | def __init__(self, token): 54 | r""" Instantiates an instance of MesoPy. 55 | 56 | Arguments: 57 | ---------- 58 | token: string, mandatory 59 | Your API token that authenticates you for requests against MesoWest.mes 60 | 61 | Returns: 62 | -------- 63 | None. 64 | 65 | Raises: 66 | ------- 67 | None. 68 | """ 69 | 70 | self.base_url = 'http://api.mesowest.net/v2/' 71 | self.token = token 72 | self.geo_criteria = ['stid', 'state', 'country', 'county', 'radius', 'bbox', 'cwa', 'nwsfirezone', 'gacc', 73 | 'subgacc'] 74 | 75 | # ================================================================================================================ # 76 | # Functions: # 77 | # ================================================================================================================ # 78 | 79 | @staticmethod 80 | def _checkresponse(response): 81 | r""" Returns the data requested by the other methods assuming the response from the API is ok. If not, provides 82 | error handling for all possible API errors. HTTP errors are handled in the get_response() function. 83 | 84 | Arguments: 85 | ---------- 86 | None. 87 | 88 | Returns: 89 | -------- 90 | The response from the API as a dictionary if the API code is 2. 91 | 92 | Raises: 93 | ------- 94 | MesoPyError: Gives different response messages depending on returned code from API. If the response is 2, 95 | resultsError is displayed. For a response of 200, an authError message is shown. A ruleError is displayed 96 | if the code is 400, a formatError for -1, and catchError for any other invalid response. 97 | 98 | """ 99 | 100 | results_error = 'No results were found matching your query' 101 | auth_error = 'The token or API key is not valid, please contact Josh Clark at joshua.m.clark@utah.edu to ' \ 102 | 'resolve this' 103 | rule_error = 'This request violates a rule of the API. Please check the guidelines for formatting a data ' \ 104 | 'request and try again' 105 | catch_error = 'Something went wrong. Check all your calls and try again' 106 | 107 | if response['SUMMARY']['RESPONSE_CODE'] == 1: 108 | return response 109 | elif response['SUMMARY']['RESPONSE_CODE'] == 2: 110 | raise MesoPyError(results_error) 111 | elif response['SUMMARY']['RESPONSE_CODE'] == 200: 112 | raise MesoPyError(auth_error) 113 | elif response['SUMMARY']['RESPONSE_CODE'] == 400: 114 | raise MesoPyError(rule_error) 115 | elif response['SUMMARY']['RESPONSE_CODE'] == -1: 116 | format_error = response['SUMMARY']['RESPONSE_MESSAGE'] 117 | raise MesoPyError(format_error) 118 | else: 119 | raise MesoPyError(catch_error) 120 | 121 | def _get_response(self, endpoint, request_dict): 122 | """ Returns a dictionary of data requested by each function. 123 | 124 | Arguments: 125 | ---------- 126 | endpoint: string, mandatory 127 | Set in all other methods, this is the API endpoint specific to each function. 128 | request_dict: string, mandatory 129 | A dictionary of parameters that are formatted into the API call. 130 | 131 | Returns: 132 | -------- 133 | response: A dictionary that has been dumped from JSON. 134 | 135 | Raises: 136 | ------- 137 | MesoPyError: Overrides the exceptions given in the requests library to give more custom error messages. 138 | Connection_error occurs if no internet connection exists. Timeout_error occurs if the request takes too 139 | long and redirect_error is shown if the url is formatted incorrectly. 140 | 141 | """ 142 | http_error = 'Could not connect to the API. This could be because you have no internet connection, a parameter' \ 143 | ' was input incorrectly, or the API is currently down. Please try again.' 144 | # For python 3.4 145 | try: 146 | qsp = urllib.parse.urlencode(request_dict, doseq=True) 147 | resp = urllib.request.urlopen(self.base_url + endpoint + '?' + qsp).read() 148 | 149 | # For python 2.7 150 | except AttributeError or NameError: 151 | try: 152 | qsp = urllib.urlencode(request_dict, doseq=True) 153 | resp = urllib2.urlopen(self.base_url + endpoint + '?' + qsp).read() 154 | except urllib2.URLError: 155 | raise MesoPyError(http_error) 156 | except urllib.error.URLError: 157 | raise MesoPyError(http_error) 158 | return self._checkresponse(json.loads(resp.decode('utf-8'))) 159 | 160 | def _check_geo_param(self, arg_list): 161 | r""" Checks each function call to make sure that the user has provided at least one of the following geographic 162 | parameters: 'stid', 'state', 'country', 'county', 'radius', 'bbox', 'cwa', 'nwsfirezone', 'gacc', or 'subgacc'. 163 | 164 | Arguments: 165 | ---------- 166 | arg_list: list, mandatory 167 | A list of kwargs from other functions. 168 | 169 | Returns: 170 | -------- 171 | None. 172 | 173 | Raises: 174 | ------- 175 | MesoPyError if no geographic search criteria is provided. 176 | 177 | """ 178 | 179 | geo_func = lambda a, b: any(i in b for i in a) 180 | check = geo_func(self.geo_criteria, arg_list) 181 | if check is False: 182 | raise MesoPyError('No stations or geographic search criteria specified. Please provide one of the ' 183 | 'following: stid, state, county, country, radius, bbox, cwa, nwsfirezone, gacc, subgacc') 184 | 185 | 186 | def precip(self, start, end, **kwargs): 187 | r""" Returns precipitation observations at a user specified location for a specified time. Users must specify at 188 | least one geographic search parameter ('stid', 'state', 'country', 'county', 'radius', 'bbox', 'cwa', 189 | 'nwsfirezone', 'gacc', or 'subgacc') to obtain observation data. Other parameters may also be included. See 190 | below mandatory and optional parameters. Also see the metadata() function for station IDs. 191 | 192 | Arguments: 193 | ---------- 194 | start: string, mandatory 195 | Start date in form of YYYYMMDDhhmm. MUST BE USED WITH THE END PARAMETER. Default time is UTC 196 | e.g., start='201306011800' 197 | end: string, mandatory 198 | End date in form of YYYYMMDDhhmm. MUST BE USED WITH THE START PARAMETER. Default time is UTC 199 | e.g., end='201306011800' 200 | obtimezone: string, optional 201 | Set to either UTC or local. Sets timezone of obs. Default is UTC. e.g. obtimezone='local' 202 | showemptystations: string, optional 203 | Set to '1' to show stations even if no obs exist that match the time period. Stations without obs are 204 | omitted by default. 205 | stid: string, optional 206 | Single or comma separated list of MesoWest station IDs. e.g. stid='kden,kslc,wbb' 207 | county: string, optional 208 | County/parish/borough (US/Canada only), full name e.g. county='Larimer' 209 | state: string, optional 210 | US state, 2-letter ID e.g. state='CO' 211 | country: string, optional 212 | Single or comma separated list of abbreviated 2 or 3 character countries e.g. country='us,ca,mx' 213 | radius: list, optional 214 | Distance from a lat/lon pt or stid as [lat,lon,radius (mi)] or [stid, radius (mi)]. e.g. radius="-120,40,20" 215 | bbox: list, optional 216 | Stations within a [lon/lat] box in the order [lonmin,latmin,lonmax,latmax] e.g. bbox="-120,40,-119,41" 217 | cwa: string, optional 218 | NWS county warning area. See http://www.nws.noaa.gov/organization.php for CWA list. e.g. cwa='LOX' 219 | nwsfirezone: string, optional 220 | NWS fire zones. See http://www.nws.noaa.gov/geodata/catalog/wsom/html/firezone.htm for a shapefile 221 | containing the full list of zones. e.g. nwsfirezone='LOX241' 222 | gacc: string, optional 223 | Name of Geographic Area Coordination Center e.g. gacc='EBCC' See http://gacc.nifc.gov/ for a list of GACCs. 224 | subgacc: string, optional 225 | Name of Sub GACC e.g. subgacc='EB07' 226 | vars: string, optional 227 | Single or comma separated list of sensor variables. Will return all stations that match one of provided 228 | variables. Useful for filtering all stations that sense only certain vars. Do not request vars twice in 229 | the query. e.g. vars='wind_speed,pressure' Use the variables function to see a list of sensor vars. 230 | status: string, optional 231 | A value of either active or inactive returns stations currently set as active or inactive in the archive. 232 | Omitting this param returns all stations. e.g. status='active' 233 | units: string, optional 234 | String or set of strings and pipes separated by commas. Default is metric units. Set units='ENGLISH' for 235 | FREEDOM UNITS ;) Valid other combinations are as follows: temp|C, temp|F, temp|K; speed|mps, speed|mph, 236 | speed|kph, speed|kts; pres|pa, pres|mb; height|m, height|ft; precip|mm, precip|cm, precip|in; alti|pa, 237 | alti|inhg. e.g. units='temp|F,speed|kph,metric' 238 | groupby: string, optional 239 | Results can be grouped by key words: state, county, country, cwa, nwszone, mwsfirezone, gacc, subgacc 240 | e.g. groupby='state' 241 | timeformat: string, optional 242 | A python format string for returning customized date-time groups for observation times. Can include 243 | characters. e.g. timeformat='%m/%d/%Y at %H:%M' 244 | 245 | Returns: 246 | -------- 247 | Dictionary of precipitation observations. 248 | 249 | Raises: 250 | ------- 251 | None. 252 | 253 | """ 254 | 255 | self._check_geo_param(kwargs) 256 | kwargs['start'] = start 257 | kwargs['end'] = end 258 | kwargs['token'] = self.token 259 | 260 | return self._get_response('stations/precipitation', kwargs) 261 | 262 | def timeseries(self, start, end, **kwargs): 263 | r""" Returns a time series of observations at a user specified location for a specified time. Users must specify 264 | at least one geographic search parameter ('stid', 'state', 'country', 'county', 'radius', 'bbox', 'cwa', 265 | 'nwsfirezone', 'gacc', or 'subgacc') to obtain observation data. Other parameters may also be included. See 266 | below mandatory and optional parameters. Also see the metadata() function for station IDs. 267 | 268 | Arguments: 269 | ---------- 270 | start: string, mandatory 271 | Start date in form of YYYYMMDDhhmm. MUST BE USED WITH THE END PARAMETER. Default time is UTC 272 | e.g., start='201306011800' 273 | end: string, mandatory 274 | End date in form of YYYYMMDDhhmm. MUST BE USED WITH THE START PARAMETER. Default time is UTC 275 | e.g., end='201306011800' 276 | obtimezone: string, optional 277 | Set to either UTC or local. Sets timezone of obs. Default is UTC. e.g. obtimezone='local' 278 | showemptystations: string, optional 279 | Set to '1' to show stations even if no obs exist that match the time period. Stations without obs are 280 | omitted by default. 281 | stid: string, optional 282 | Single or comma separated list of MesoWest station IDs. e.g. stid='kden,kslc,wbb' 283 | county: string, optional 284 | County/parish/borough (US/Canada only), full name e.g. county='Larimer' 285 | state: string, optional 286 | US state, 2-letter ID e.g. state='CO' 287 | country: string, optional 288 | Single or comma separated list of abbreviated 2 or 3 character countries e.g. country='us,ca,mx' 289 | radius: string, optional 290 | Distance from a lat/lon pt or stid as [lat,lon,radius (mi)] or [stid, radius (mi)]. e.g. radius="-120,40,20" 291 | bbox: string, optional 292 | Stations within a [lon/lat] box in the order [lonmin,latmin,lonmax,latmax] e.g. bbox="-120,40,-119,41" 293 | cwa: string, optional 294 | NWS county warning area. See http://www.nws.noaa.gov/organization.php for CWA list. e.g. cwa='LOX' 295 | nwsfirezone: string, optional 296 | NWS fire zones. See http://www.nws.noaa.gov/geodata/catalog/wsom/html/firezone.htm for a shapefile 297 | containing the full list of zones. e.g. nwsfirezone='LOX241' 298 | gacc: string, optional 299 | Name of Geographic Area Coordination Center e.g. gacc='EBCC' See http://gacc.nifc.gov/ for a list of GACCs. 300 | subgacc: string, optional 301 | Name of Sub GACC e.g. subgacc='EB07' 302 | vars: string, optional 303 | Single or comma separated list of sensor variables. Will return all stations that match one of provided 304 | variables. Useful for filtering all stations that sense only certain vars. Do not request vars twice in 305 | the query. e.g. vars='wind_speed,pressure' Use the variables function to see a list of sensor vars. 306 | status: string, optional 307 | A value of either active or inactive returns stations currently set as active or inactive in the archive. 308 | Omitting this param returns all stations. e.g. status='active' 309 | units: string, optional 310 | String or set of strings and pipes separated by commas. Default is metric units. Set units='ENGLISH' for 311 | FREEDOM UNITS ;) Valid other combinations are as follows: temp|C, temp|F, temp|K; speed|mps, speed|mph, 312 | speed|kph, speed|kts; pres|pa, pres|mb; height|m, height|ft; precip|mm, precip|cm, precip|in; alti|pa, 313 | alti|inhg. e.g. units='temp|F,speed|kph,metric' 314 | groupby: string, optional 315 | Results can be grouped by key words: state, county, country, cwa, nwszone, mwsfirezone, gacc, subgacc 316 | e.g. groupby='state' 317 | timeformat: string, optional 318 | A python format string for returning customized date-time groups for observation times. Can include 319 | characters. e.g. timeformat='%m/%d/%Y at %H:%M' 320 | 321 | Returns: 322 | -------- 323 | Dictionary of time series observations through the get_response() function. 324 | 325 | Raises: 326 | ------- 327 | None. 328 | """ 329 | 330 | self._check_geo_param(kwargs) 331 | kwargs['start'] = start 332 | kwargs['end'] = end 333 | kwargs['token'] = self.token 334 | 335 | return self._get_response('stations/timeseries', kwargs) 336 | 337 | def climatology(self, startclim, endclim, **kwargs): 338 | r""" Returns a climatology of observations at a user specified location for a specified time. Users must specify 339 | at least one geographic search parameter ('stid', 'state', 'country', 'county', 'radius', 'bbox', 'cwa', 340 | 'nwsfirezone', 'gacc', or 'subgacc') to obtain observation data. Other parameters may also be included. See 341 | below mandatory and optional parameters. Also see the metadata() function for station IDs. 342 | 343 | Arguments: 344 | ---------- 345 | startclim: string, mandatory 346 | Start date in form of MMDDhhmm. MUST BE USED WITH THE ENDCLIM PARAMETER. Default time is UTC 347 | e.g. startclim='06011800' Do not specify a year 348 | endclim: string, mandatory 349 | End date in form of MMDDhhmm. MUST BE USED WITH THE STARTCLIM PARAMETER. Default time is UTC 350 | e.g. endclim='06011800' Do not specify a year 351 | obtimezone: string, optional 352 | Set to either UTC or local. Sets timezone of obs. Default is UTC. e.g. obtimezone='local' 353 | showemptystations: string, optional 354 | Set to '1' to show stations even if no obs exist that match the time period. Stations without obs are 355 | omitted by default. 356 | stid: string, optional 357 | Single or comma separated list of MesoWest station IDs. e.g. stid='kden,kslc,wbb' 358 | county: string, optional 359 | County/parish/borough (US/Canada only), full name e.g. county='Larimer' 360 | state: string, optional 361 | US state, 2-letter ID e.g. state='CO' 362 | country: string, optional 363 | Single or comma separated list of abbreviated 2 or 3 character countries e.g. country='us,ca,mx' 364 | radius: string, optional 365 | Distance from a lat/lon pt or stid as [lat,lon,radius (mi)] or [stid, radius (mi)]. e.g. radius="-120,40,20" 366 | bbox: string, optional 367 | Stations within a [lon/lat] box in the order [lonmin,latmin,lonmax,latmax] e.g. bbox="-120,40,-119,41" 368 | cwa: string, optional 369 | NWS county warning area. See http://www.nws.noaa.gov/organization.php for CWA list. e.g. cwa='LOX' 370 | nwsfirezone: string, optional 371 | NWS fire zones. See http://www.nws.noaa.gov/geodata/catalog/wsom/html/firezone.htm for a shapefile 372 | containing the full list of zones. e.g. nwsfirezone='LOX241' 373 | gacc: string, optional 374 | Name of Geographic Area Coordination Center e.g. gacc='EBCC' See http://gacc.nifc.gov/ for a list of GACCs. 375 | subgacc: string, optional 376 | Name of Sub GACC e.g. subgacc='EB07' 377 | vars: string, optional 378 | Single or comma separated list of sensor variables. Will return all stations that match one of provided 379 | variables. Useful for filtering all stations that sense only certain vars. Do not request vars twice in 380 | the query. e.g. vars='wind_speed,pressure' Use the variables function to see a list of sensor vars. 381 | status: string, optional 382 | A value of either active or inactive returns stations currently set as active or inactive in the archive. 383 | Omitting this param returns all stations. e.g. status='active' 384 | units: string, optional 385 | String or set of strings and pipes separated by commas. Default is metric units. Set units='ENGLISH' for 386 | FREEDOM UNITS ;) Valid other combinations are as follows: temp|C, temp|F, temp|K; speed|mps, speed|mph, 387 | speed|kph, speed|kts; pres|pa, pres|mb; height|m, height|ft; precip|mm, precip|cm, precip|in; alti|pa, 388 | alti|inhg. e.g. units='temp|F,speed|kph,metric' 389 | groupby: string, optional 390 | Results can be grouped by key words: state, county, country, cwa, nwszone, mwsfirezone, gacc, subgacc 391 | e.g. groupby='state' 392 | timeformat: string, optional 393 | A python format string for returning customized date-time groups for observation times. Can include 394 | characters. e.g. timeformat='%m/%d/%Y at %H:%M' 395 | 396 | Returns: 397 | -------- 398 | Dictionary of climatology observations through the get_response() function. 399 | 400 | Raises: 401 | ------- 402 | None. 403 | 404 | """ 405 | 406 | self._check_geo_param(kwargs) 407 | kwargs['startclim'] = startclim 408 | kwargs['endclim'] = endclim 409 | kwargs['token'] = self.token 410 | 411 | return self._get_response('stations/climatology', kwargs) 412 | 413 | def variables(self): 414 | """ Returns a dictionary of a list of variables that could be obtained from the 'vars' param in other functions. 415 | Some stations may not record all variables listed. Use the metadata() function to return metadata on each 416 | station. 417 | 418 | Arguments: 419 | ---------- 420 | None. 421 | 422 | Returns: 423 | -------- 424 | Dictionary of variables. 425 | 426 | Raises: 427 | ------- 428 | None. 429 | 430 | """ 431 | 432 | return self._get_response('variables', {'token': self.token}) 433 | 434 | def metadata(self, **kwargs): 435 | r""" Returns the metadata for a station or stations. Users must specify at least one geographic search parameter 436 | ('stid', 'state', 'country', 'county', 'radius', 'bbox', 'cwa', 'nwsfirezone', 'gacc', or 'subgacc') to obtain 437 | observation data. Other parameters may also be included. See below for optional parameters. 438 | 439 | Arguments: 440 | ---------- 441 | complete: string, optional 442 | A value of 1 or 0. When set to 1, an extended list of metadata attributes for each returned station is 443 | provided. This result is useful for exploring the zones and regions in which a station resides. 444 | e.g. complete='1' 445 | sensorvars: string, optional 446 | A value of 1 or 0. When set to 1, a complete history of sensor variables and period of record is given for 447 | each station. e.g. sensorvars='1' 448 | obrange: string, optional 449 | Filters metadata for stations which were in operation for a specified time period. Users can specify one 450 | date or a date range. Dates are in the format of YYYYmmdd. e.g. obrange='20150101', 451 | obrange='20040101,20060101' 452 | obtimezone: string, optional 453 | Set to either UTC or local. Sets timezone of obs. Default is UTC. e.g. obtimezone='local' 454 | stid: string, optional 455 | Single or comma separated list of MesoWest station IDs. e.g. stid='kden,kslc,wbb' 456 | county: string, optional 457 | County/parish/borough (US/Canada only), full name e.g. county='Larimer' 458 | state: string, optional 459 | US state, 2-letter ID e.g. state='CO' 460 | country: string, optional 461 | Single or comma separated list of abbreviated 2 or 3 character countries e.g. country='us,ca,mx' 462 | radius: string, optional 463 | Distance from a lat/lon pt or stid as [lat,lon,radius (mi)] or [stid, radius (mi)]. e.g. radius="-120,40,20" 464 | bbox: string, optional 465 | Stations within a [lon/lat] box in the order [lonmin,latmin,lonmax,latmax] e.g. bbox="-120,40,-119,41" 466 | cwa: string, optional 467 | NWS county warning area. See http://www.nws.noaa.gov/organization.php for CWA list. e.g. cwa='LOX' 468 | nwsfirezone: string, optional 469 | NWS fire zones. See http://www.nws.noaa.gov/geodata/catalog/wsom/html/firezone.htm for a shapefile 470 | containing the full list of zones. e.g. nwsfirezone='LOX241' 471 | gacc: string, optional 472 | Name of Geographic Area Coordination Center e.g. gacc='EBCC' See http://gacc.nifc.gov/ for a list of GACCs. 473 | subgacc: string, optional 474 | Name of Sub GACC e.g. subgacc='EB07' 475 | vars: string, optional 476 | Single or comma separated list of sensor variables. Will return all stations that match one of provided 477 | variables. Useful for filtering all stations that sense only certain vars. Do not request vars twice in 478 | the query. e.g. vars='wind_speed,pressure' Use the variables function to see a list of sensor vars. 479 | status: string, optional 480 | A value of either active or inactive returns stations currently set as active or inactive in the archive. 481 | Omitting this param returns all stations. e.g. status='active' 482 | units: string, optional 483 | String or set of strings and pipes separated by commas. Default is metric units. Set units='ENGLISH' for 484 | FREEDOM UNITS ;) Valid other combinations are as follows: temp|C, temp|F, temp|K; speed|mps, speed|mph, 485 | speed|kph, speed|kts; pres|pa, pres|mb; height|m, height|ft; precip|mm, precip|cm, precip|in; alti|pa, 486 | alti|inhg. e.g. units='temp|F,speed|kph,metric' 487 | groupby: string, optional 488 | Results can be grouped by key words: state, county, country, cwa, nwszone, mwsfirezone, gacc, subgacc 489 | e.g. groupby='state' 490 | timeformat: string, optional 491 | A python format string for returning customized date-time groups for observation times. Can include 492 | characters. e.g. timeformat='%m/%d/%Y at %H:%M' 493 | 494 | Returns: 495 | -------- 496 | A dictionary of metadata. 497 | 498 | Raises: 499 | ------- 500 | None. 501 | 502 | """ 503 | 504 | self._check_geo_param(kwargs) 505 | kwargs['token'] = self.token 506 | 507 | return self._get_response('stations/metadata', kwargs) 508 | 509 | -------------------------------------------------------------------------------- /wellapplication/graphs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Nov 19 12:32:51 2015 4 | 5 | @author: paulinkenbrandt 6 | """ 7 | from __future__ import absolute_import, division, print_function, unicode_literals 8 | 9 | import pandas as pd 10 | import matplotlib.pyplot as plt 11 | import matplotlib as mpl 12 | import matplotlib.cm as cm 13 | from scipy.stats import linregress 14 | import numpy as np 15 | from collections import OrderedDict 16 | from datetime import datetime, timedelta 17 | 18 | 19 | 20 | def get_recess_int(df, Q, maxper=18, minper=6, thresh=30, inplace=False): 21 | """Gets intervals of recession from a hydrograph 22 | 23 | :param df: DataFrame with hydrograph data 24 | :param Q: Field in DataFrame with discharge data 25 | :param maxper: Period of record to scan discharge data for maxima; created for daily values; defaults to 18 26 | :param minper: Period of record to scan discharge data for minima; should be less than maxper; defaulst to 6 27 | :param thresh: Threshold of discharge below which maxima are not considered; defaults to 30 28 | :param inplace: Append to input database or create new one; defaults to False 29 | :return: DataFrame of original data and Max and Min, Indexes of maxima, Indexes of minima 30 | """ 31 | if inplace: 32 | data = df 33 | else: 34 | data = df.copy() 35 | data['max'] = data[Q].rolling(maxper,center=True).max() 36 | data['max'] = data.ix[data['max'] == data['value'],'max'] 37 | data['max'] = data.ix[data['max'] > thresh, 'max'] 38 | data['min'] = data[Q].rolling(minper,center=True).min() 39 | data['min'] = data.ix[data['min'] == data['value'],'min'] 40 | 41 | maxlist = data.index[data['max'].notnull()] 42 | firstmin = [] 43 | for ind in maxlist: 44 | firstmin.append(data.ix[ind:,'min'].first_valid_index()) 45 | data['min'] = data.ix[data.index.isin(firstmin),'min'] 46 | return data, maxlist, firstmin 47 | 48 | 49 | class recess(object): 50 | """Creates recession curve and modeled output to describe spring and streamflow recession. 51 | 52 | :param df: dataframe with spring discharge data 53 | :param Q: string indicating discharge field in df in units of gpm 54 | :param st: start date to examine data in [YYYY, MM, DD] format, where values are integers in an array 55 | :param end: end date to examine data 56 | :param excs: = begin date of exclusion period 57 | :param excf: = end date of exclusion period 58 | 59 | :type st: list 60 | :type end: list 61 | :type Q: str 62 | 63 | :return popt: alpha value for recession curve 64 | :return x1: days from start of recession 65 | :return x2: dates of recession curve analysis 66 | :return y1: points used for recession curve analysis 67 | :return y2: recession curve values 68 | :returns: Plot of recession curve 69 | """ 70 | 71 | def __init__(self, df, Q, st, end='', lab='', excs=[0, 0, 0], excf=[0, 0, 0]): 72 | self.ymd = [datetime.now().year, datetime.now().month, datetime.now().day] 73 | if end == '': 74 | end = self.ymd 75 | 76 | if lab == '': 77 | self.Qlab = 'Discharge' 78 | else: 79 | self.Qlab = lab 80 | 81 | self.Qz = df[Q][0] 82 | 83 | self.rec_results = self.recession(df, Q, st, end, excs, excf) 84 | 85 | def fitit(self, x, y, Q): 86 | from scipy.optimize import curve_fit 87 | 88 | func = lambda x, c: Q * np.exp(-1 * c * x) 89 | 90 | popt, pcov = curve_fit(func, x, y, p0=(1e-1)) 91 | return popt, pcov 92 | 93 | 94 | def recession(self, df, Q, st, end, excs, excf): 95 | """Creates recession curve and modeled output to describe spring and streamflow recession. 96 | 97 | The user puts in a dataframe with discharge data and defines the date range of recession. The Class will return 98 | recession values of the date range given. 99 | 100 | :param df: DataFrame with spring discharge data 101 | :type df: pandas.core.frame.DataFrame 102 | :param Q: discharge field in df in units of gpm 103 | :type Q: str 104 | :param st: start date to examine data in [YYYY, MM, DD] format, where values are integers in an array 105 | :type st: list 106 | :param end: end date to examine data 107 | :type end: list 108 | :param excs: begin date of exclusion period 109 | :param excf: end date of exclusion period 110 | :type excs: list 111 | :type excs: list 112 | 113 | :returns: popt1, x1, x2, y1, y2 114 | :return popt1: alpha value for recession curve 115 | :return x1: days from start of recession 116 | :return x2: dates of recession curve analysis 117 | :return y1: points used for recession curve analysis 118 | :return y2: recession curve values 119 | """ 120 | # account for hours in time input 121 | if len(st) == 3 and len(end) == 3: 122 | df1 = df[(df.index >= pd.datetime(st[0], st[1], st[2])) & (df.index <= pd.datetime(end[0], end[1], end[2]))] 123 | else: 124 | df1 = df[(df.index >= pd.datetime(st[0], st[1], st[2], st[3], st[4])) & ( 125 | df.index <= pd.datetime(end[0], end[1], end[2], st[3], st[4]))] 126 | 127 | # account for hours in time input 128 | if excs[0] == 0: 129 | pass 130 | else: 131 | if len(excs) == 3: 132 | df1 = df1[(df1.index < pd.datetime(excs[0], excs[1], excs[2])) | ( 133 | df1.index > pd.datetime(excf[0], excf[1], excf[2]))] 134 | else: 135 | df1 = df1[(df1.index < pd.datetime(excs[0], excs[1], excs[2], excs[3], excs[4])) | ( 136 | df1.index > pd.datetime(excf[0], excf[1], excf[2], excf[3], excf[4]))] 137 | 138 | df2 = df1.dropna(subset=[Q]) 139 | 140 | y1 = df2[Q] 141 | x1 = (df2.index.to_julian_date() - df2.index.to_julian_date()[0]) # convert to numeric days for opt. function 142 | popt1, pcov1 = self.fitit(x1, y1, y1[0]) # fit curve 143 | x2 = [df2.index[0] + timedelta(i) for i in x1] # convert back to dates for labels 144 | y2 = [y1[0] * np.exp(-1 * popt1[0] * i) for i in x1] # run function with optimized variables 145 | plt.plot(x2, y2, label='Recession (alpha = %.3f)' % popt1[0]) # report alpha value 146 | plt.scatter(x2, y1, label='Discharge') 147 | plt.ylabel(self.Qlab) 148 | plt.legend(scatterpoints=1) 149 | plt.show() 150 | return popt1, x1, x2, y1, y2 151 | 152 | 153 | class piper(object): 154 | """Class that generates rectangular piper diagrams. 155 | 156 | :param df: DataFrame containing chemistry data; must have fields labeled as abbreviations of the major ions; Na, K, 157 | NaK, Ca, Mg, Cl, HCO3, CO3, and SO4 158 | :type df: pandas.core.frame.DataFrame 159 | :param type_col: Name of field that designates the sample type (optional); defaults to '' 160 | :type type_col: str 161 | :param var_col: Name of field that contains a scalar variable to be designated by color (optional); defaults to '' 162 | :type var_col: str 163 | 164 | 165 | .. note:: 166 | Hydrochemistry - Construct Rectangular Piper plot 167 | Adopted from: Ray and Mukherjee, 2008, Groundwater 46(6): 893-896 and from code found at: 168 | http://python.hydrology-amsterdam.nl/scripts/piper_rectangular.py 169 | Based on code by: 170 | B.M. van Breukelen 171 | 172 | """ 173 | 174 | def __init__(self, df, type_col='', var_col=''): 175 | 176 | self.fieldnames = [u'Na', u'K', u'NaK', u'Ca', u'Mg', u'Cl', u'HCO3', u'CO3', u'SO4'] 177 | self.anions = ['Cl', 'HCO3', 'CO3', 'SO4'] 178 | self.cations = ['Na', 'K', 'Ca', 'Mg', 'NaK'] 179 | self.piperplot(df, type_col, var_col) 180 | 181 | 182 | def fillMissing(self, df): 183 | 184 | # fill in nulls with 0 185 | for col in df.columns: 186 | if col in self.fieldnames: 187 | for i in df.index: 188 | if df.loc[i, col] is None or df.loc[i, col] == '' or np.isnan(df.loc[i, col]): 189 | df.loc[i, col] = 0 190 | else: 191 | df.col = 0 192 | 193 | # add missing columns 194 | for name in self.fieldnames: 195 | if name in df.columns: 196 | pass 197 | else: 198 | print(name) 199 | df[name] = 0 200 | 201 | return df 202 | 203 | def check_nak(self, x): 204 | if x[0] == 0 and x[2] > 0: 205 | return x[2] 206 | else: 207 | return x[0] + x[1] 208 | 209 | def convertIons(self, df): 210 | """Convert major ion concentrations from mg/L to meq 211 | 212 | This function uses conversion factors to convert the concentrations of major ions from mg/L to meq. It also 213 | appends a field to the input database listing the Cation-Anion pair that have the highest meq concentrations. 214 | 215 | :param df: DataFrame containing chemistry data; must have fields labeled as abbreviations of the major ions; Na, K, 216 | NaK, Ca, Mg, Cl, HCO3, CO3, and SO4 217 | :returns: appends convert fields onto DataFrame with the suffix `_meq` and adds the fields 'water type', 'CBE' 218 | (charge balance), 'EC' (Sum(anions+cations)) 219 | 220 | """ 221 | # Conversion factors from mg/L to meq/L 222 | d = {'Ca': 0.04990269, 'Mg': 0.082287595, 'Na': 0.043497608, 'K': 0.02557656, 'Cl': 0.028206596, 'NaK': 0.043497608, 223 | 'HCO3': 0.016388838, 'CO3': 0.033328223, 'SO4': 0.020833333, 'NO2': 0.021736513, 'NO3': 0.016129032} 224 | 225 | df1 = df 226 | 227 | for name in self.fieldnames: 228 | if name in df.columns: 229 | df1[name + '_meq'] = df1[name].apply(lambda x: float(d.get(name, 0)) * x, 1) 230 | 231 | 232 | df1['NaK_meq'] = df1[['Na_meq', 'K_meq','NaK_meq']].apply(lambda x: self.check_nak(x), 1) 233 | 234 | df1['anions'] = 0 235 | df1['cations'] = 0 236 | 237 | for ion in self.anions: 238 | if ion in df.columns: 239 | df1['anions'] += df1[ion + '_meq'] 240 | for ion in self.cations: 241 | if ion in df1.columns: 242 | df1['cations'] += df1[ion + '_meq'] 243 | 244 | df1['total_ions'] = df1['cations'] + df1['anions'] 245 | df1['EC'] = df1['anions'] - df1['cations'] 246 | df1['CBE'] = df1['EC'] / (df1['anions'] + df1['cations']) 247 | df1['maj_cation'] = df1[['Ca_meq','Mg_meq','Na_meq','K_meq','NaK_meq']].idxmax(axis=1) 248 | df1['maj_anion'] = df1[['Cl_meq','SO4_meq','HCO3_meq','CO3_meq']].idxmax(axis=1) 249 | df1['water_type'] = df1[['maj_cation','maj_anion']].apply(lambda x: str(x[0])[:-4]+'-'+str(x[1])[:-4],1) 250 | return df1 251 | 252 | def ionPercentage(self, df): 253 | """Determines percentage of charge for each ion for display on the piper plot""" 254 | for ion in self.anions: 255 | df[ion + 'EC'] = df[[ion + '_meq', 'anions']].apply(lambda x: 100 * x[0] / x[1], 1) 256 | for ion in self.cations: 257 | df[ion + 'EC'] = df[[ion + '_meq', 'cations']].apply(lambda x: 100 * x[0] / x[1], 1) 258 | 259 | return df 260 | 261 | def piperplot(self, df, type_col, var_col): 262 | """Generates a rectangular piper diagram""" 263 | self.fillMissing(df) 264 | self.convertIons(df) 265 | self.ionPercentage(df) 266 | 267 | CaEC = df['CaEC'].values 268 | MgEC = df['MgEC'].values 269 | ClEC = df['ClEC'].values 270 | SO4EC = df['SO4EC'].values 271 | NaKEC = df['NaKEC'].values 272 | SO4ClEC = df[['ClEC', 'SO4EC']].apply(lambda x: x[0] + x[1], 1).values 273 | 274 | 275 | 276 | num_samps = len(df) 277 | if var_col == '': 278 | Elev = '' 279 | else: 280 | Elev = df[var_col].values 281 | 282 | if type_col == '': 283 | typ = ['Station']*num_samps 284 | stationtypes = ['Station'] 285 | else: 286 | stationtypes = list(df[type_col].unique()) 287 | typ = df[type_col].values 288 | 289 | # Change default settings for figures 290 | plt.rc('xtick', labelsize=10) 291 | plt.rc('ytick', labelsize=10) 292 | plt.rc('font', size=12) 293 | plt.rc('legend', fontsize=12) 294 | plt.rc('figure', figsize=(14, 5.5)) # defines size of Figure window orig (14,4.5) 295 | 296 | markSize = 30 297 | lineW = 0.5 298 | 299 | # Make Figure 300 | fig = plt.figure() 301 | # add title 302 | # fig.suptitle(piperTitle, x=0.20,y=.98, fontsize=14 ) 303 | # Colormap and Saving Options for Figure 304 | 305 | if len(Elev) > 0: 306 | vart = Elev 307 | else: 308 | vart = [1] * num_samps 309 | cNorm = plt.Normalize(vmin=min(vart), vmax=max(vart)) 310 | cmap = plt.cm.coolwarm 311 | # pdf = PdfPages(fileplace) 312 | 313 | mrkrSymbl = ['v', '^', '+', 's', '.', 'o', '*', 'v', '^', '+', 's', ',', '.', 'o', '*', 'v', '^', '+', 's', ',', 314 | '.', 'o', '*', 'v', '^', '+', 's', ',', '.', 'o', '*'] 315 | 316 | # count variable for legend (n) 317 | unique, counts = np.unique(typ, return_counts=True) 318 | nstatTypesDict = dict(zip(unique, counts)) 319 | 320 | typdict = {} 321 | for i in range(len(stationtypes)): 322 | typdict[stationtypes[i]] = mrkrSymbl[i] 323 | 324 | # CATIONS----------------------------------------------------------------------------- 325 | # 2 lines below needed to create 2nd y-axis (ax1b) for first subplot 326 | ax1 = fig.add_subplot(131) 327 | ax1b = ax1.twinx() 328 | 329 | ax1.fill([100, 0, 100, 100], [0, 100, 100, 0], color=(0.8, 0.8, 0.8)) 330 | ax1.plot([100, 0], [0, 100], 'k') 331 | ax1.plot([50, 0, 50, 50], [0, 50, 50, 0], 'k--') 332 | ax1.text(25, 15, 'Na type') 333 | ax1.text(75, 15, 'Ca type') 334 | ax1.text(25, 65, 'Mg type') 335 | 336 | if len(typ) > 0: 337 | for j in range(len(typ)): 338 | ax1.scatter(CaEC[j], MgEC[j], s=markSize, c=vart[j], cmap=cmap, norm=cNorm, marker=typdict[typ[j]], 339 | linewidths=lineW) 340 | else: 341 | ax1.scatter(CaEC, MgEC, s=markSize, c=vart, cmap=cmap, norm=cNorm, linewidths=lineW) 342 | 343 | ax1.set_xlim(0, 100) 344 | ax1.set_ylim(0, 100) 345 | ax1b.set_ylim(0, 100) 346 | ax1.set_xlabel('<= Ca (% meq)') 347 | ax1b.set_ylabel('Mg (% meq) =>') 348 | plt.setp(ax1, yticklabels=[]) 349 | 350 | # next line needed to reverse x axis: 351 | ax1.set_xlim(ax1.get_xlim()[::-1]) 352 | 353 | # ANIONS---------------------------------------------------------------------------- 354 | ax = fig.add_subplot(1, 3, 3) 355 | ax.fill([100, 100, 0, 100], [0, 100, 100, 0], color=(0.8, 0.8, 0.8)) 356 | ax.plot([0, 100], [100, 0], 'k') 357 | ax.plot([50, 50, 0, 50], [0, 50, 50, 0], 'k--') 358 | ax.text(55, 15, 'Cl type') 359 | ax.text(5, 15, 'HCO3 type') 360 | ax.text(5, 65, 'SO4 type') 361 | 362 | if len(typ) > 0: 363 | for j in range(len(typ)): 364 | labs = "{:} n= {:}".format(typ[j],nstatTypesDict[typ[j]]) 365 | if float(nstatTypesDict[typ[j]]) > 1: 366 | s = ax.scatter(ClEC[j], SO4EC[j], s=markSize, c=vart[j], cmap=cmap, norm=cNorm, 367 | marker=typdict[typ[j]], label=labs, linewidths=lineW) 368 | else: 369 | s = ax.scatter(ClEC[j], SO4EC[j], s=markSize, c=vart[j], cmap=cmap, norm=cNorm, 370 | marker=typdict[typ[j]], label=typ[j], linewidths=lineW) 371 | else: 372 | s = ax.scatter(ClEC, SO4EC, s=markSize, c=vart, cmap=cmap, norm=cNorm, label='Sample', linewidths=lineW) 373 | 374 | ax.set_xlim(0, 100) 375 | ax.set_ylim(0, 100) 376 | ax.set_xlabel('Cl (% meq) =>') 377 | ax.set_ylabel('SO4 (% meq) =>') 378 | 379 | # CATIONS AND ANIONS COMBINED --------------------------------------------------------------- 380 | # 2 lines below needed to create 2nd y-axis (ax1b) for first subplot 381 | ax2 = fig.add_subplot(132) 382 | ax2b = ax2.twinx() 383 | 384 | ax2.plot([0, 100], [10, 10], 'k--') 385 | ax2.plot([0, 100], [50, 50], 'k--') 386 | ax2.plot([0, 100], [90, 90], 'k--') 387 | ax2.plot([10, 10], [0, 100], 'k--') 388 | ax2.plot([50, 50], [0, 100], 'k--') 389 | ax2.plot([90, 90], [0, 100], 'k--') 390 | 391 | if len(typ) > 0: 392 | for j in range(len(typ)): 393 | ax2.scatter(NaKEC[j], SO4ClEC[j], s=markSize, c=vart[j], cmap=cmap, norm=cNorm, marker=typdict[typ[j]], 394 | linewidths=lineW) 395 | else: 396 | ax2.scatter(NaKEC, SO4ClEC, s=markSize, c=vart, cmap=cmap, norm=cNorm, linewidths=lineW) 397 | 398 | ax2.set_xlim(0, 100) 399 | ax2.set_ylim(0, 100) 400 | ax2.set_xlabel('Na+K (% meq) =>') 401 | ax2.set_ylabel('SO4+Cl (% meq) =>') 402 | ax2.set_title('<= Ca+Mg (% meq)', fontsize=12) 403 | ax2b.set_ylabel('<= CO3+HCO3 (% meq)') 404 | ax2b.set_ylim(0, 100) 405 | 406 | # next two lines needed to reverse 2nd y axis: 407 | ax2b.set_ylim(ax2b.get_ylim()[::-1]) 408 | 409 | # Align plots 410 | plt.subplots_adjust(left=0.05, bottom=0.35, right=0.95, top=0.90, wspace=0.4, hspace=0.0) 411 | 412 | # Legend----------------------------------------------------------------------------------------- 413 | 414 | # Add colorbar below legend 415 | # [left, bottom, width, height] where all quantities are in fractions of figure width and height 416 | 417 | if len(typ) > 0: 418 | handles, labels = ax.get_legend_handles_labels() 419 | by_label = OrderedDict(zip(labels, handles)) 420 | 421 | plt.legend(by_label.values(), by_label.keys(), loc='lower center', ncol=5, shadow=False, fancybox=True, 422 | bbox_to_anchor=(0.5, -0.3), scatterpoints=1) 423 | 424 | if len(Elev) > 0: 425 | cax = fig.add_axes([0.25, 0.10, 0.50, 0.02]) 426 | cb1 = plt.colorbar(s, cax=cax, cmap=cmap, norm=cNorm, orientation='horizontal') # use_gridspec=True 427 | cb1.set_label(var_col, size=8) 428 | 429 | self.plot = fig 430 | self.df = df 431 | 432 | 433 | def fdc(df, site, begyear=1900, endyear=2015, normalizer=1, plot=True): 434 | """Generate flow duration curve for hydrologic time series data 435 | 436 | :param df: DataFrame with discharge data of interest; must have a date or date-time as the index 437 | :type df: pandas.core.frame.DataFrame 438 | :param site: Name of DataFrame column in df containing discharge data 439 | :type site: str 440 | :param begyear: beginning year of analysis; defaults to 1900 441 | :type begyear: int 442 | :param endyear: end year of analysis; defaults to 2015 443 | :type endyear: int 444 | :param normalizer: value to use to normalize discharge; defaults to 1 (no normalization) 445 | :type normalizer: int 446 | :param plot: Whether to generate the plot or just return the variables for a plot; defaults to true 447 | :type plot: bool 448 | 449 | :returns: matplotlib plot displaying the flow duration curve of the data 450 | :return prob: x field stating the probability of a discharge in data 451 | :rtype prob: list 452 | :return data: y field stating the discharge for probability prob 453 | :rtype data: list 454 | 455 | """ 456 | from scipy import stats as sp 457 | # limit dataframe to only the site 458 | df = df[[site]] 459 | 460 | # filter dataframe to only include dates of interest 461 | data = df[ 462 | (pd.to_datetime(df.index) > pd.datetime(begyear, 1, 1)) & (pd.to_datetime(df.index) < pd.datetime(endyear, 1, 1))] 463 | 464 | # remove na values from dataframe 465 | data = data.dropna() 466 | 467 | # take average of each day of year (from 1 to 366) over the selected period of record 468 | data['doy'] = data.index.dayofyear 469 | dailyavg = data[site].groupby(data['doy']).mean() 470 | 471 | data = np.sort(dailyavg) 472 | 473 | ## uncomment the following to use normalized discharge instead of discharge 474 | # mean = np.mean(data) 475 | # std = np.std(data) 476 | # data = [(data[i]-np.mean(data))/np.std(data) for i in range(len(data))] 477 | data = [(data[i]) / normalizer for i in range(len(data))] 478 | 479 | # ranks data from smallest to largest 480 | ranks = sp.rankdata(data, method='average') 481 | 482 | # reverses rank order 483 | ranks = ranks[::-1] 484 | 485 | # calculate probability of each rank 486 | prob = [(ranks[i] / (len(data) + 1)) for i in range(len(data))] 487 | 488 | # plot data via matplotlib 489 | if plot: 490 | plt.plot(prob, data, label=site + ' ' + str(begyear) + '-' + str(endyear)) 491 | else: 492 | pass 493 | return prob, data 494 | 495 | 496 | class gantt(object): 497 | """Class to create gantt plots and to summarize pandas timeseries dataframes. 498 | 499 | Finds gaps and measuring duration of data. 500 | :param df: The DataFrame with a datetime index and columns as site time-series data; each column name 501 | should be the site name or the site labels should be input for chart 502 | :param stations: List of columns to include in the chart; defaults to all columns 503 | :param labels: Labels to use in the resulting plot for each station; must be equal to the length of stations list; 504 | defaults to stations 505 | :param samp_int: regular interval that the datetime index will be resampled. Defaults to daily ('D'); 506 | see http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases for all of the options 507 | :type samp_int: str 508 | :type df: pandas.core.frame.DataFrame 509 | :type stations: list 510 | :type labels: list 511 | 512 | .. note:: 513 | `.stations` produces a list describing the stations put into the class 514 | `.labels` produces a list describing the labels put into the class 515 | `.dateranges` is a dictionary describing gaps in the dataframe based on the presence of nan values in the frame 516 | `.ganttPlotter()` plots a gantt plot 517 | 518 | """ 519 | 520 | def __init__(self, df, stations=[], labels=[], samp_int = 'D'): 521 | if len(stations) == 0: 522 | stations = df.columns 523 | if len(labels) == 0: 524 | labels = stations 525 | 526 | self.data = df.resample(samp_int).mean() 527 | self.stations = stations 528 | self.labels = labels 529 | self.dateranges = self.markGaps() 530 | self.sitestats = self.site_info() 531 | print( 532 | 'Data Loaded \nType .ganttPlotter() after your defined object to make plot\nType .sitestats after your defined object to get summary stats') 533 | 534 | def markGaps(self): 535 | """Produces dictionary of list of gaps in time series data based on the presence of nan values; 536 | used for gantt plotting 537 | 538 | :returns: dateranges; a dictionary with station names as keys and lists of begin and end dates as values 539 | """ 540 | df = self.data 541 | stations = self.stations 542 | 543 | dateranges = {} 544 | for station in stations: 545 | dateranges[station] = [] 546 | first = df.ix[:, station].first_valid_index() 547 | last = df.ix[:, station].last_valid_index() 548 | records = df.ix[first:last, station] 549 | #dateranges[station].append(pd.to_datetime(first)) 550 | for i in range(len(records) - 1): 551 | if pd.isnull(records[i + 1]) and pd.notnull(records[i]): 552 | dateranges[station].append(pd.to_datetime(records.index)[i]) 553 | elif pd.isnull(records[i]) and pd.notnull(records[i + 1]): 554 | dateranges[station].append(pd.to_datetime(records.index)[i]) 555 | dateranges[station].append(pd.to_datetime(last)) 556 | return dateranges 557 | 558 | def site_info(self): 559 | """Creates a table of summary statistics for all of the stations in the stations field defined in the class 560 | 561 | :returns: site_info; a table of summary statistics; first, last, min, max, std, median, avg, 25%tile, 75%tile, 562 | and count 563 | 564 | """ 565 | stations = self.stations 566 | df = self.data 567 | 568 | stat, first, last, minum, maxum, stdev, medin, avg, q25, q75, count = [], [], [], [], [], [], [], [], [], [], [] 569 | for station in stations: 570 | stdt = df.ix[:, station] 571 | stat.append(station) 572 | first.append(stdt.first_valid_index()) 573 | last.append(stdt.last_valid_index()) 574 | minum.append(stdt.min()) 575 | maxum.append(stdt.max()) 576 | stdev.append(stdt.std()) 577 | medin.append(stdt.median()) 578 | avg.append(stdt.mean()) 579 | q25.append(stdt.quantile(0.25)) 580 | q75.append(stdt.quantile(0.75)) 581 | count.append(stdt.count()) 582 | colm = {'StationId': stat, 'first': first, 'last': last, 'min': minum, 'max': maxum, 583 | 'std': stdev, 'median': medin, 'mean': avg, 'q25': q25, 'q75': q75, 'count': count} 584 | Site_Info = pd.DataFrame(colm) 585 | return Site_Info 586 | 587 | def ganttPlotter(self): 588 | """Plots gantt plot using dictionary of stations and associated start and end dates; 589 | uses output from markGaps function""" 590 | 591 | labs, tickloc, col = [], [], [] 592 | 593 | dateranges = self.dateranges 594 | stations = self.stations 595 | labels = self.labels 596 | 597 | # create color iterator for multi-color lines in gantt chart 598 | color = iter(plt.cm.Dark2(np.linspace(0, 1, len(stations)))) 599 | 600 | plt.figure(figsize=[8, 10]) 601 | fig, ax = plt.subplots() 602 | 603 | for i in range(len(stations)): 604 | c = next(color) 605 | for j in range(len(dateranges[stations[i]]) - 1): 606 | if (j + 1) % 2 != 0: 607 | if len(labels) == 0 or len(labels) != len(stations): 608 | plt.hlines(i + 1, dateranges[stations[i]][j], dateranges[stations[i]][j + 1], label=stations[i], 609 | color=c, linewidth=3) 610 | else: 611 | plt.hlines(i + 1, dateranges[stations[i]][j], dateranges[stations[i]][j + 1], label=labels[i], 612 | color=c, linewidth=3) 613 | labs.append(stations[i]) 614 | tickloc.append(i + 1) 615 | col.append(c) 616 | plt.ylim(0, len(stations) + 1) 617 | 618 | if len(labels) == 0 or len(labels) != len(stations): 619 | labels = stations 620 | plt.yticks(tickloc, labs) 621 | else: 622 | plt.yticks(tickloc, labels) 623 | 624 | plt.xlabel('Date') 625 | plt.ylabel('Station Name') 626 | plt.grid(linewidth=0.2) 627 | 628 | gytl = plt.gca().get_yticklabels() 629 | for i in range(len(gytl)): 630 | gytl[i].set_color(col[i]) 631 | plt.tight_layout() 632 | return fig 633 | 634 | def gantt(self): 635 | """This function runs the other functions in this class.""" 636 | stations = self.stations 637 | labels = self.labels 638 | df = self.data 639 | 640 | df1 = df.ix[:, stations] 641 | df1.sort_index(inplace=True) 642 | Site_Info = self.site_info() 643 | dateranges = self.markGaps() 644 | fig = self.ganttPlotter() 645 | return Site_Info, dateranges, fig 646 | 647 | 648 | def scatterColor(x0, y, w): 649 | """Creates scatter plot with points colored by variable. 650 | All input arrays must have matching lengths 651 | 652 | :param x0: x values to plot 653 | :type x0: list 654 | :param y: y values to plot 655 | :type y: list 656 | :param w: z values to plot 657 | 658 | :returns: plot; slope and intercept of the RLM best fit line shown on the plot 659 | .. warning:: all input arrays must have matching lengths and scalar values 660 | .. note:: See documentation at http://statsmodels.sourceforge.net/0.6.0/generated/statsmodels.robust.robust_linear_model.RLM.html 661 | for the RLM line 662 | """ 663 | 664 | 665 | cmap = plt.cm.get_cmap('RdYlBu') 666 | norm = mpl.colors.Normalize(vmin=w.min(), vmax=w.max()) 667 | m = cm.ScalarMappable(norm=norm, cmap=cmap) 668 | m.set_array(w) 669 | 670 | 671 | plt.scatter(x0, y, label='', color=m.to_rgba(w)) 672 | 673 | slope, intercept, r_value, p_value, std_err = linregress(x0, y) 674 | 675 | x1 = np.arange(np.min(x0), np.max(x0), 0.1) 676 | y1 = [i * slope + intercept for i in x1] 677 | 678 | plt.plot(x1, y1, c='g', 679 | label='simple linear regression m = {:.2f} b = {:.0f}, r^2 = {:.2f}'.format(slope, intercept, r_value ** 2)) 680 | 681 | plt.legend() 682 | cbar = plt.colorbar(m) 683 | 684 | cbar.set_label('Julian Date') 685 | 686 | return slope, intercept 687 | 688 | 689 | 690 | 691 | def peakdet(v, delta, x=None): 692 | """ 693 | :param v: data vector 694 | :param delta: 695 | :param x: 696 | :returns: two arrays 697 | 698 | .. note:: https://gist.github.com/endolith/250860 699 | Converted from MATLAB script at http://billauer.co.il/peakdet.html 700 | """ 701 | import sys 702 | 703 | maxtab = [] 704 | mintab = [] 705 | 706 | if x is None: 707 | x = np.arange(len(v)) 708 | 709 | v = np.asarray(v) 710 | 711 | if len(v) != len(x): 712 | sys.exit('Input vectors v and x must have same length') 713 | 714 | if not np.isscalar(delta): 715 | sys.exit('Input argument delta must be a scalar') 716 | 717 | if delta <= 0: 718 | sys.exit('Input argument delta must be positive') 719 | 720 | mn, mx = np.Inf, -np.Inf 721 | mnpos, mxpos = np.NaN, np.NaN 722 | 723 | lookformax = True 724 | 725 | for i in np.arange(len(v)): 726 | this = v[i] 727 | if this > mx: 728 | mx = this 729 | mxpos = x[i] 730 | if this < mn: 731 | mn = this 732 | mnpos = x[i] 733 | 734 | if lookformax: 735 | if this < mx - delta: 736 | maxtab.append((mxpos, mx)) 737 | mn = this 738 | mnpos = x[i] 739 | lookformax = False 740 | else: 741 | if this > mn + delta: 742 | mintab.append((mnpos, mn)) 743 | mx = this 744 | mxpos = x[i] 745 | lookformax = True 746 | 747 | return np.array(maxtab), np.array(mintab) 748 | --------------------------------------------------------------------------------