├── .Rhistory
├── VERSION
├── test
│   ├── 20161102_Barometer.xle
│   ├── __init__.py
│   ├── ag13a 2016-08-02.xle
│   ├── ag13c 2016-08-02.xle
│   ├── 20160919_LittleHobble.xle
│   ├── ManualMeasurements.csv
│   ├── usgsP.csv
│   └── test.py
├── .idea
│   ├── markdown-navigator
│   │   └── profiles_settings.xml
│   ├── libraries
│   │   └── R_User_Library.xml
│   ├── vcs.xml
│   ├── preferred-vcs.xml
│   ├── modules.xml
│   ├── misc.xml
│   ├── inspectionProfiles
│   │   └── Project_Default.xml
│   ├── WellApplication.iml
│   └── markdown-navigator.xml
├── requirements.txt
├── wellapplication
│   ├── __init__.py
│   ├── MannKendall.py
│   ├── hydropy.py
│   ├── arcpy_functions.py
│   ├── ros.py
│   ├── chem.py
│   ├── usgs.py
│   ├── mesopy.py
│   └── graphs.py
├── pyproject.toml
├── LICENSE.txt
├── .travis.yml
├── setup.py
├── CHANGES.txt
├── docs
│   ├── Transport.ipynb
│   └── UMAR_WL_Data.ipynb
└── README.md
/.Rhistory:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/VERSION:
--------------------------------------------------------------------------------
1 | 0.5.10
2 |
--------------------------------------------------------------------------------
/test/20161102_Barometer.xle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/utah-geological-survey/WellApplication/HEAD/test/20161102_Barometer.xle
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | __author__ = 'Paul Inkenbrandt'
4 | __email__ = 'paulinkenbrandt@utah.gov'
5 |
--------------------------------------------------------------------------------
/test/ag13a 2016-08-02.xle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/utah-geological-survey/WellApplication/HEAD/test/ag13a 2016-08-02.xle
--------------------------------------------------------------------------------
/test/ag13c 2016-08-02.xle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/utah-geological-survey/WellApplication/HEAD/test/ag13c 2016-08-02.xle
--------------------------------------------------------------------------------
/test/20160919_LittleHobble.xle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/utah-geological-survey/WellApplication/HEAD/test/20160919_LittleHobble.xle
--------------------------------------------------------------------------------
/.idea/markdown-navigator/profiles_settings.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/libraries/R_User_Library.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | Pandas >= 0.16.0
2 | Numpy >= 0.7.0
3 | Matplotlib >= 1.1
4 | xmltodict >= 0.6.2
5 | scipy >= 0.10.0
6 | pyproj >= 1.9.4
7 | requests >= 2.11.1
8 | xlrd >= 0.5.4
9 | statsmodels >= 0.6.0
10 |
--------------------------------------------------------------------------------
/.idea/preferred-vcs.xml:
--------------------------------------------------------------------------------
4 | ApexVCS
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/wellapplication/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import, division, print_function, unicode_literals
3 | import os
4 |
5 | from .transport import *
6 | from .usgs import *
7 | from .chem import *
8 | from .mesopy import *
9 | from .graphs import *
10 | from .MannKendall import *
11 | from .ros import *
12 | from .arcpy_functions import *
13 |
14 | __version__ = '0.5.10'
15 | __author__ = 'Paul Inkenbrandt'
16 | __name__ = 'wellapplication'
17 |
18 | __all__ = ['usgs','chem','transport','ros','hydropy','graphs','MannKendall',
19 | 'mesopy','arcpy_functions']
20 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/test/ManualMeasurements.csv:
--------------------------------------------------------------------------------
1 | datetime,Fork,staff gage,CFS
2 | 8/16/2016 12:49,SpringCreek,0.0825,1.714458
3 | 8/16/2016 15:57,SpringCreek,,2
4 | 8/16/2016 13:55,UpperMain,0.35,0.84
5 | 8/17/2016 15:40,SpringCreek,0.79,1.84
6 | 8/18/2016 16:55,LittleHobble,0.15,0.47
7 | 8/25/2016 12:54,UpperMain,,0.24
8 | 8/25/2016 14:30,LowerMain,0.2,0.6
9 | 9/19/2016 10:30,LittleHobble,0.15,0.39
10 | 9/19/2016 11:00,UpperMain,0.25,1.14
11 | 9/19/2016 11:50,SpringCreek,0.775,1.81
12 | 9/19/2016 13:02,LowerMain,0.2,2.96
13 | 10/5/2016 11:24,LittleHobble,0.2,0.58
14 | 10/5/2016 12:45,SpringCreek,,1.81
15 | 10/5/2016 13:15,LowerMain,0.2,3
16 | 11/2/2016 9:00,LowerMain,0.25,5.45
17 | 11/2/2016 10:30,SpringCreek,0.5,1.61
18 | 11/2/2016 12:10,UpperMain,,1.15
19 | 11/2/2016 13:18,LittleHobble,0.23,0.77
20 |
--------------------------------------------------------------------------------
/.idea/WellApplication.iml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools >= 61.0"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | dynamic = ["version"]
7 | name = "WellApplication"
8 | requires-python = ">= 3.8"
9 | authors = [{name = "Paul Inkenbrandt", email = "paulinkenbrandt@utah.gov"}]
10 | maintainers = [{name = "Paul Inkenbrandt", email = "paulinkenbrandt@utah.gov"}]
11 | description = "Tools used to manipulate hydrograph data; Solinst xles to pandas dataframe; hydrograph analysis; hydrology; Gantt Charts; Piper diagrams; hydrogeology; wells; groundwater; USGS data; WQP data;"
12 | readme = "README.md"
13 | license = {file = "LICENSE.txt"}
14 | keywords = ["well", "groundwater", "transducer"]
15 |
16 | [project.urls]
17 | Homepage = "https://github.com/utah-geological-survey/WellApplication"
18 | Issues = "https://github.com/utah-geological-survey/WellApplication/issues"
19 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016 Paul
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 |
3 | python:
4 | - "2.7"
5 | - "3.5"
6 |
7 | sudo: required
8 |
9 | before_install:
10 | - pip install pytest
11 | - "export DISPLAY=:99.0"
12 | - "sh -e /etc/init.d/xvfb start"
13 | - sleep 3 # give xvfb some time to start
14 |
15 | env:
16 | - DEPS="numpy scipy matplotlib pandas statsmodels"
17 |
18 | install:
19 | - pip install codecov
20 | - pip install pytest-cov
21 | - pip install -r requirements.txt
22 |
23 |
24 | script:
25 | - pytest test/test.py --cov=./
26 |
27 | after_success:
28 | - codecov
29 |
30 | deploy:
31 | provider: pypi
32 | distributions: sdist
33 | user: "paulinkenbrandt"
34 | password:
35 | secure: "TtR3oPiYSSb3/eUny7/jcZAnLhfpZs+52N/ieKekL7bCJc+N7EK2ViktOE0c3uoA3dqjC+AnP4VUcpISCvmvQx7D2l1udMhBwxg+/08/6b+Gb4leuNgEZ+gdRVRHvHHTaTwLRk4dRhOpfXnf8L7W4ZPCOjc+3jZSvObqDcbN5uW1fcpb4zNwhYEFf/seWsNXw8EBTZ4CgDvaBKexHAifrT53BmhWULs+ImmUqryzxdJFneq8lvkbnJFRnqsy8B2aLC7Ev+JfNiIVDSvYZAB9M614iKaBkPscful242UOc0mZBj4IoQMxBt9oduyoztJqTdhos/VHpB0PGtek/FL5EukRtUqkjgWf9h0Abp6zpxYTjOxlQpb0lNmKSHmz8+RbNUJ9WtGu4ybQip4nqSuWZ/Df1EGdZ6ndlVobmHyP172vxSUE619wf4zFAu3PqjCNDs9QApLtd7RnEcwt9kPnTH+TRXM/w6xjltKMHN4Y9dqses3I6AjlcSEB48XehBTuW7tOH0sd5ytHeqBC2/RWQHaiZFm042kgeK+m0A0P0dgcOx6KnDhkHyeNoc1Qh05CnuFSMMr1dQXyu3UaHqdJdEilbSxalWuFf4ISwjBrCIJgI8RFmEfdWbV1z8kqCgqvB5K5q3JUfh0wGtKVpMv6jA9edK+p6FQayvuSK0TPZOs="
36 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function, unicode_literals
2 | import sys
3 | import os
4 | from setuptools import setup, find_packages
5 |
6 | if not sys.version_info[0] in [2,3]:
7 | print('Sorry, wellapplication not supported in your Python version')
8 | print(' Supported versions: 2 and 3')
9 | print(' Your version of Python: {}'.format(sys.version_info[0]))
10 | sys.exit(1) # return non-zero value for failure
11 |
12 | long_description = 'A tool for hydrogeologists to upload and display hydrographs and geochemical data'
13 |
14 | try:
15 | import pypandoc
16 |
17 | long_description = pypandoc.convert('README.md', 'rst')
18 | except:
19 | pass
20 |
21 | setup(name='wellapplication',
22 | description = 'Interface with xle files; analyze hydrographs; plot hydrographs; download USGS data',
23 | long_description = long_description,
24 | version = '0.5.10',
25 | author = 'Paul Inkenbrandt',
26 | author_email = 'paulinkenbrandt@utah.gov',
27 | url = 'https://github.com/inkenbrandt/WellApplication',
28 | license = 'LICENSE.txt',
29 | install_requires=["Pandas >= 0.16.0",
30 | "Numpy >= 0.7.0",
31 | "Matplotlib >= 1.1",
32 | "xmltodict >= 0.6.2",
33 | "scipy >= 0.10.0",
34 | "pyproj >= 1.9.4",
35 | "requests >= 2.11.1",
36 | "xlrd >= 0.5.4"],
37 | packages = find_packages(exclude=['contrib', 'docs', 'tests*']))
38 |
39 |
40 |
41 |
42 |
43 |
--------------------------------------------------------------------------------
/test/usgsP.csv:
--------------------------------------------------------------------------------
1 | ,PO4,month,year
2 | 0,0.07,1,1972
3 | 1,0.33,1,1973
4 | 2,0.7,1,1974
5 | 3,0.08,1,1975
6 | 4,0.04,1,1976
7 | 5,0.05,1,1977
8 | 6,0.14,1,1978
9 | 7,0.08,1,1979
10 | 8,0.11,2,1972
11 | 9,0.24,2,1973
12 | 10,0.17,2,1974
13 | 11,0.11,2,1978
14 | 12,0.04,2,1979
15 | 13,0.6,3,1972
16 | 14,0.12,3,1973
17 | 15,0.16,3,1974
18 | 16,0.14,3,1976
19 | 17,0.05,3,1976
20 | 18,0.03,3,1977
21 | 19,0.02,3,1978
22 | 20,0.02,3,1979
23 | 21,0.1,4,1972
24 | 22,0.08,4,1973
25 | 23,1.2,4,1974
26 | 24,0.11,4,1975
27 | 25,0.06,4,1975
28 | 26,0.05,4,1976
29 | 27,0.04,4,1977
30 | 28,0.06,4,1978
31 | 29,0.01,4,1979
32 | 30,0.04,5,1972
33 | 31,0.03,5,1973
34 | 32,0.12,5,1974
35 | 33,0.09,5,1975
36 | 34,0.02,5,1976
37 | 35,0.04,5,1977
38 | 36,0.03,5,1978
39 | 37,0.03,5,1979
40 | 38,0.05,6,1972
41 | 39,0.01,6,1973
42 | 40,0.05,6,1974
43 | 41,0.05,6,1975
44 | 42,0.03,6,1978
45 | 43,0.01,6,1979
46 | 44,0.04,7,1972
47 | 45,0.04,7,1973
48 | 46,0.03,7,1974
49 | 47,0.02,7,1975
50 | 48,0.06,7,1977
51 | 49,0.02,7,1978
52 | 50,0.04,7,1979
53 | 51,0.05,8,1972
54 | 52,0.06,8,1973
55 | 53,0.03,8,1974
56 | 54,0.05,8,1975
57 | 55,0.07,8,1976
58 | 56,0.08,8,1977
59 | 57,0.06,8,1978
60 | 58,0.02,8,1979
61 | 59,0.1,9,1972
62 | 60,0.09,9,1973
63 | 61,0.06,9,1974
64 | 62,0.1,9,1975
65 | 63,0.08,9,1977
66 | 64,0.05,9,1978
67 | 65,0.06,9,1979
68 | 66,0.13,10,1972
69 | 67,0.13,10,1973
70 | 68,0.12,10,1974
71 | 69,0.13,10,1975
72 | 70,0.13,10,1976
73 | 71,0.09,10,1977
74 | 72,0.1,10,1978
75 | 73,0.33,10,1979
76 | 74,0.14,11,1972
77 | 75,0.42,11,1973
78 | 76,0.47,11,1977
79 | 77,0.14,11,1978
80 | 78,0.13,12,1972
81 | 79,0.15,12,1973
82 | 80,0.12,12,1974
83 | 81,0.09,12,1974
84 | 82,0.05,12,1975
85 | 83,0.3,12,1977
86 | 84,0.07,12,1978
87 |
--------------------------------------------------------------------------------
/CHANGES.txt:
--------------------------------------------------------------------------------
1 | v0.2.4, 2016-01-23 -- added to readme
2 | v0.2.2, 2016-01-22 -- added tests
3 | v0.2.0, 2016-01-22 -- added to readme and docs; changed setup file
4 | v0.1.7, 2016-01-22 -- added to readme and docs
5 | v0.1.6, 2016-01-18 -- added to readme and docs
6 | v0.1.5, 2016-01-18 -- added features; fixed bugs
7 | v0.1.4, 2016-01-17 -- added features
8 | v0.1.3, 2016-01-17 -- added features
9 | v0.1.2, 2016-01-16 -- added features
10 | v0.1.1, 2016-01-16 -- reupload
11 | v0.1.0, 2016-01-16 -- added a ton of functions, sped up usgs functions
12 | v0.0.39, 2016-01-08 -- urlliberr
13 | v0.0.38, 2016-01-08 -- added usgs functions
14 | v0.0.36, 2016-01-08 -- added gantt function
15 | v0.0.35, 2016-01-07 -- added fdc function
16 | v0.0.28, 2016-01-07 -- added piper class
17 | v0.0.27, 2016-01-04 -- added functions
18 | v0.0.26, 2016-01-04 -- indentation
19 | v0.0.25, 2016-01-04 -- coerce to numeric
20 | v0.0.24, 2016-01-03 -- bug fix, added docs
21 | v0.0.23, 2016-01-03 -- bug fix, added chem.py, fixed WQP
22 | v0.0.22, 2016-01-03 -- bug fix, added chem.py
23 | v0.0.18, 2016-01-03 -- version
24 | v0.0.17, 2016-01-03 -- fixed class
25 | v0.0.16, 2016-01-03 -- fixed class
26 | v0.0.15, 2016-01-03 -- fixed class
27 | v0.0.14, 2016-01-03 -- fixed class
28 | v0.0.13, 2016-01-03 -- fixed variable
29 | v0.0.12, 2016-01-03 -- fixed variable
30 | v0.0.11, 2016-01-03 -- added class
31 | v0.0.10, 2016-01-03 -- fixed standard library dependency
32 | v0.0.9, 2016-01-03 -- fixed standard library dependency added docs
33 | v0.0.8, 2016-01-03 -- added class usgsGis in gis
34 | v0.0.7, 2016-01-03 -- improved readme (.rst)
35 | v0.0.6, 2016-01-01 -- connected object in init
36 | v0.0.5, 2016-01-01 -- renamed file, added class
37 | v0.0.4, 2016-01-01 -- Improved Setup file
38 | v0.0.3, 2016-01-01 -- Added to Readme
39 | v0.0.2, 2016-01-01 -- Added to Readme
40 | v0.0.1, 2016-01-01 -- Initial release.
41 |
--------------------------------------------------------------------------------
/docs/Transport.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "%matplotlib inline\n",
12 | "import pandas as pd\n",
13 | "import platform\n",
14 | "import sys\n",
15 | "import numpy as np\n",
16 | "import matplotlib.pyplot as plt\n",
17 | "import matplotlib\n",
18 | "from pylab import rcParams\n",
19 | "rcParams['figure.figsize'] = 10, 10"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 2,
25 | "metadata": {
26 | "collapsed": true
27 | },
28 | "outputs": [],
29 | "source": [
30 | "import wellapplication as wa"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 3,
36 | "metadata": {
37 | "collapsed": false
38 | },
39 | "outputs": [
40 | {
41 | "name": "stdout",
42 | "output_type": "stream",
43 | "text": [
44 | "Operating System Linux 4.4.0-59-generic\n",
45 | "Python Version 2.7.11+ (default, Apr 17 2016, 14:00:29) \n",
46 | "[GCC 5.3.1 20160413]\n",
47 | "Pandas Version 0.19.1\n",
48 | "Numpy Version 1.11.2\n",
49 | "Matplotlib Version 1.5.3\n",
50 | "WellApplication Version 0.4.21\n"
51 | ]
52 | }
53 | ],
54 | "source": [
55 | "print(\"Operating System \" + platform.system() + \" \" + platform.release())\n",
56 | "print(\"Python Version \" + str(sys.version))\n",
57 | "print(\"Pandas Version \" + str(pd.__version__))\n",
58 | "print(\"Numpy Version \" + str(np.__version__))\n",
59 | "print(\"Matplotlib Version \" + str(matplotlib.__version__))\n",
60 | "print(\"WellApplication Version \" + str(wa.__version__))"
61 | ]
62 | },
63 | {
64 | "cell_type": "code",
65 | "execution_count": null,
66 | "metadata": {
67 | "collapsed": true
68 | },
69 | "outputs": [],
70 | "source": [
71 | "'/home/pi/PycharmProjects/WellApplication/test'"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": null,
77 | "metadata": {
78 | "collapsed": true
79 | },
80 | "outputs": [],
81 | "source": []
82 | }
83 | ],
84 | "metadata": {
85 | "kernelspec": {
86 | "display_name": "Python 2",
87 | "language": "python",
88 | "name": "python2"
89 | },
90 | "language_info": {
91 | "codemirror_mode": {
92 | "name": "ipython",
93 | "version": 2
94 | },
95 | "file_extension": ".py",
96 | "mimetype": "text/x-python",
97 | "name": "python",
98 | "nbconvert_exporter": "python",
99 | "pygments_lexer": "ipython2",
100 | "version": "2.7.11+"
101 | }
102 | },
103 | "nbformat": 4,
104 | "nbformat_minor": 0
105 | }
106 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [DOI](https://zenodo.org/badge/latestdoi/48931715)
2 | [Build Status](https://travis-ci.org/inkenbrandt/WellApplication)
3 | [PyPI version](https://badge.fury.io/py/WellApplication)
4 | [codecov](https://codecov.io/gh/inkenbrandt/WellApplication)
5 | [Binder](http://mybinder.org:/repo/inkenbrandt/wellapplication)
6 | 
7 |
8 |
9 | Developed with funding from the U.S. EPA Exchange Network
10 |
11 | # Well Application
12 |
13 | Set of tools for groundwater level and water chemistry analysis. Allows for rapid download and graphing of data from the USGS NWIS database and the Water Quality Portal.
14 |
15 | ## Installation
16 | Wellapplication should be compatible with both Python 2.7 and 3.5. It has been tested most rigorously on Python 2.7. It should work on both 32- and 64-bit platforms; I have used it on Linux and Windows machines.
17 |
18 | To install the most recent version, use pip.
19 | ```Bash
20 | pip install wellapplication
21 | ```
22 | ## Modules
23 |
24 | ### transport
25 |
26 | This module:
27 |
28 | * allows a user to upload data from an .xle file, a format common to some water-well transducers.
29 |
30 | * matches well and barometric data to the same sample intervals
31 |
32 | * adjusts data with manual measurements
33 |
34 | * removes skips and jumps from the data
35 |
36 | This module has functions used to import transducer data and condition it for analysis.
37 |
38 | The most important function in this library is `new_xle_imp`, which takes the path and filename of an .xle file, commonly produced by pressure transducers, and converts that file into a Pandas DataFrame.
39 |
40 | A Jupyter Notebook using some of the transport functions can be found here.
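
For example, a minimal sketch using one of the sample files in this repository's `test` folder (mirroring the calls in `test/test.py`):

```Python
>>> import wellapplication as wa
>>> xle_df = wa.new_xle_imp('test/20160919_LittleHobble.xle')  # parse the .xle into a DataFrame
>>> xle_df.head()  # datetime-indexed readings; the tests use its 'Level' column
```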
41 |
42 | ### usgs
43 |
44 | This module has functions that use the USGS's REST-based API to download USGS data, leveraging the `requests` package and Pandas.
45 |
46 | The most powerful class in this module is `nwis`. It is called as `nwis(service, location value, location type)`.
47 | The main USGS services are `dv` for daily values, `iv` for instantaneous values, `gwlevels` for groundwater levels, and `site` for site information. The `nwis` class allows for rapid download of NWIS data.
48 |
49 | ```Python
50 | >>> import wellapplication as wa
51 | >>> discharge = wa.nwis('dv','10109000','sites')
52 | >>> site_data = discharge.sites
53 | >>> flow_data = discharge.data
54 | ```
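
The `nwis` class also exposes helper methods exercised in `test/test.py`; a minimal sketch, using the same HUC code as the tests:

```Python
>>> gw = wa.nwis('gwlevels', '16010204', 'huc', siteStatus='all')
>>> wl = gw.avg_wl()                      # average water levels for the HUC
>>> info = gw.get_info(siteStatus='all')  # site metadata, including site_no
```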
55 |
56 | A Jupyter Notebook using some of the usgs functions can be found here.
57 |
58 |
--------------------------------------------------------------------------------
/.idea/markdown-navigator.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/wellapplication/MannKendall.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Jan 8 19:55:22 2016
4 |
5 | @author: p
6 | """
7 | from __future__ import absolute_import, division, print_function, unicode_literals
8 | import numpy as np
9 | import pandas as pd
10 | from scipy.stats import norm
11 |
12 | import sys
13 |
14 | if sys.version_info >= (3, 0):
15 | def xrange(*args, **kwargs):
16 | return iter(range(*args, **kwargs))
17 |
18 | def mk_test(x, alpha = 0.05):
19 | """This perform the MK (Mann-Kendall) test to check if there is any trend present in
20 | data or not
21 |
22 | Args:
23 | x: a vector of data
24 | alpha: significance level
25 |
26 | Returns:
27 | trend: tells the trend (increasing, decreasing or no trend)
28 | h: True (if a trend is present) or False (if a trend is absent)
29 | p: p value of the significance test
30 | z: normalized test statistic
31 |
32 | Examples::
33 | >>> x = np.random.rand(100)
34 | >>> trend = mk_test(x,0.05)
35 | >>> print(trend.trend)
36 | increasing
37 |
38 | Credit: http://pydoc.net/Python/ambhas/0.4.0/ambhas.stats/
39 | """
40 | n = len(x)
41 | ta = n*(n-1)/2
42 | # calculate S
43 | s = 0
44 | for k in xrange(n-1):
45 | for j in xrange(k+1,n):
46 | s += np.sign(x[j] - x[k])
47 |
48 | # calculate the unique data
49 | unique_x = np.unique(x)
50 | g = len(unique_x)
51 |
52 | # calculate the var(s)
53 | if n == g: # there is no tie
54 | var_s = (n*(n-1)*(2*n+5))/18
55 | else: # there are some ties in data
56 | tp = np.zeros(unique_x.shape)
57 | for i in xrange(len(unique_x)):
58 | tp[i] = sum(unique_x[i] == x)
59 | var_s = (n*(n-1)*(2*n+5) - np.sum(tp*(tp-1)*(2*tp+5)))/18
60 |
61 | if s>0:
62 | z = (s - 1)/np.sqrt(var_s)
63 | elif s == 0:
64 | z = 0
65 | elif s<0:
66 | z = (s + 1)/np.sqrt(var_s)
67 | else:
68 | z = 0
69 |
70 | # calculate the p_value
71 | p = 2*(1- norm.cdf(abs(z))) # two tail test
72 | h = abs(z) > norm.ppf(1-alpha/2)
73 |
74 | if (z<0) and h:
75 | trend = 'decreasing'
76 | elif (z>0) and h:
77 | trend = 'increasing'
78 | else:
79 | trend = 'no trend'
80 |
81 | return pd.Series({'trend':trend, 'varS':round(var_s,3), 'p':round(p,3), 'z':round(z,3), 's':round(s,3), 'n':n, 'ta':ta})
82 |
83 | def mk_ts(df, const, group1, orderby = 'year', alpha = 0.05):
84 | """
85 | df = dataframe; const = variable (column) tested for trend
86 | group1 = variable (column) to group by (e.g., season or site)
87 | orderby = variable to order by (typically a date)
88 | alpha = significance level
89 | """
90 |
91 | def zcalc(Sp, Varp):
92 | if Sp > 0:
93 | return (Sp - 1)/Varp**0.5
94 | elif Sp < 0:
95 | return (Sp + 1)/Varp**0.5
96 | else:
97 | return 0
98 |
99 | df.is_copy = False
100 |
101 | df[const] = pd.to_numeric(df.loc[:,const])
102 | # remove null values
103 | df[const].dropna(inplace=True)
104 | # remove index
105 | df.reset_index(inplace=True, drop=True)
106 | # sort by groups, then time
107 | df.sort_values(by=[group1,orderby],axis=0, inplace=True)
108 |
109 | # group by group and apply mk_test
110 | dg = df.groupby(group1).apply(lambda x: mk_test(x.loc[:,const].dropna().values, alpha))
111 | Var_S = dg.loc[:,'varS'].sum()
112 | S = dg.loc[:,'s'].sum()
113 | N = dg.loc[:,'n'].sum()
114 | Z = zcalc(S,Var_S)
115 | P = 2*(1-norm.cdf(abs(Z)))
116 | group_n = len(dg)
117 | h = abs(Z) > norm.ppf(1-alpha/2)
118 | tau = S/dg.loc[:,'ta'].sum()
119 |
120 | if (Z<0) and h:
121 | trend = 'decreasing'
122 | elif (Z>0) and h:
123 | trend = 'increasing'
124 | else:
125 | trend = 'no trend'
126 |
127 |
128 | return pd.Series({'S':S, 'Z':round(Z,2), 'p':P, 'trend':trend, 'group_n':group_n, 'sample_n':N, 'Var_S':Var_S, 'tau':round(tau,2)})
129 |
130 |
131 |
--------------------------------------------------------------------------------
/wellapplication/hydropy.py:
--------------------------------------------------------------------------------
1 | """
2 | Hydropy package
3 | @author: Stijn Van Hoey
4 | from: https://github.com/stijnvanhoey/hydropy/tree/master/hydropy
5 | for a better and more up to date copy of this script go to the original repo.
6 | """
7 | from __future__ import absolute_import, division, print_function, unicode_literals
8 | import pandas as pd
9 | import numpy as np
10 | from scipy.optimize import curve_fit
11 |
12 |
13 | def get_baseflow_chapman(flowserie, recession_time):
14 | """
15 | Parameters
16 | ----------
17 | flowserie : pd.TimeSeries
18 | River discharge flowserie
19 | recession_time : float [0-1]
20 | recession constant
21 | Notes
22 | ------
23 | $$Q_b(i) = \frac{k}{2-k}Q_b(i-1) + \frac{1-k}{2-k}Q(i)$$
24 | """
25 |
26 | secterm = (1.-recession_time)*flowserie/(2.-recession_time)
27 |
28 | baseflow = np.empty(flowserie.shape[0])
29 | for i, timestep in enumerate(baseflow):
30 | if i == 0:
31 | baseflow[i] = 0.0
32 | else:
33 | baseflow[i] = recession_time*baseflow[i-1]/(2.-recession_time) + \
34 | secterm.values[i]
35 | baseflow = pd.DataFrame(baseflow, index=flowserie.index)
36 | return baseflow
37 |
38 |
39 | def get_baseflow_boughton(flowserie, recession_time, baseflow_index):
40 | """
41 | Parameters
42 | ----------
43 | flowserie : pd.TimeSeries
44 | River discharge flowserie
45 | recession_time : float [0-1]
46 | recession constant
47 | baseflow_index : float
48 | Notes
49 | ------
50 | $$Q_b(i) = \frac{k}{1+C}Q_b(i-1) + \frac{C}{1+C}Q(i)$$
51 | """
52 |
53 | parC = baseflow_index
54 |
55 | secterm = parC*flowserie/(1 + parC)
56 |
57 | baseflow = np.empty(flowserie.shape[0])
58 | for i, timestep in enumerate(baseflow):
59 | if i == 0:
60 | baseflow[i] = 0.0
61 | else:
62 | baseflow[i] = recession_time*baseflow[i-1]/(1 + parC) + \
63 | secterm.values[i]
64 | return pd.DataFrame(baseflow, index=flowserie.index)
65 |
66 |
67 | def get_baseflow_ihacres(flowserie, recession_time, baseflow_index, alfa):
68 | """
69 | Parameters
70 | ----------
71 | flowserie : pd.TimeSeries
72 | River discharge flowserie
73 | recession_time : float [0-1]
74 | recession constant
75 | Notes
76 | ------
77 | $$Q_b(i) = \frac{k}{1+C}Q_b(i-1) + \frac{C}{1+C}[Q(i)+\alpha Q(i-1)]$$
78 | $\alpha$ < 0.
79 | """
80 |
81 | parC = baseflow_index
82 |
83 | secterm = parC/(1 + parC)
84 |
85 | baseflow = np.empty(flowserie.shape[0])
86 | for i, timestep in enumerate(baseflow):
87 | if i == 0:
88 | baseflow[i] = 0.0
89 | else:
90 | baseflow[i] = recession_time * baseflow[i-1]/(1 + parC) + \
91 | secterm * (flowserie.values[i] +
92 | alfa * flowserie.values[i-1])
93 | return pd.DataFrame(baseflow, index=flowserie.index)
94 |
95 | def exp_curve(x, a, b):
96 | """Exponential curve used for rating curves"""
97 | return (a * x**b)
98 |
99 | def ratingCurve(discharge, stage):
100 | """Computes rating curve based on discharge measurements coupled with stage
101 | readings.
102 | discharge = array of measured discharges;
103 | stage = array of corresponding stage readings;
104 | Returns coefficients a, b for the rating curve in the form y = a * x**b
105 | """
106 |
107 | popt, pcov = curve_fit(exp_curve, stage, discharge)
108 |
109 | def r_squ():
110 | a = 0.0
111 | b = 0.0
112 | for i, j in zip(discharge, stage):
113 | a += (i - exp_curve(j, popt[0], popt[1]))**2
114 | b += (i - np.mean(discharge))**2
115 | return 1 - a / b
116 |
117 | return popt, r_squ()
118 |
119 | def RB_Flashiness(series):
120 | """Richards-Baker Flashiness Index for a series of daily mean discharges.
121 | https://github.com/hydrogeog/hydro"""
122 | Qsum = np.sum(series) # sum of daily mean discharges
123 | Qpath = 0.0
124 | for i in range(len(series)):
125 | if i == 0:
126 | Qpath = series[i] # first entry only
127 | else:
128 | Qpath += np.abs(series[i] - series[i-1]) # sum the absolute differences of the mean discharges
129 | return Qpath/Qsum
130 |
131 |
132 | def flow_duration(series):
133 | """Creates the flow duration curve for a discharge dataset. Returns a pandas
134 | series whose index is the discharge values and whose values are the exceedance probabilities.
135 | https://github.com/hydrogeog/hydro"""
136 | fd = pd.Series(series).value_counts() # frequency of unique values
137 | fd.sort_index(inplace=True) # sort in order of increasing discharges
138 | fd = fd.cumsum() # cumulative sum of frequencies
139 | fd = fd.apply(lambda x: 100 - x/fd.max() * 100) # normalize
140 | return fd
141 |
142 | def Lyne_Hollick(series, alpha=.925, direction='f'):
143 | """Recursive digital filter for baseflow separation. Based on Lyne and Hollick, 1979.
144 | series = array of discharge measurements
145 | alpha = filter parameter
146 | direction = (f)orward or (r)everse calculation
147 | https://github.com/hydrogeog/hydro
148 | """
149 | series = np.array(series)
150 | f = np.zeros(len(series))
151 | if direction == 'f':
152 | for t in np.arange(1,len(series)):
153 | f[t] = alpha * f[t-1] + (1 + alpha)/2 * (series[t] - series[t-1])
154 | if series[t] - f[t] > series[t]:
155 | f[t] = 0
156 | elif direction == 'r':
157 | for t in np.arange(len(series)-2, 1, -1):
158 | f[t] = alpha * f[t+1] + (1 + alpha)/2 * (series[t] - series[t+1])
159 | if series[t] - f[t] > series[t]:
160 | f[t] = 0
161 | return np.array(series - f)
162 |
163 | def Eckhardt(series, alpha=.98, BFI=.80):
164 | """Recursive digital filter for baseflow separation. Based on Eckhardt, 2004.
165 | series = array of discharge measurements
166 | alpha = filter parameter
167 | BFI = BFI_max (maximum baseflow index)
168 | https://github.com/hydrogeog/hydro
169 | """
170 | series = np.array(series)
171 | f = np.zeros(len(series))
172 | f[0] = series[0]
173 | for t in np.arange(1,len(series)):
174 | f[t] = ((1 - BFI) * alpha * f[t-1] + (1 - alpha) * BFI * series[t]) / (1 - alpha * BFI)
175 | if f[t] > series[t]:
176 | f[t] = series[t]
177 | return f
178 |
--------------------------------------------------------------------------------
/test/test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat Jan 23 13:03:00 2016
4 |
5 | @author: p
6 | """
7 | from __future__ import absolute_import, division, print_function, unicode_literals
8 | import wellapplication as wa
9 | import pandas as pd
10 | import matplotlib
11 | import sys
12 | sys.path.append('../')
13 | import numpy as np
14 |
15 | m = wa.Meso(token='demotoken')
16 |
17 | def test_getelev():
18 | print('Testing getelev')
19 | x = [-111.21, 41.4]
20 | m = wa.get_elev(x)
21 | assert m > 100.0
22 |
23 | def test_gethuc():
24 | print('Testing gethuc')
25 | x = [-111.21, 41.4]
26 | huc_data = wa.get_huc(x)
27 | assert len(huc_data[0])>0
28 |
29 | def test_USGSID():
30 | print('Testing USGSID')
31 | x = [-111.21, 41.4]
32 | usgs_id = wa.USGSID(x)
33 | assert usgs_id == '412400111123601'
34 |
35 | def test_nwis():
36 | nw = wa.nwis('dv', '01585200', 'sites')
37 | assert len(nw.sites) == 1
38 |
39 | def test_nwis_gw():
40 | nw = wa.nwis('gwlevels','16010204','huc',siteStatus='all')
41 | df = nw.avg_wl()
42 | assert len(df) > 5
43 |
44 | def test_fdc():
45 | d16 = wa.nwis('dv','01659500','sites')
46 | ci = wa.fdc(d16.data,'value',1900,2016)
47 | assert type(ci[0]) == list
48 |
49 | def test_mktest():
50 | x = range(0,100)
51 | trend = wa.MannKendall.mk_test(x,0.05)
52 | assert trend.trend == 'increasing'
53 |
54 | #def test_pipe():
55 | # Chem = {'Type':[1,2,2,3], 'Cl':[1.72,0.90,4.09,1.52], 'HCO3':[4.02,1.28,4.29,3.04],
56 | # 'SO4':[0.58,0.54,0.38,0.46], 'NaK':[1.40,0.90,3.38,2.86], 'Ca':[4.53,None,4.74,1.90],
57 | # 'Mg':[0.79,0.74,0.72,0.66], 'EC':[672.0,308.0,884.0,542.0], 'NO3':[0.4,0.36,0.08,0.40],
58 | # 'Sicc':[0.21,0.56,None,-0.41]}
59 | # chem = pd.DataFrame(Chem)
60 | # pipr = wa.piper(chem)
61 | # assert type(pipr.plot) == matplotlib.figure.Figure
62 |
63 | def test_new_xle_imp():
64 | xle = 'test/20160919_LittleHobble.xle'
65 | xle_df = wa.new_xle_imp(xle)
66 | assert len(xle_df) > 0
67 |
68 | def test_xle_head_table():
69 | xle_dir = 'test/'
70 | dir_df = wa.xle_head_table(xle_dir)
71 | assert len(dir_df) > 0
72 |
73 | def test_dataendclean():
74 | xle = 'test/20160919_LittleHobble.xle'
75 | df = wa.new_xle_imp(xle)
76 | x = 'Level'
77 | xle1 = wa.dataendclean(df, x)
78 | assert len(xle1) > 1
79 |
80 | def test_smoother():
81 | xle = 'test/20160919_LittleHobble.xle'
82 | df = wa.new_xle_imp(xle)
83 | x = 'Level'
84 | xle1 = wa.smoother(df, x, sd=1)
85 | assert len(xle1) > 1
86 |
87 | def test_hourly_resample():
88 | xle = 'test/20160919_LittleHobble.xle'
89 | df = wa.new_xle_imp(xle)
90 | xle1 = wa.hourly_resample(df, minutes=30)
91 |
92 | # Basic Function Tests
93 | def testvars():
94 | var_list = m.variables()
95 |
96 | def testmetadata():
97 | stations = m.metadata(radius=['wbb', 5])
98 |
99 | def test_WQP():
100 | wqq = wa.WQP('16010204','huc')
101 | wqq.results = wqq.massage_results()
102 | pivchem = wqq.piv_chem()
103 | assert 'Alk' in pivchem.columns
104 |
105 | def test_WQ2():
106 | wqq = wa.WQP('16010204','huc')
107 | wqq.stations = wqq.massage_stations()
108 | df = wqq.stations
109 | assert "OrgId" in list(df.columns)
110 |
111 | #def test_imp_new_well():
112 | # inputfile = "test/ag13c 2016-08-02.xle"
113 | # manualwls = "test/All tape measurements.csv"
114 | # manual = pd.read_csv(manualwls, index_col="DateTime", engine="python")
115 | # barofile = "test/baro.csv"
116 | # baro = pd.read_csv(barofile,index_col=0, parse_dates=True)
117 | # wellinfo = pd.read_csv("test/wellinfo4.csv")
118 | # g, drift, wellname = wa.fix_well(wellinfo,inputfile, manual, baro)
119 | # assert wellname == 'ag13c'
120 |
121 | def test_well_baro_merge():
122 | xle = "test/ag13c 2016-08-02.xle"
123 | xle_df = wa.new_xle_imp(xle)
124 | barofile = "test/baro.csv"
125 | baro = pd.read_csv(barofile,index_col=0, parse_dates=True)
126 | baro['Level'] = baro['pw03']
127 | assert len(wa.well_baro_merge(xle_df, baro, sampint=60)) > 10
128 |
129 | #def test_fix_drift():
130 | # xle = "test/ag13c 2016-08-02.xle"
131 | # xle_df = wa.new_xle_imp(xle)
132 | # manualwls = "test/All tape measurements.csv"
133 | # manual = pd.read_csv(manualwls, index_col="DateTime", engine="python")
134 | # manual35 = manual[manual['WellID']==35]
135 | # manual35['dt'] = pd.to_datetime(manual35.index)
136 | # manual_35 = manual35.reset_index()
137 | # manual_35.set_index('dt',inplace=True)
138 | # fd = wa.fix_drift(xle_df, manual_35, meas='Level', corrwl='Level',
139 | # manmeas='MeasuredDTW', outcolname='DriftCorrection')
140 | # assert 'DriftCorrection' in list(fd[0].columns)
141 |
142 | def test_getwellid():
143 | inputfile = "test/ag13c 2016-08-02.xle"
144 | wellinfo = pd.read_csv("test/wellinfo4.csv")
145 | wid = wa.getwellid(inputfile, wellinfo)
146 | assert wid[1] == 35
147 |
148 | def test_barodistance():
149 | wellinfo = pd.read_csv("test/wellinfo4.csv")
150 | bd = wa.barodistance(wellinfo)
151 | assert 'closest_baro' in list(bd.columns)
152 |
153 | #def test_imp_new_well_csv():
154 | # inputfile = "test/ag14a 2016-08-02.csv"
155 | # manualwls = "test/All tape measurements.csv"
156 | # manual = pd.read_csv(manualwls, index_col="DateTime", engine="python")
157 | # barofile = "test/baro.csv"
158 | # baro = pd.read_csv(barofile,index_col=0, parse_dates=True)
159 | # wellinfo = pd.read_csv("test/wellinfo4.csv")
160 | # g, drift, wellname = wa.imp_new_well(inputfile, wellinfo, manual, baro)
161 | # assert wellname == 'ag14a'
162 |
163 | def test_jumpfix():
164 | xle = "test/ag13c 2016-08-02.xle"
165 | df = wa.new_xle_imp(xle)
166 | jf = wa.jumpfix(df, 'Level', threashold=0.005)
167 | assert jf['newVal'][-1] > 10
168 |
169 | def test_gantt():
170 | ashley = wa.nwis('dv', '09265500', 'sites')
171 | gn = wa.gantt(ashley.data, stations=['value'])
172 | assert type(gn.gantt()[2]) == matplotlib.figure.Figure
173 |
174 | #def test_scatterColor():
175 | # x = np.arange(1, 100, 1)
176 | # y = np.arange(0.1, 10.0, 0.1)
177 | # w = np.arange(5, 500, 5)
178 | # out = wa.scatterColor(x, y, w)
179 | # assert round(out[0], 1) == 0.1
180 |
181 | def test_get_info():
182 | nw = wa.nwis('gwlevels', '16010204', 'huc', siteStatus='all')
183 | df = nw.get_info(siteStatus='all')
184 | assert 'site_no' in list(df.columns)
185 |
186 | #def test_recess():
187 | # ashley = wa.nwis('dv', '09265500', 'sites', startDT='2015-06-02', endDT='2015-06-14')
188 | # rec = wa.graphs.recess(ashley.data, 'value', st=[2015, 6, 2])
189 | # assert round(rec.rec_results[0], 2) == 0.04
190 |
191 | #def test_get_recess_int():
192 | # ashley = wa.nwis('dv', '09265500','sites', startDT='2015-01-02' ,endDT='2015-10-14')
193 | # assert type(wa.get_recess_int(ashley.data, 'value')[0]) == pd.DataFrame
194 |
195 | def test_mk_ts():
196 | usgsP = pd.read_csv('test/usgsP.csv')
197 | var = wa.MannKendall.mk_ts(usgsP, 'PO4', 'month', 'year',0.05)
198 | assert var[0] == -87.0
199 |
--------------------------------------------------------------------------------
/wellapplication/arcpy_functions.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function, unicode_literals
2 |
3 | import pandas as pd
4 |
5 |
6 | from .transport import *
7 |
8 | try:
9 | import arcpy
10 |
11 | arcpy.env.overwriteOutput = True
12 |
13 | except ImportError:
14 | pass
15 |
16 |
17 | def imp_one_well(well_file, baro_file, man_startdate, man_start_level, man_endate, man_end_level,
18 | conn_file_root,
19 | wellid, be=None, well_table="UGGP.UGGPADMIN.UGS_NGWMN_Monitoring_Locations",
20 | gw_reading_table="UGGP.UGGPADMIN.UGS_GW_reading", drift_tol=0.3, override=False):
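"""
Imports a single transducer file, corrects it, and appends it to the SDE reading table.
:param well_file: raw well transducer file (.xle or csv)
:param baro_file: raw barometer transducer file
:param man_startdate: date of the manual measurement at the start of the record
:param man_start_level: manual depth-to-water measurement at the start of the record
:param man_endate: date of the manual measurement at the end of the record
:param man_end_level: manual depth-to-water measurement at the end of the record
:param conn_file_root: SDE connection (workspace) file
:param wellid: LocationID of the well
:param be: barometric efficiency; if provided, a barometric-efficiency correction is applied
:param well_table: SDE table holding well metadata (stickup, elevation, etc.)
:param gw_reading_table: SDE table that readings are appended to
:param drift_tol: maximum allowable transducer drift before the import is rejected
:param override: if truthy, import even when drift exceeds drift_tol
:return: corrected DataFrame, manual-measurement DataFrame, be, and drift
"""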
21 | import arcpy
22 | arcpy.env.workspace = conn_file_root
23 |
24 | if os.path.splitext(well_file)[1] == '.xle':
25 | trans_type = 'Solinst'
26 | else:
27 | trans_type = 'Global Water'
28 |
29 | printmes('Trans type for well is {:}.'.format(trans_type))
30 |
31 | welltable = table_to_pandas_dataframe(well_table, query="AlternateID is not Null")
32 |
33 | well = new_trans_imp(well_file)
34 | baro = new_trans_imp(baro_file)
35 |
36 |
37 | corrwl = well_baro_merge(well, baro, vented=(trans_type != 'Solinst'))
38 |
39 | if be:
40 | corrwl = correct_be(wellid, welltable, corrwl, be=be)
41 | corrwl['corrwl'] = corrwl['BAROEFFICIENCYLEVEL']
42 |
43 | stickup, well_elev = get_stickup_elev(wellid, well_table)
44 |
45 | man = pd.DataFrame(
46 | {'DateTime': [man_startdate, man_endate], 'MeasuredDTW': [man_start_level, man_end_level]}).set_index(
47 | 'DateTime')
48 | printmes(man)
49 | man['Meas_GW_Elev'] = well_elev - (man['MeasuredDTW'] - stickup)
50 |
51 | man['MeasuredDTW'] = man['MeasuredDTW'] * -1
52 |
53 | dft = fix_drift(corrwl, man, meas='corrwl', manmeas='MeasuredDTW')
54 | drift = round(float(dft[1]['drift'].values[0]), 3)
55 | printmes('Drift for well {:} is {:}.'.format(wellid, drift))
56 | df = dft[0]
57 |
58 | rowlist, fieldnames = prepare_fieldnames(df, wellid, stickup, well_elev)
59 |
60 | if drift <= drift_tol:
61 | edit_table(rowlist, gw_reading_table, fieldnames)
62 | printmes('Well {:} successfully imported!'.format(wellid))
63 | elif override == 1:
64 | edit_table(rowlist, gw_reading_table, fieldnames)
65 | printmes('Override initiated. Well {:} successfully imported!'.format(wellid))
66 | else:
67 | printmes('Well {:} drift greater than tolerance!'.format(wellid))
68 | return df, man, be, drift
69 |
70 |
71 | def find_extreme(site_number, gw_table="UGGP.UGGPADMIN.UGS_GW_reading", extma='max'):
72 | """
73 | Find date extrema from a SDE table using query parameters
74 | :param site_number: LocationID of the site of interest
75 | :param gw_table: SDE table to be queried
76 | :param extma: options are 'max' (default) or 'min'
77 | :return: date of extrema, depth to water of extrema, water elevation of extrema
78 | """
79 | import arcpy
80 | from arcpy import env
81 | env.overwriteOutput = True
82 |
83 | if extma == 'max':
84 | sort = 'DESC'
85 | else:
86 | sort = 'ASC'
87 | query = "LOCATIONID = '{:}'".format(site_number)
88 | field_names = ['READINGDATE', 'LOCATIONID', 'DTWBELOWGROUNDSURFACE', 'WATERELEVATION']
89 | sql_sn = ('TOP 1', 'ORDER BY READINGDATE {:}'.format(sort))
90 | # use a search cursor to iterate rows
91 | dateval, dtw, wlelev = [], [], []
92 |
93 | envtable = os.path.join(env.workspace, gw_table)
94 |
95 | with arcpy.da.SearchCursor(envtable, field_names, query, sql_clause=sql_sn) as search_cursor:
96 | # iterate the rows
97 | for row in search_cursor:
98 | dateval.append(row[0])
99 | dtw.append(row[2])
100 | wlelev.append(row[3])
101 | if len(dateval) < 1:
102 | return None, 0, 0
103 | else:
104 | return dateval[0], dtw[0], wlelev[0]
105 |
106 |
107 | def get_field_names(table):
108 | read_descr = arcpy.Describe(table)
109 | field_names = []
110 | for field in read_descr.fields:
111 | field_names.append(field.name)
112 | field_names.remove('OBJECTID')
113 | return field_names
114 |
115 | def get_gap_data(site_number, enviro, gap_tol = 0.5,
116 | gw_reading_table="UGGP.UGGPADMIN.UGS_GW_reading"):
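"""
Finds gaps in the series of non-tape (TAPE = 0) readings for a site in an SDE reading table.
:param site_number: LocationID of the site of interest
:param enviro: SDE connection (workspace) file
:param gap_tol: minimum gap to report, in days (default 0.5 = 12 hours)
:param gw_reading_table: SDE table to be queried
:return: DataFrame of readings flagged as gaps, with gap_start and gap_end columns
"""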
117 | arcpy.env.workspace = enviro
118 | first_date = datetime.datetime(1900, 1, 1)
119 | last_date = datetime.datetime.now()
120 |
121 | query_txt = "LOCATIONID = '{:}' AND TAPE = 0"
122 | query = query_txt.format(site_number)
123 |
124 | sql_sn = (None, 'ORDER BY READINGDATE ASC')
125 |
126 | fieldnames = ['READINGDATE']
127 |
128 | #readings = wa.table_to_pandas_dataframe(gw_reading_table, fieldnames, query, sql_sn)
129 |
130 | dt = []
131 |
132 | # use a search cursor to iterate rows
133 | with arcpy.da.SearchCursor(gw_reading_table, 'READINGDATE', query, sql_clause=sql_sn) as search_cursor:
134 | # iterate the rows
135 | for row in search_cursor:
136 | # combine the field names and row items together, and append them
137 | dt.append(row[0])
138 |
139 | df = pd.Series(dt,name='DateTime')
140 | df = df.to_frame()
141 | df['hr_diff'] = df['DateTime'].diff()
142 | df.set_index('DateTime',inplace=True)
143 | df['julian'] = df.index.to_julian_date()
144 | df['diff'] = df['julian'].diff()
145 | df['is_gap'] = df['diff'] > gap_tol
146 | def rowIndex(row):
147 | return row.name
148 | df['gap_end'] = df.apply(lambda x: rowIndex(x) if x['is_gap'] else pd.NaT, axis=1)
149 | df['gap_start'] = df.apply(lambda x: rowIndex(x) - x['hr_diff'] if x['is_gap'] else pd.NaT, axis=1)
150 | df = df[df['is_gap'] == True]
151 | return df
152 |
153 |
154 |
155 | def table_to_pandas_dataframe(table, field_names=None, query=None, sql_sn=(None, None)):
156 | """
157 | Load data into a Pandas Data Frame for subsequent analysis.
158 | :param table: Table readable by ArcGIS.
159 | :param field_names: List of fields.
160 | :param query: SQL query to limit results
161 | :param sql_sn: sort fields for sql; see http://pro.arcgis.com/en/pro-app/arcpy/functions/searchcursor.htm
162 | :return: Pandas DataFrame object.
163 | """
164 |
165 | # if field names are not specified
166 | if not field_names:
167 | field_names = get_field_names(table)
168 | # create a pandas data frame
169 | df = pd.DataFrame(columns=field_names)
170 |
171 | # use a search cursor to iterate rows
172 | with arcpy.da.SearchCursor(table, field_names, query, sql_clause=sql_sn) as search_cursor:
173 | # iterate the rows
174 | for row in search_cursor:
175 | # combine the field names and row items together, and append them
176 | df = df.append(dict(zip(field_names, row)), ignore_index=True)
177 |
178 | # return the pandas data frame
179 | return df
180 |
181 |
182 | def edit_table(df, gw_reading_table, fieldnames):
183 | """
184 | Edits SDE table by inserting new rows
185 | :param df: pandas DataFrame
186 | :param gw_reading_table: sde table to edit
187 | :param fieldnames: field names that are being appended in order of appearance in dataframe or list row
188 | :return:
189 | """
190 |
191 | table_names = get_field_names(gw_reading_table)
192 |
193 | for name in list(fieldnames):  # iterate over a copy so removals don't skip names
194 | if name not in table_names:
195 | fieldnames.remove(name)
196 | printmes("{:} not in {:} fieldnames!".format(name, gw_reading_table))
197 |
198 | if len(fieldnames) > 0:
199 | subset = df[fieldnames]
200 | rowlist = subset.values.tolist()
201 |
202 | arcpy.env.overwriteOutput = True
203 | edit = arcpy.da.Editor(arcpy.env.workspace)
204 | edit.startEditing(False, False)
205 | edit.startOperation()
206 |
207 | cursor = arcpy.da.InsertCursor(gw_reading_table, fieldnames)
208 | for j in range(len(rowlist)):
209 | cursor.insertRow(rowlist[j])
210 |
211 | del cursor
212 | edit.stopOperation()
213 | edit.stopEditing(True)
214 | else:
215 | printmes('No data imported!')
216 |
217 |
218 | def simp_imp_well(well_table, file, baro_out, wellid, manual, stbl_elev=True,
219 | gw_reading_table="UGGP.UGGPADMIN.UGS_GW_reading", drift_tol=0.3, override=False):
220 | """
221 | Imports single well
222 | :param well_table: pandas dataframe of well data with ALternateID as index; needs altitude, be, stickup, and barolooger
223 | :param file: raw well file (xle, csv, or lev)
224 | :param baro_out: dictionary with barometer ID defining dataframe names
225 | :param wellid: unique ID of well field
226 | :param manual: manual data dataframe indexed by measure datetime
227 | :param stbl_elev:
228 | :param gw_reading_table:
229 | :param drift_tol:
230 | :param override:
231 | :return:
232 | """
233 | # import well file
234 | well = new_trans_imp(file)
235 |
236 | file_ext = os.path.splitext(file)[1]
237 | if file_ext == '.xle':
238 | trans_type = 'Solinst'
239 | else:
240 | trans_type = 'Global Water'
241 | try:
242 | baroid = well_table.loc[wellid, 'BaroLoggerType']
243 | printmes('{:}'.format(baroid))
244 | corrwl = well_baro_merge(well, baro_out[str(baroid)], barocolumn='MEASUREDLEVEL',
245 | vented=(trans_type != 'Solinst'))
246 | except:
247 | corrwl = well_baro_merge(well, baro_out['9003'], barocolumn='MEASUREDLEVEL',
248 | vented=(trans_type != 'Solinst'))
249 |
250 | # be, intercept, r = clarks(corrwl, 'barometer', 'corrwl')
251 | # correct barometric efficiency
252 | wls, be = correct_be(wellid, well_table, corrwl)
253 |
254 | # get manual groundwater elevations
255 | # man, stickup, well_elev = self.get_gw_elevs(wellid, well_table, manual, stable_elev = stbl_elev)
256 | stdata = well_table[well_table['WellID'] == str(wellid)]
257 | man_sub = manual[manual['LOCATIONID'] == int(wellid)]
258 | well_elev = float(stdata['Altitude'].values[0]) # Should be in feet
259 |
260 | if stbl_elev:
261 | if stdata['Offset'].values[0] is None:
262 | stickup = 0
263 | printmes('Well ID {:} missing stickup!'.format(wellid))
264 | else:
265 | stickup = float(stdata['Offset'].values[0])
266 | else:
267 |
268 | stickup = man_sub.loc[man_sub.last_valid_index(), 'Current Stickup Height']
269 |
270 | # manual = manual['MeasuredDTW'].to_frame()
271 | man_sub.loc[:, 'MeasuredDTW'] = man_sub['DTWBELOWCASING'] * -1
272 | man_sub.loc[:, 'Meas_GW_Elev'] = man_sub.loc[:, 'WATERELEVATION']
273 | #man_sub.loc[:, 'Meas_GW_Elev'] = man_sub['MeasuredDTW'].apply(lambda x: float(well_elev) + (x + float(stickup)),1)
274 | printmes('Stickup: {:}, Well Elev: {:}'.format(stickup, well_elev))
275 |
276 | # fix transducer drift
277 |
278 | dft = fix_drift(wls, man_sub, meas='BAROEFFICIENCYLEVEL', manmeas='MeasuredDTW')
279 | drift = np.round(float(dft[1]['drift'].values[0]), 3)
280 |
281 | df = dft[0]
282 | df.sort_index(inplace=True)
283 | first_index = df.first_valid_index()
284 |
285 | # Get last reading at the specified location
286 | read_max, dtw, wlelev = find_extreme(wellid)
287 |
288 | printmes("Last database date is {:}. First transducer reading is on {:}.".format(read_max, first_index))
289 |
290 | rowlist, fieldnames = prepare_fieldnames(df, wellid, stickup, well_elev)
291 |
292 | if (read_max is None or read_max < first_index) and (drift < drift_tol):
293 | edit_table(rowlist, gw_reading_table, fieldnames)
294 | printmes(arcpy.GetMessages())
295 | printmes("Well {:} imported.".format(wellid))
296 | elif override and (drift < drift_tol):
297 | edit_table(rowlist, gw_reading_table, fieldnames)
298 | printmes(arcpy.GetMessages())
299 | printmes("Override Activated. Well {:} imported.".format(wellid))
300 | elif drift > drift_tol:
301 | printmes('Drift for well {:} exceeds tolerance!'.format(wellid))
302 | else:
303 | printmes('Dates later than import data for well {:} already exist!'.format(wellid))
304 | pass
305 |
306 | # except (ValueError, ZeroDivisionError):
307 |
308 | # drift = -9999
309 | # df = corrwl
310 | # pass
311 | return rowlist, man_sub, be, drift
312 |
313 |
314 |
315 |
316 |
317 | def upload_bp_data(df, site_number, return_df=False, gw_reading_table="UGGP.UGGPADMIN.UGS_GW_reading"):
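"""
Appends barometric-pressure readings to the SDE reading table, skipping the import if the
site already has records newer than the first reading in df.
:param df: DataFrame of barometer readings indexed by date, with a 'Level' column (and optionally 'Temperature')
:param site_number: LocationID of the barometer
:param return_df: if True, return the prepared DataFrame
:param gw_reading_table: SDE table that readings are appended to
:return: prepared DataFrame when return_df is True
"""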
318 | import arcpy
319 |
320 | df.sort_index(inplace=True)
321 | first_index = df.first_valid_index()
322 |
323 | # Get last reading at the specified location
324 | read_max, dtw, wlelev = find_extreme(site_number)
325 |
326 | if read_max is None or read_max < first_index:
327 |
328 | df['MEASUREDLEVEL'] = df['Level']
329 | df['TAPE'] = 0
330 | df['LOCATIONID'] = site_number
331 |
332 | df.sort_index(inplace=True)
333 |
334 | fieldnames = ['READINGDATE', 'MEASUREDLEVEL', 'TEMP', 'LOCATIONID', 'TAPE']
335 |
336 | if 'Temperature' in df.columns:
337 | df.rename(columns={'Temperature': 'TEMP'}, inplace=True)
338 |
339 | if 'TEMP' in df.columns:
340 | df['TEMP'] = df['TEMP'].apply(lambda x: np.round(x, 4), 1)
341 | else:
342 | df['TEMP'] = None
343 |
344 | df.index.name = 'READINGDATE'
345 |
346 | subset = df.reset_index()
347 |
348 | edit_table(subset, gw_reading_table, fieldnames)
349 |
350 | if return_df:
351 | return df
352 |
353 | else:
354 | printmes('Dates later than import data for this station already exist!')
355 | pass
356 |
357 |
358 | def get_location_data(site_number, enviro, first_date=None, last_date=None, limit=None,
359 | gw_reading_table="UGGP.UGGPADMIN.UGS_GW_reading"):
360 | arcpy.env.workspace = enviro
361 | if not first_date:
362 | first_date = datetime.datetime(1900, 1, 1)
363 | elif type(first_date) == str:
364 | try:
365 | datetime.datetime.strptime(first_date, '%m/%d/%Y')
366 | except:
367 | first_date = datetime.datetime(1900, 1, 1)
368 | # Get last reading at the specified location
369 | if not last_date or last_date > datetime.datetime.now():
370 | last_date = datetime.datetime.now()
371 |
372 | query_txt = "LOCATIONID = '{:}' and (READINGDATE >= '{:%m/%d/%Y}' and READINGDATE <= '{:%m/%d/%Y}')"
373 | query = query_txt.format(site_number, first_date, last_date + datetime.timedelta(days=1))
374 | printmes(query)
375 | sql_sn = (limit, 'ORDER BY READINGDATE ASC')
376 |
377 | fieldnames = get_field_names(gw_reading_table)
378 |
379 | readings = table_to_pandas_dataframe(gw_reading_table, fieldnames, query, sql_sn)
380 | readings.set_index('READINGDATE', inplace=True)
381 | if len(readings) == 0:
382 | printmes('No Records for location {:}'.format(site_number))
383 | return readings
384 |
--------------------------------------------------------------------------------
/docs/UMAR_WL_Data.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "This notebook provides Python scripts to import, compile, modify, graph, and export Solinst transducer data. "
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {
14 | "collapsed": false
15 | },
16 | "outputs": [],
17 | "source": [
18 | "%matplotlib inline\n",
19 | "import pandas as pd\n",
20 | "import numpy as np\n",
21 | "import os\n",
22 | "import sys\n",
23 | "import platform\n",
24 | "import glob\n",
25 | "import re\n",
26 | "import xmltodict\n",
27 | "import matplotlib\n",
28 | "import matplotlib.pyplot as plt\n",
29 | "import matplotlib.dates as dates\n",
30 | "import matplotlib.ticker as tick\n",
31 | "from matplotlib.backends.backend_pdf import PdfPages\n",
32 | "import statsmodels.tsa.tsatools as tools\n",
33 | "from pandas.stats.api import ols\n",
34 | "from datetime import datetime\n",
35 | "from pylab import rcParams\n",
36 | "rcParams['figure.figsize'] = 15, 10"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": null,
42 | "metadata": {
43 | "collapsed": false
44 | },
45 | "outputs": [],
46 | "source": [
47 | "print(\"Operating System \" + platform.system() + \" \" + platform.release())\n",
48 | "print(\"Python Version \" + str(sys.version))\n",
49 | "print(\"Pandas Version \" + str(pd.__version__))\n",
50 | "print(\"Numpy Version \" + str(np.__version__))\n",
51 | "print(\"Matplotlib Version \" + str(matplotlib.__version__))"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": null,
57 | "metadata": {
58 | "collapsed": true
59 | },
60 | "outputs": [],
61 | "source": [
62 | "#rootname = '/media/p/Transcend/PROJECTS/UMAR/Phase_II/Data/RAW/'\n",
63 | "rootname = 'E:/PROJECTS/UMAR/Data/RAW/'"
64 | ]
65 | },
66 | {
67 | "cell_type": "markdown",
68 | "metadata": {},
69 | "source": [
70 | "## Scat"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": null,
76 | "metadata": {
77 | "collapsed": false
78 | },
79 | "outputs": [],
80 | "source": [
81 | "def Scat(data,bp,wl):\n",
82 | " data['dwl'] = data[wl].diff()\n",
83 | " data['dbp'] = data[bp].diff()\n",
84 | "\n",
85 | " regression = ols(y=data['dwl'], x=data['dbp'])\n",
86 | " m = regression.beta.x\n",
87 | " b = regression.beta.intercept\n",
88 | " r = regression.r2\n",
89 | " #r = (regression.beta.r)**2\n",
90 | " plt.scatter(y=data['dwl'], x=data['dbp'])\n",
91 | "\n",
92 | " y_reg = [data['dbp'][i]*m+b for i in range(len(data['dbp']))]\n",
93 | "\n",
94 | " plt.plot(data['dbp'],y_reg, \n",
95 | " label='Regression: Y = {m:.4f}X + {b:.5}\\nr^2 = {r:.4f}\\n BE = {be:.2f} '.format(m=m,b=b,r=r,be=m))\n",
96 | " plt.legend()\n",
97 | " plt.xlabel('Sum of Barometric Pressure Changes (ft)')\n",
98 | " plt.ylabel('Sum of Water-Level Changes (ft)')"
99 | ]
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {},
104 | "source": [
105 | "## clarks"
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": null,
111 | "metadata": {
112 | "collapsed": false
113 | },
114 | "outputs": [],
115 | "source": [
116 | "# clark's method\n",
117 | "def clarks(data,bp,wl):\n",
118 | " '''\n",
119 | " clarks method\n",
120 | " Input dataframe (data) with barometric pressure (bp) and water level (wl) data\n",
121 | " Returns slope, intercept, and r squared value'''\n",
122 | " data['dwl'] = data[wl].diff()\n",
123 | " data['dbp'] = data[bp].diff()\n",
124 | " \n",
125 | " data['beta'] = data['dbp']*data['dwl']\n",
126 | " data['Sbp'] = np.abs(data['dbp']).cumsum()\n",
127 | " data['Swl'] = data[['dwl','beta']].apply(lambda x: -1*np.abs(x[0]) if x[1]>0 else np.abs(x[0]), axis=1).cumsum()\n",
128 | " plt.figure()\n",
129 | " plt.plot(data['Sbp'],data['Swl'])\n",
130 | " regression = ols(y=data['Swl'], x=data['Sbp'])\n",
131 | " \n",
132 | " m = regression.beta.x\n",
133 | " b = regression.beta.intercept\n",
134 | " r = regression.r2\n",
135 | " \n",
136 | " y_reg = [data.ix[i,'Sbp']*m+b for i in range(len(data['Sbp']))]\n",
137 | "\n",
138 | " plt.plot(data['Sbp'],y_reg,\n",
139 | " label='Regression: Y = {m:.4f}X + {b:.5}\\nr^2 = {r:.4f}\\n BE = {be:.2f} '.format(m=m,b=b,r=r,be=m))\n",
140 | " plt.legend()\n",
141 | " plt.xlabel('Sum of Barometric Pressure Changes (ft)')\n",
142 | " plt.ylabel('Sum of Water-Level Changes (ft)')\n",
143 | " data.drop(['dwl','dbp','Sbp','Swl'], axis=1, inplace=True)\n",
144 | " return m,b,r"
145 | ]
146 | },
147 | {
148 | "cell_type": "markdown",
149 | "metadata": {},
150 | "source": [
151 | "# Setting Up the Solinst Barologger and Levelogger"
152 | ]
153 | },
154 | {
155 | "cell_type": "markdown",
156 | "metadata": {},
157 | "source": [
158 | "I always set my transducers to `future start` to make the tranducer start on the hour. I also allow the Levelogger to take an instantaneous measurement out of water, and zero the transducer out to accomodate for elevation."
159 | ]
160 | },
161 | {
162 | "cell_type": "markdown",
163 | "metadata": {},
164 | "source": [
165 | "# Import Relevant Files"
166 | ]
167 | },
168 | {
169 | "cell_type": "markdown",
170 | "metadata": {},
171 | "source": [
172 | "First, we must import all of the relevant data. To properly import transducer data, we need:\n",
173 | "* Transducer (Levelogger) data\n",
174 | "* Barometric (Barologger) data\n",
175 | "* Manual Depth to Water Measurements\n",
176 | " \n",
177 | "If we want to calculate water-level elevation, we also need:\n",
178 | "* Well stickup length (ground to measure point distance)\n",
179 | "* Ground surface elevation at well\n",
180 | "
OR
\n",
181 | "* Elevation of measure point"
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": null,
187 | "metadata": {
188 | "collapsed": false
189 | },
190 | "outputs": [],
191 | "source": [
192 | "barofile = new_xle_imp(rootname + \"baro_2015-07-16.xle\")\n",
193 | "barofile2 = pd.read_csv(rootname + \"UCC.csv\",parse_dates=True,index_col='Day',skiprows=14, na_values=['M','S'])\n",
194 | "wellfile = new_xle_imp(rootname +\"arnold_well_2015-07-16.xle\")\n",
195 | "wellfile2 = new_xle_imp(rootname +\"arnold_well_2015-04-01.xle\")\n",
196 | "manualfile = pd.read_excel(rootname +\"Manual_Readings.xlsx\",\"Arn_Well\",index_col=\"datetime\")"
197 | ]
198 | },
199 | {
200 | "cell_type": "code",
201 | "execution_count": null,
202 | "metadata": {
203 | "collapsed": false
204 | },
205 | "outputs": [],
206 | "source": [
207 | "barofile2['ft_water_bp']= barofile2['Sea Level Pressure']*0.0335 - (31.17 - 4806/826 + 7.8) # convert hPa to ft water\n",
208 | "barofile2 = barofile2.interpolate(method='time') # fill NA spots"
209 | ]
210 | },
211 | {
212 | "cell_type": "markdown",
213 | "metadata": {},
214 | "source": [
215 | "## Compile Files if Necessary"
216 | ]
217 | },
218 | {
219 | "cell_type": "markdown",
220 | "metadata": {},
221 | "source": [
222 | "Concatonate the well files so that they are one seamless file."
223 | ]
224 | },
225 | {
226 | "cell_type": "code",
227 | "execution_count": null,
228 | "metadata": {
229 | "collapsed": true
230 | },
231 | "outputs": [],
232 | "source": [
233 | "wellfile = pd.concat([wellfile,wellfile2])\n",
234 | "wellfile.sort_index(inplace=True)"
235 | ]
236 | },
237 | {
238 | "cell_type": "code",
239 | "execution_count": null,
240 | "metadata": {
241 | "collapsed": false
242 | },
243 | "outputs": [],
244 | "source": [
245 | "wellfile.columns"
246 | ]
247 | },
248 | {
249 | "cell_type": "markdown",
250 | "metadata": {},
251 | "source": [
252 | "## Graph Raw Data"
253 | ]
254 | },
255 | {
256 | "cell_type": "markdown",
257 | "metadata": {},
258 | "source": [
259 | "You should always graph raw data to see if there are any tares in the data from users moving the tranducer placement. Sometimes, the transducer is out of the water when it takes a measurement. These points should be removed or adjusted."
260 | ]
261 | },
262 | {
263 | "cell_type": "code",
264 | "execution_count": null,
265 | "metadata": {
266 | "collapsed": false
267 | },
268 | "outputs": [],
269 | "source": [
270 | "#http://stackoverflow.com/questions/7733693/matplotlib-overlay-plots-with-different-scales\n",
271 | "x1 = wellfile.index.to_datetime() #converts pandas dataframe index into datetime format for graph\n",
272 | "x2 = barofile.index.to_datetime()\n",
273 | "x3 = manualfile.index.to_datetime()\n",
274 | "\n",
275 | "y1 = wellfile['Level']\n",
276 | "y2 = barofile['Level']\n",
277 | "y3 = manualfile['dtw_ft']\n",
278 | "\n",
279 | "data = [(x1,y1),(x2,y2),(x3,y3)]\n",
280 | "\n",
281 | "fig, ax = plt.subplots()\n",
282 | "\n",
283 | "# Twin the x-axis twice to make independent y-axes.\n",
284 | "axes = [ax, ax.twinx(), ax.twinx()]\n",
285 | "\n",
286 | "# Make some space on the right side for the extra y-axis.\n",
287 | "fig.subplots_adjust(right=0.75)\n",
288 | "\n",
289 | "# Move the last y-axis spine over to the right by 20% of the width of the axes\n",
290 | "axes[-1].spines['right'].set_position(('axes', 1.2))\n",
291 | "\n",
292 | "# To make the border of the right-most axis visible, we need to turn the frame\n",
293 | "# on. This hides the other plots, however, so we need to turn its fill off.\n",
294 | "axes[-1].set_frame_on(True)\n",
295 | "axes[-1].patch.set_visible(False)\n",
296 | "\n",
297 | "# And finally we get to plot things...\n",
298 | "colors = ['Green', 'Red', 'Blue']\n",
299 | "labels = ['Levelogger Pressure (ft)','Barologger Pressure (ft)','Manual Readings (ft to water)' ]\n",
300 | "marks = ['','','o']\n",
301 | "linetypes = ['solid','solid','none']\n",
302 | "\n",
303 | "for ax, color, datum, label, mark, linety in zip(axes, colors, data, labels, marks, linetypes):\n",
304 | " ax.plot(datum[0],datum[1], marker=mark, linestyle=linety, color=color, label=label)\n",
305 | " ax.set_ylabel(label, color=color)\n",
306 | " ax.tick_params(axis='y', colors=color)\n",
307 | " \n",
308 | "h1, l1 = axes[0].get_legend_handles_labels()\n",
309 | "h2, l2 = axes[1].get_legend_handles_labels()\n",
310 | "h3, l3 = axes[2].get_legend_handles_labels()\n",
311 | "axes[0].legend(h1+h2+h3, l1+l2+l3, loc=4)\n",
312 | "\n",
313 | "\n",
314 | "plt.show()"
315 | ]
316 | },
317 | {
318 | "cell_type": "code",
319 | "execution_count": null,
320 | "metadata": {
321 | "collapsed": false
322 | },
323 | "outputs": [],
324 | "source": [
325 | "print range(-10,10)"
326 | ]
327 | },
328 | {
329 | "cell_type": "markdown",
330 | "metadata": {},
331 | "source": [
332 | "# Fix Jumps"
333 | ]
334 | },
335 | {
336 | "cell_type": "markdown",
337 | "metadata": {},
338 | "source": [
339 | "This tranducer has a jump in the middle of the data caused by adjustments during manual recordings, as well as a jump at the beginning due to the transducer being out of water at the time of measurement."
340 | ]
341 | },
342 | {
343 | "cell_type": "code",
344 | "execution_count": null,
345 | "metadata": {
346 | "collapsed": false
347 | },
348 | "outputs": [],
349 | "source": [
350 | "wellfile = smoother(wellfile, 'Level', 30, 3)\n",
351 | "wellfile = smoother(wellfile, 'Conductivity', 30, 3)"
352 | ]
353 | },
354 | {
355 | "cell_type": "code",
356 | "execution_count": null,
357 | "metadata": {
358 | "collapsed": false
359 | },
360 | "outputs": [],
361 | "source": [
362 | "wellfile = jumpfix(wellfile,'Level',0.1)\n",
363 | "wellfile = jumpfix(wellfile,'Conductivity',0.005)\n",
364 | "wellfile['Level'].plot()"
365 | ]
366 | },
367 | {
368 | "cell_type": "markdown",
369 | "metadata": {},
370 | "source": [
371 | "# Remove Barometric Pressure"
372 | ]
373 | },
374 | {
375 | "cell_type": "markdown",
376 | "metadata": {},
377 | "source": [
378 | "Solinst transducers are nonvented, meaning that they measure absolute pressure. When they are submerged in a well, they are measuring the pressure of the water and the atmosphere. In most cases, we are only interested in the pressure that the water exerts, so we have to subtract the pressure that the atmosphere is exerting."
379 | ]
380 | },
381 | {
382 | "cell_type": "code",
383 | "execution_count": null,
384 | "metadata": {
385 | "collapsed": false
386 | },
387 | "outputs": [],
388 | "source": [
389 | "wellbaro = baro_drift_correct(wellfile,barofile,manualfile)"
390 | ]
391 | },
392 | {
393 | "cell_type": "code",
394 | "execution_count": null,
395 | "metadata": {
396 | "collapsed": false
397 | },
398 | "outputs": [],
399 | "source": [
400 | "wellbaro.columns"
401 | ]
402 | },
403 | {
404 | "cell_type": "code",
405 | "execution_count": null,
406 | "metadata": {
407 | "collapsed": false
408 | },
409 | "outputs": [],
410 | "source": [
411 | "wellbaro['WaterElevation'].plot()\n",
412 | "plt.vlines('11/4/2014 11:16',wellbaro['WaterElevation'].min(),wellbaro['WaterElevation'].max(),color='green')"
413 | ]
414 | },
415 | {
416 | "cell_type": "code",
417 | "execution_count": null,
418 | "metadata": {
419 | "collapsed": false
420 | },
421 | "outputs": [],
422 | "source": [
423 | "Scat(wellbaro,'abs_feet_above_barologger','WaterElevation')"
424 | ]
425 | },
426 | {
427 | "cell_type": "code",
428 | "execution_count": null,
429 | "metadata": {
430 | "collapsed": false
431 | },
432 | "outputs": [],
433 | "source": [
434 | "s, m, r = clarks(wellbaro,'abs_feet_above_barologger','WaterElevation')"
435 | ]
436 | },
437 | {
438 | "cell_type": "code",
439 | "execution_count": null,
440 | "metadata": {
441 | "collapsed": false
442 | },
443 | "outputs": [],
444 | "source": [
445 | "negcumls, cumls, ymod, resid, lag_time, dwl, dbp = baro_eff(wellbaro,'abs_feet_above_barologger','WaterElevation',100)\n",
446 | "plt.figure()\n",
447 | "lag_trim = lag_time[0:len(negcumls)]\n",
448 | "plt.scatter(lag_trim*24,negcumls, label='b.p. alone')\n",
449 | "plt.xlabel('lag (hours)')\n",
450 | "plt.ylabel('barometric response')\n",
451 | "\n",
452 | "ymin = wellbaro['WaterElevation'].min()\n",
453 | "\n",
454 | "fig, ax = plt.subplots()\n",
455 | "plt.plot(wellbaro.index[1:-1], resid)\n",
456 | "plt.text(x='11/3/2014 1:00',y=ymin+2,s='Injection Began',rotation=90,color='green',fontsize=12)\n",
457 | "y_formatter = tick.ScalarFormatter(useOffset=False)\n",
458 | "ax.yaxis.set_major_formatter(y_formatter)\n",
459 | "plt.vlines('11/4/2014 11:16',ymin+3,wellbaro['WaterElevation'].max(),color='green')\n",
460 | "\n",
461 | "print len(resid)\n",
462 | "print len(wellbaro.index[1:-1])"
463 | ]
464 | },
465 | {
466 | "cell_type": "code",
467 | "execution_count": null,
468 | "metadata": {
469 | "collapsed": false
470 | },
471 | "outputs": [],
472 | "source": [
473 | "wellbaro['corrwl'] = wellbaro['WaterElevation'] - wellbaro['abs_feet_above_barologger']*1\n",
474 | "manualfile['wlelev'] = 4800-manualfile['dtw_ft']\n",
475 | "\n",
476 | "x1 = wellbaro.index.to_datetime()[1:-1] #converts pandas dataframe index into datetime format for graph\n",
477 | "x2 = barofile.index.to_datetime()\n",
478 | "x3 = manualfile.index.to_datetime()\n",
479 | "\n",
480 | "y1 = resid\n",
481 | "y2 = barofile['Level']\n",
482 | "y3 = manualfile['wlelev']\n",
483 | "\n",
484 | "data = [(x1,y1),(x2,y2),(x3,y3)]\n",
485 | "\n",
486 | "fig, ax = plt.subplots()\n",
487 | "\n",
488 | "# Twin the x-axis twice to make independent y-axes.\n",
489 | "axes = [ax, ax.twinx(), ax.twinx()]\n",
490 | "\n",
491 | "# Make some space on the right side for the extra y-axis.\n",
492 | "fig.subplots_adjust(right=0.75)\n",
493 | "\n",
494 | "# Move the last y-axis spine over to the right by 20% of the width of the axes\n",
495 | "axes[-1].spines['right'].set_position(('axes', 1.2))\n",
496 | "\n",
497 | "# To make the border of the right-most axis visible, we need to turn the frame\n",
498 | "# on. This hides the other plots, however, so we need to turn its fill off.\n",
499 | "axes[-1].set_frame_on(True)\n",
500 | "axes[-1].patch.set_visible(False)\n",
501 | "\n",
502 | "# And finally we get to plot things...\n",
503 | "colors = ['Green', 'Red', 'Blue']\n",
504 | "labels = ['Levelogger Pressure (ft)','Barologger Pressure (ft)','Manual Readings (ft to water)' ]\n",
505 | "marks = ['','','o']\n",
506 | "linetypes = ['solid','solid','none']\n",
507 | "\n",
508 | "y_formatter = tick.ScalarFormatter(useOffset=False)\n",
509 | "\n",
510 | "for ax, color, datum, label, mark, linety in zip(axes, colors, data, labels, marks, linetypes):\n",
511 | " ax.plot(datum[0],datum[1], marker=mark, linestyle=linety, color=color, label=label)\n",
512 | " ax.set_ylabel(label, color=color)\n",
513 | " ax.tick_params(axis='y', colors=color)\n",
514 | " ax.yaxis.set_major_formatter(y_formatter)\n",
515 | "\n",
516 | "h1, l1 = axes[0].get_legend_handles_labels()\n",
517 | "h2, l2 = axes[1].get_legend_handles_labels()\n",
518 | "h3, l3 = axes[2].get_legend_handles_labels()\n",
519 | "axes[0].legend(h1+h2+h3, l1+l2+l3, loc=4)\n",
520 | "axes[2].set_ylim(4485,4493)\n",
521 | "\n",
522 | "\n",
523 | "plt.show()"
524 | ]
525 | },
526 | {
527 | "cell_type": "markdown",
528 | "metadata": {},
529 | "source": [
530 | "## Match Measurement Interval of Barometer (Barologger) and Transducer "
531 | ]
532 | },
533 | {
534 | "cell_type": "markdown",
535 | "metadata": {},
536 | "source": [
537 | "It is best to set Solinst transducers (Leveloggers) to start at the same time and to measure at the same frequency as your Barologger. Sometimes, this does not happen. To solve mismatches in sampling interval, we can resample the barometer data to same base (start time) and frequency as the transducer."
538 | ]
539 | },
540 | {
541 | "cell_type": "markdown",
542 | "metadata": {},
543 | "source": [
544 | "Using the `hourly_resample` function above, we can resample each transducer dataset."
545 | ]
546 | }
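,
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# A minimal sketch of matching the Barologger record to the transducer interval, assuming roughly\n",
"# hourly data; hourly_resample (defined earlier in this notebook) wraps similar pandas logic.\n",
"# barofile = barofile.resample('1H', how='mean').interpolate(method='time')\n",
"# wellfile = wellfile.resample('1H', how='mean').interpolate(method='time')"
]
}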
547 | ],
548 | "metadata": {
549 | "kernelspec": {
550 | "display_name": "Python 2",
551 | "language": "python",
552 | "name": "python2"
553 | },
554 | "language_info": {
555 | "codemirror_mode": {
556 | "name": "ipython",
557 | "version": 2
558 | },
559 | "file_extension": ".py",
560 | "mimetype": "text/x-python",
561 | "name": "python",
562 | "nbconvert_exporter": "python",
563 | "pygments_lexer": "ipython2",
564 | "version": "2.7.10"
565 | }
566 | },
567 | "nbformat": 4,
568 | "nbformat_minor": 0
569 | }
570 |
--------------------------------------------------------------------------------
/wellapplication/ros.py:
--------------------------------------------------------------------------------
1 | # directly copied from https://github.com/Geosyntec/wqio/blob/master/wqio/ros.py
2 | # Copyright (c) 2015, Geosyntec Consultants
3 | # All rights reserved.
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # * Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # * Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # * Neither the name of wqio nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 | import warnings
31 |
32 | import numpy
33 | from scipy import stats
34 | import pandas
35 |
36 |
37 | def _ros_sort(df, result, censorship):
38 | """
39 | This function prepares a dataframe for ROS. It sorts ascending with
40 | left-censored observations on top. Censored results larger than the
41 | maximum uncensored results are removed from the dataframe.
42 | Parameters
43 | ----------
44 | df : pandas.DataFrame
45 | result : str
46 | Name of the column in the dataframe that contains observed
47 | values. Censored values should be set to the detection (upper)
48 | limit.
49 | censorship : str
50 | Name of the column in the dataframe that indicates that a
51 | result is left-censored. (i.e., True -> censored,
52 | False -> uncensored)
53 | Returns
54 | ------
55 | sorted_df : pandas.DataFrame
56 | The sorted dataframe with all columns dropped except the
57 | result and censorship columns.
58 | """
59 |
60 | # separate uncensored data from censored data
61 | max_uncensored = df.loc[~df[censorship], result].max()
62 | if (df.loc[df[censorship], result] > max_uncensored).any():
63 | msg = (
64 | "Dropping censored results greater than "
65 | "the max uncensored result."
66 | )
67 | warnings.warn(msg)
68 |
69 | df_sorted = (
70 | df[[censorship, result]]
71 | .sort_values(by=[censorship, result], ascending=[False, True])
72 | .where(lambda df:
73 | (~df[censorship]) | # uncensored values
74 | ((df[result] < max_uncensored) & df[censorship]) # censored values < max_uncen
75 | )
76 | .dropna(how='all')
77 | .reset_index(drop=True)
78 | .assign(**{censorship: lambda df: df[censorship].astype(bool)})
79 | )
80 | return df_sorted[[result, censorship]]
81 |
82 |
83 | def cohn_numbers(df, result, censorship):
84 | """
85 | Computes the Cohn numbers for the detection limits in the dataset.
86 | The Cohn Numbers are:
87 | - :math:`A_j =` the number of uncensored obs above the
88 | :math:`j^\mathrm{th}` threshold.
89 | - :math:`B_j =` the number of observations (cen & uncen) below
90 | the :math:`j^\mathrm{th}` threshold.
91 | - :math:`C_j =` the number of censored observations at the
92 | :math:`j^\mathrm{th}` threshold.
93 | - :math:`\mathrm{PE}_j =` the probability of exceeding the
94 | :math:`j^\mathrm{th}` threshold
95 | - :math:`\mathrm{DL}_j =` the unique, sorted detection limits
96 | - :math:`\mathrm{DL}_{j+1} = \mathrm{DL}_j` shifted down a
97 | single index (row)
98 | Parameters
99 | ----------
100 | dataframe : pandas.DataFrame
101 | result : str
102 | Name of the column in the dataframe that contains observed
103 | values. Censored values should be set to the detection (upper)
104 | limit.
105 | censorship : str
106 | Name of the column in the dataframe that indicates that a
107 | result is left-censored. (i.e., True -> censored,
108 | False -> uncensored)
109 | Returns
110 | -------
111 | cohn : pandas.DataFrame
112 | """
113 |
114 | def nuncen_above(row):
115 | """ A, the number of uncensored obs above the given threshold.
116 | """
117 |
118 | # index of results above the lower_dl DL
119 | above = df[result] >= row['lower_dl']
120 |
121 | # index of results below the upper_dl DL
122 | below = df[result] < row['upper_dl']
123 |
124 | # index of non-detect results
125 | detect = df[censorship].eq(False)
126 |
127 | # return the number of results where all conditions are True
128 | return df[above & below & detect].shape[0]
129 |
130 | def nobs_below(row):
131 | """ B, the number of observations (cen & uncen) below the given
132 | threshold
133 | """
134 |
135 | # index of data less than the lower_dl DL
136 | less_than = df[result] < row['lower_dl']
137 |
138 | # index of data less than or equal to the lower_dl DL
139 | less_thanequal = df[result] <= row['lower_dl']
140 |
141 | # index of detects, non-detects
142 | uncensored = df[censorship].eq(False)
143 | censored = df[censorship].eq(True)
144 |
145 | # number results less than or equal to lower_dl DL and non-detect
146 | LTE_censored = df[less_thanequal & censored].shape[0]
147 |
148 | # number of results less than lower_dl DL and detected
149 | LT_uncensored = df[less_than & uncensored].shape[0]
150 |
151 | # return the sum
152 | return LTE_censored + LT_uncensored
153 |
154 | def ncen_equal(row):
155 | """ C, the number of censored observations at the given
156 | threshold.
157 | """
158 |
159 | censored_index = df[censorship]
160 | censored_data = df[result][censored_index]
161 | censored_below = censored_data == row['lower_dl']
162 | return censored_below.sum()
163 |
164 | def set_upper_limit(cohn):
165 | """ Sets the upper_dl DL for each row of the Cohn dataframe. """
166 | if cohn.shape[0] > 1:
167 | return cohn['lower_dl'].shift(-1).fillna(value=numpy.inf)
168 | else:
169 | return [numpy.inf]
170 |
171 | def compute_PE(A, B):
172 | """ Computes the probability of excedance for each row of the
173 | Cohn dataframe. """
174 | N = len(A)
175 | PE = numpy.empty(N, dtype='float64')
176 | PE[-1] = 0.0
177 | for j in range(N - 2, -1, -1):
178 | PE[j] = PE[j + 1] + (1 - PE[j + 1]) * A[j] / (A[j] + B[j])
179 |
180 | return PE
181 |
182 | # unique, sorted detection limits
183 | censored_data = df[censorship]
184 | DLs = pandas.unique(df.loc[censored_data, result])
185 | DLs.sort()
186 |
187 | # if there is a result smaller than the minimum detection limit,
188 | # add that value to the array
189 | if DLs.shape[0] > 0:
190 | if df[result].min() < DLs.min():
191 | DLs = numpy.hstack([df[result].min(), DLs])
192 |
193 | # create a dataframe
194 | cohn = (
195 | pandas.DataFrame(DLs, columns=['lower_dl'])
196 | .assign(upper_dl=lambda df: set_upper_limit(df))
197 | .assign(nuncen_above=lambda df: df.apply(nuncen_above, axis=1))
198 | .assign(nobs_below=lambda df: df.apply(nobs_below, axis=1))
199 | .assign(ncen_equal=lambda df: df.apply(ncen_equal, axis=1))
200 | .reindex(range(DLs.shape[0] + 1))
201 | .assign(prob_exceedance=lambda df: compute_PE(df['nuncen_above'], df['nobs_below']))
202 | )
203 |
204 | else:
205 | dl_cols = ['lower_dl', 'upper_dl', 'nuncen_above',
206 | 'nobs_below', 'ncen_equal', 'prob_exceedance']
207 | cohn = pandas.DataFrame(numpy.empty((0, len(dl_cols))), columns=dl_cols)
208 |
209 | return cohn
210 |
211 |
212 | def _detection_limit_index(res, cohn):
213 | """ Helper function to create an array of indices for the detection
214 | limits (cohn) corresponding to each data point.
215 | Parameters
216 | ----------
217 | res : float
218 | A single observed result from the larger dataset.
219 | cohn : pandas.DataFrame
220 | Dataframe of Cohn numbers.
221 | Returns
222 | -------
223 | det_limit_index : int
224 | The index of the corresponding detection limit in `cohn`
225 | See also
226 | --------
227 | cohn_numbers
228 | """
229 |
230 | if cohn.shape[0] > 0:
231 | index, = numpy.where(cohn['lower_dl'] <= res)
232 | det_limit_index = index[-1]
233 | else:
234 | det_limit_index = 0
235 |
236 | return det_limit_index
237 |
238 |
239 | def _ros_group_rank(df, dl_idx, censorship):
240 | """
241 | Ranks each result within the groups defined by the record's
242 | detection limit index and censorship.
243 | Parameters
244 | ----------
245 | df : pandas.DataFrame
246 | dl_idx : str
247 | Name of the column in the dataframe the index of the result's
248 | corresponding detection limit in the `cohn` dataframe.
249 | censorship : str
250 | Name of the column in the dataframe that indicates that a
251 | result is left-censored. (i.e., True -> censored,
252 | False -> uncensored)
253 | Returns
254 | -------
255 | ranks : numpy.array
256 | Array of ranks for the dataset.
257 | """
258 |
259 | ranks = (
260 | df.assign(rank=1)
261 | .groupby(by=[dl_idx, censorship])['rank']
262 | .transform(lambda g: g.cumsum())
263 | )
264 | return ranks
265 |
266 |
267 | def _ros_plot_pos(row, censorship, cohn):
268 | """
269 | Compute the ROS plotting position for a result based on its rank,
270 | censorship, detection limit index.
271 | Parameters
272 | ----------
273 | row : pandas.Series or dict-like
274 | Full observation (row) from a censored dataset. Requires a
275 | 'rank', 'detection_limit', and `censorship` column.
276 | censorship : str
277 | Name of the column in the dataframe that indicates that a
278 | result is left-censored. (i.e., True -> censored,
279 | False -> uncensored)
280 | cohn : pandas.DataFrame
281 | Dataframe of Cohn numbers.
282 | Returns
283 | -------
284 | plotting_position : float
285 | See also
286 | --------
287 | cohn_numbers
288 | """
289 |
290 | DL_index = row['det_limit_index']
291 | rank = row['rank']
292 | censored = row[censorship]
293 |
294 | dl_1 = cohn.iloc[DL_index]
295 | dl_2 = cohn.iloc[DL_index + 1]
296 | if censored:
297 | return (1 - dl_1['prob_exceedance']) * rank / (dl_1['ncen_equal'] + 1)
298 | else:
299 | return (1 - dl_1['prob_exceedance']) + (dl_1['prob_exceedance'] - dl_2['prob_exceedance']) * rank / (dl_1['nuncen_above'] + 1)
300 |
301 |
302 | def _norm_plot_pos(results):
303 | """
304 | Computes standard normal (Gaussian) plotting positions using scipy.
305 | Parameters
306 | ----------
307 | results : array-like
308 | Sequence of observed quantities.
309 | Returns
310 | -------
311 | plotting_position : array of floats
312 | """
313 | ppos, sorted_res = stats.probplot(results, fit=False)
314 | return stats.norm.cdf(ppos)
315 |
316 |
317 | def plotting_positions(df, censorship, cohn):
318 | """
319 | Compute the ROS plotting positions for results based on their rank,
320 | censorship, detection limit index.
321 | Parameters
322 | ----------
323 | df : pandas.DataFrame.
324 | censorship : str
325 | Name of the column in the dataframe that indicates that a
326 | result is left-censored. (i.e., True -> censored,
327 | False -> uncensored)
328 | cohn : pandas.DataFrame
329 | Dataframe of Cohn numbers.
330 | Returns
331 | -------
332 | plotting_position : array of float
333 | See also
334 | --------
335 | cohn_numbers
336 | """
337 |
338 | plot_pos = df.apply(lambda r: _ros_plot_pos(r, censorship, cohn), axis=1)
339 |
340 | # correctly sort the plotting positions of the ND data:
341 | ND_plotpos = plot_pos[df[censorship]]
342 | ND_plotpos.values.sort()
343 | plot_pos.loc[df[censorship]] = ND_plotpos
344 |
345 | return plot_pos
346 |
347 |
348 | def _ros_estimate(df, result, censorship, transform_in, transform_out):
349 | """ Computed the estimated censored from the best-fit line of a
350 | probability plot of the uncensored values.
351 | Parameters
352 | ----------
353 | df : pandas.DataFrame
354 | result : str
355 | Name of the column in the dataframe that contains observed
356 | values. Censored values should be set to the detection (upper)
357 | limit.
358 | censorship : str
359 | Name of the column in the dataframe that indicates that a
360 | result is left-censored. (i.e., True -> censored,
361 | False -> uncensored)
362 | transform_in, transform_out : callable
363 | Transformations to be applied to the data prior to fitting
364 | the line and after estimated values from that line. Typically,
365 | `numpy.log` and `numpy.exp` are used, respectively.
366 | Returns
367 | -------
368 | estimated : pandas.DataFrame
369 | A new dataframe with two new columns: "estimated" and "final".
370 | The "estimated" column contains of the values inferred from the
371 | best-fit line. The "final" column contains the estimated values
372 | only where the original results were censored, and the original
373 | results everwhere else.
374 | """
375 |
376 | # detect/non-detect selectors
377 | uncensored_mask = df[censorship].eq(False)
378 | censored_mask = df[censorship].eq(True)
379 |
380 | # fit a line to the logs of the detected data
381 | fit_params = stats.linregress(
382 | df['Zprelim'][uncensored_mask],
383 | transform_in(df[result][uncensored_mask])
384 | )
385 |
386 | # pull out the slope and intercept for use later
387 | slope, intercept = fit_params[:2]
388 |
389 | # model the data based on the best-fit curve
390 | df = (
391 | df.assign(estimated=transform_out(slope * df['Zprelim'][censored_mask] + intercept))
392 | .assign(final=lambda df: numpy.where(df[censorship], df['estimated'], df[result]))
393 | )
394 |
395 | return df
396 |
397 |
398 | def _do_ros(df, result, censorship, transform_in, transform_out):
399 | """
400 | Prepares a dataframe for, and then estimates the values of a censored
401 | dataset using Regression on Order Statistics
402 | Parameters
403 | ----------
404 | df : pandas.DataFrame
405 | result : str
406 | Name of the column in the dataframe that contains observed
407 | values. Censored values should be set to the detection (upper)
408 | limit.
409 | censorship : str
410 | Name of the column in the dataframe that indicates that a
411 | result is left-censored. (i.e., True -> censored,
412 | False -> uncensored)
413 | transform_in, transform_out : callable
414 | Transformations to be applied to the data prior to fitting
415 | the line and after estimated values from that line. Typically,
416 | `numpy.log` and `numpy.exp` are used, respectively.
417 | Returns
418 | -------
419 | estimated : pandas.DataFrame
420 | A new dataframe with two new columns: "estimated" and "final".
421 | The "estimated" column contains of the values inferred from the
422 | best-fit line. The "final" column contains the estimated values
423 | only where the original results were censored, and the original
424 | results everwhere else.
425 | """
426 |
427 | # compute the Cohn numbers
428 | cohn = cohn_numbers(df, result=result, censorship=censorship)
429 |
430 | modeled = (
431 | df.pipe(_ros_sort, result=result, censorship=censorship)
432 | .assign(det_limit_index=lambda df: df[result].apply(_detection_limit_index, args=(cohn,)))
433 | .assign(rank=lambda df: _ros_group_rank(df, 'det_limit_index', censorship))
434 | .assign(plot_pos=lambda df: plotting_positions(df, censorship, cohn))
435 | .assign(Zprelim=lambda df: stats.norm.ppf(df['plot_pos']))
436 | .pipe(_ros_estimate, result, censorship, transform_in, transform_out)
437 | )
438 |
439 | return modeled
440 |
441 |
442 | def ROS(result, censorship, df=None, min_uncensored=2,
443 | max_fraction_censored=0.8, substitution_fraction=0.5,
444 | transform_in=numpy.log, transform_out=numpy.exp,
445 | as_array=False):
446 | """
447 | Impute censored dataset using Regression on Order Statistics (ROS)
448 | or simple substitution if insufficient uncensored data exists.
449 | Method described in *Nondetects and Data Analysis* by Dennis R.
450 | Helsel (John Wiley, 2005) to estimate the left-censored (non-detect)
451 | values of a dataset.
452 | Parameters
453 | ----------
454 | result : str or array-like
455 | Label of the column or the float array of censored results
456 | censorship : str
457 | Label of the column or the bool array of the censorship
458 | status of the results.
459 | * True if censored,
460 | * False if uncensored
461 | df : pandas.DataFrame, optional
462 | If `result` and `censorship` are labels, this is the DataFrame
463 | that contains those columns.
464 | min_uncensored : int (default is 2)
465 | The minimum number of uncensored values required before ROS
466 | can be used to impute the censored results. When this criterion
467 | is not met, simple substitution is used instead.
468 | max_fraction_censored : float (default is 0.8)
469 | The maximum fraction of censored data below which ROS can be
470 | used to impute the censored results. When this fraction is
471 | exceeded, simple substitution is used instead.
472 | substitution_fraction : float (default is 0.5)
473 | The fraction of the detection limit to be used during simple
474 | substitution of the censored values.
475 | transform_in : callable (default is numpy.log)
476 | Transformation to be applied to the values prior to fitting a
477 | line to the plotting positions vs. uncensored values.
478 | transform_out : callable (default is numpy.exp)
479 | Transformation to be applied to the imputed censored values
480 | estimated from the previously computed best-fit line.
481 | as_array : bool (default is False)
482 | When True, a numpy array of the imputed results is returned.
483 | Otherwise, a modified copy of the original dataframe with all
484 | of the intermediate calculations is returned.
485 | Returns
486 | -------
487 | imputed : numpy.array (default) or pandas.DataFrame
488 | The final results where the censored values have either been
489 | imputed through ROS or substituted as a fraction of the
490 | detection limit.
491 | """
492 |
493 | # process arrays into a dataframe, if necessary
494 | if df is None:
495 | df = pandas.DataFrame({'res': result, 'cen': censorship})
496 | result = 'res'
497 | censorship = 'cen'
498 |
499 | # basic counts/metrics of the dataset
500 | N_observations = df.shape[0]
501 | N_censored = df[censorship].astype(int).sum()
502 | N_uncensored = N_observations - N_censored
503 | fraction_censored = N_censored / N_observations
504 | print(N_censored,N_uncensored,fraction_censored)
505 |
506 | # add plotting positions if there are no censored values
507 | if N_censored == 0:
508 | output = df[[result, censorship]].assign(final=df[result])
509 |
510 | # substitute w/ fraction of the DLs if there's insufficient
511 | # uncensored data
512 | elif (N_uncensored < min_uncensored) or (fraction_censored > max_fraction_censored):
513 | final = numpy.where(df[censorship], df[result] * substitution_fraction, df[result])
514 | output = df.assign(final=final)[[result, censorship, 'final']]
515 |
516 | # normal ROS stuff
517 | else:
518 | output = _do_ros(df, result, censorship, transform_in, transform_out)
519 |
520 | # convert to an array if necessary
521 | if as_array:
522 | output = output['final'].values
523 |
524 |
525 | return output
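
# A minimal usage sketch with assumed, synthetic inputs (kept as comments so importing this module
# has no side effects); the column names 'res' and 'cen' mirror the array-input path above:
#
#   import pandas
#   demo = pandas.DataFrame({
#       'res': [0.5, 0.5, 1.2, 1.9, 2.7, 3.6, 4.0, 5.2],  # censored rows hold their detection limit
#       'cen': [True, True, False, False, False, False, False, False],
#   })
#   imputed = ROS('res', 'cen', df=demo, as_array=True)    # numpy array of imputed + observed values
#   detailed = ROS('res', 'cen', df=demo, as_array=False)  # DataFrame with intermediate ROS columns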
--------------------------------------------------------------------------------
/wellapplication/chem.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Jan 05 09:50:51 2016
4 |
5 | @author: paulinkenbrandt
6 | """
7 | from __future__ import absolute_import, division, print_function, unicode_literals
8 | import pandas as pd
9 | from datetime import datetime
10 | import numpy as np
11 | import requests
12 |
13 | class WQP(object):
14 | """Downloads Water Quality Data from thw Water Quality Portal based on parameters entered
15 | :param values: query parameter designating location to select site; this is the Argument for the REST parameter in
16 | table 1 of https://www.waterqualitydata.us/webservices_documentation/
17 | :param loc_type: type of query to perform; valid inputs include 'huc', 'bBox', 'countycode', 'siteid';
18 | this is the REST parameter of table 1 of https://www.waterqualitydata.us/webservices_documentation/
19 | :type loc_type: str
20 | :type values: str
21 | :param **kwargs: additional Rest Parameters
22 |
23 | :Example:
24 | >>> wq = WQP('-111.54,40.28,-111.29,40.48','bBox')
25 | https://www.waterqualitydata.us/Result/search?mimeType=csv&zip=no&siteType=Spring&siteType=Well&characteristicType=Inorganics%2C+Major%2C+Metals&characteristicType=Inorganics%2C+Major%2C+Non-metals&characteristicType=Nutrient&characteristicType=Physical&bBox=-111.54%2C40.28%2C-111.29%2C40.48&sorted=no&sampleMedia=Water
26 |
27 | """
28 |
29 | def __init__(self, values, loc_type, **kwargs):
30 | r"""Downloads Water Quality Data from thw Water Quality Portal based on parameters entered
31 | """
32 | self.loc_type = loc_type
33 | self.values = values
34 | self.url = 'https://www.waterqualitydata.us/'
35 | self.geo_criteria = ['sites', 'stateCd', 'huc', 'countyCd', 'bBox']
36 | self.cTgroups = ['Inorganics, Major, Metals', 'Inorganics, Major, Non-metals', 'Nutrient', 'Physical']
37 | self.results = self.get_wqp_results('Result', **kwargs)
38 | self.stations = self.get_wqp_stations('Station', **kwargs)
39 |
40 | def get_response(self, service, **kwargs):
41 | """ Returns a dictionary of data requested by each function.
42 | :param service: options include 'Station' or 'Results'
43 | table 1 of https://www.waterqualitydata.us/webservices_documentation/
44 | """
45 | http_error = 'Could not connect to the API. This could be because you have no internet connection, a parameter' \
46 | ' was input incorrectly, or the API is currently down. Please try again.'
47 | # For python 3.4
48 | # try:
49 | kwargs[self.loc_type] = self.values
50 | kwargs['mimeType'] = 'csv'
51 | kwargs['zip'] = 'no'
52 | kwargs['sorted'] = 'no'
53 |
54 | if 'siteType' not in kwargs:
55 | kwargs['sampleMedia'] = 'Water'
56 |
57 | if 'siteType' not in kwargs:
58 | kwargs['siteType'] = ['Spring', 'Well']
59 | print('This function is biased towards groundwater. For all sites, pass an explicit siteType argument.')
60 |
61 | if 'characteristicType' not in kwargs:
62 | kwargs['characteristicType'] = self.cTgroups
63 |
64 | total_url = self.url + service + '/search?'
65 | response_ob = requests.get(total_url, params=kwargs)
66 |
67 | return response_ob
68 |
69 | def get_wqp_stations(self, service, **kwargs):
70 | nwis_dict = self.get_response(service, **kwargs).url
71 |
72 | stations = pd.read_csv(nwis_dict)
73 | return stations
74 |
75 | def get_wqp_results(self, service, **kwargs):
76 | """Bring data from WQP site into a Pandas DataFrame for analysis"""
77 |
78 | # set data types
79 | Rdtypes = {"OrganizationIdentifier": np.str_, "OrganizationFormalName": np.str_, "ActivityIdentifier": np.str_,
80 | "ActivityStartTime/Time": np.str_,
81 | "ActivityTypeCode": np.str_, "ActivityMediaName": np.str_, "ActivityMediaSubdivisionName": np.str_,
82 | "ActivityStartDate": np.str_, "ActivityStartTime/TimeZoneCode": np.str_,
83 | "ActivityEndDate": np.str_, "ActivityEndTime/Time": np.str_, "ActivityEndTime/TimeZoneCode": np.str_,
84 | "ActivityDepthHeightMeasure/MeasureValue": np.float16,
85 | "ActivityDepthHeightMeasure/MeasureUnitCode": np.str_,
86 | "ActivityDepthAltitudeReferencePointText": np.str_,
87 | "ActivityTopDepthHeightMeasure/MeasureValue": np.float16,
88 | "ActivityTopDepthHeightMeasure/MeasureUnitCode": np.str_,
89 | "ActivityBottomDepthHeightMeasure/MeasureValue": np.float16,
90 | "ActivityBottomDepthHeightMeasure/MeasureUnitCode": np.str_,
91 | "ProjectIdentifier": np.str_, "ActivityConductingOrganizationText": np.str_,
92 | "MonitoringLocationIdentifier": np.str_, "ActivityCommentText": np.str_,
93 | "SampleAquifer": np.str_, "HydrologicCondition": np.str_, "HydrologicEvent": np.str_,
94 | "SampleCollectionMethod/MethodIdentifier": np.str_,
95 | "SampleCollectionMethod/MethodIdentifierContext": np.str_,
96 | "SampleCollectionMethod/MethodName": np.str_, "SampleCollectionEquipmentName": np.str_,
97 | "ResultDetectionConditionText": np.str_, "CharacteristicName": np.str_,
98 | "ResultSampleFractionText": np.str_,
99 | "ResultMeasureValue": np.str_, "ResultMeasure/MeasureUnitCode": np.str_,
100 | "MeasureQualifierCode": np.str_,
101 | "ResultStatusIdentifier": np.str_, "StatisticalBaseCode": np.str_, "ResultValueTypeName": np.str_,
102 | "ResultWeightBasisText": np.str_, "ResultTimeBasisText": np.str_,
103 | "ResultTemperatureBasisText": np.str_,
104 | "ResultParticleSizeBasisText": np.str_, "PrecisionValue": np.str_, "ResultCommentText": np.str_,
105 | "USGSPCode": np.str_, "ResultDepthHeightMeasure/MeasureValue": np.float16,
106 | "ResultDepthHeightMeasure/MeasureUnitCode": np.str_,
107 | "ResultDepthAltitudeReferencePointText": np.str_,
108 | "SubjectTaxonomicName": np.str_, "SampleTissueAnatomyName": np.str_,
109 | "ResultAnalyticalMethod/MethodIdentifier": np.str_,
110 | "ResultAnalyticalMethod/MethodIdentifierContext": np.str_,
111 | "ResultAnalyticalMethod/MethodName": np.str_, "MethodDescriptionText": np.str_,
112 | "LaboratoryName": np.str_,
113 | "AnalysisStartDate": np.str_, "ResultLaboratoryCommentText": np.str_,
114 | "DetectionQuantitationLimitTypeName": np.str_,
115 | "DetectionQuantitationLimitMeasure/MeasureValue": np.str_,
116 | "DetectionQuantitationLimitMeasure/MeasureUnitCode": np.str_, "PreparationStartDate": np.str_,
117 | "ProviderName": np.str_}
118 |
119 | # define date field indices
120 | dt = [6, 56, 61]
121 | csv = self.get_response(service, **kwargs).url
122 | print(csv)
123 | # read csv into DataFrame
124 | df = pd.read_csv(csv, dtype=Rdtypes, parse_dates=dt)
125 | return df
126 |
127 | def massage_results(self, df = ''):
128 | """Massage WQP result data for analysis
129 |
130 | When called, this function:
131 | - renames all of the results fields, abbreviating the fields and eliminating slashes and spaces.
132 | - parses the datetime fields, fixing errors when possible (see :func:`datetimefix`)
133 | - standardizes units to mg/L
134 | - normalizes nutrient species(See :func:`parnorm`)
135 |
136 |
137 | """
138 | if not isinstance(df, pd.DataFrame):
139 | df = self.results
140 |
141 | # Map new names for columns
142 | ResFieldDict = {"AnalysisStartDate": "AnalysisDate", "ResultAnalyticalMethod/MethodIdentifier": "AnalytMeth",
143 | "ResultAnalyticalMethod/MethodName": "AnalytMethId",
144 | "ResultDetectionConditionText": "DetectCond",
145 | "ResultLaboratoryCommentText": "LabComments", "LaboratoryName": "LabName",
146 | "DetectionQuantitationLimitTypeName": "LimitType",
147 | "DetectionQuantitationLimitMeasure/MeasureValue": "MDL",
148 | "DetectionQuantitationLimitMeasure/MeasureUnitCode": "MDLUnit",
149 | "MethodDescriptionText": "MethodDescript",
150 | "OrganizationIdentifier": "OrgId", "OrganizationFormalName": "OrgName",
151 | "CharacteristicName": "Param",
152 | "ProjectIdentifier": "ProjectId", "MeasureQualifierCode": "QualCode",
153 | "ResultCommentText": "ResultComment",
154 | "ResultStatusIdentifier": "ResultStatus", "ResultMeasureValue": "ResultValue",
155 | "ActivityCommentText": "SampComment", "ActivityDepthHeightMeasure/MeasureValue": "SampDepth",
156 | "ActivityDepthAltitudeReferencePointText": "SampDepthRef",
157 | "ActivityDepthHeightMeasure/MeasureUnitCode": "SampDepthU",
158 | "SampleCollectionEquipmentName": "SampEquip",
159 | "ResultSampleFractionText": "SampFrac", "ActivityStartDate": "SampleDate",
160 | "ActivityIdentifier": "SampleId",
161 | "ActivityStartTime/Time": "SampleTime", "ActivityMediaSubdivisionName": "SampMedia",
162 | "SampleCollectionMethod/MethodIdentifier": "SampMeth",
163 | "SampleCollectionMethod/MethodName": "SampMethName",
164 | "ActivityTypeCode": "SampType", "MonitoringLocationIdentifier": "StationId",
165 | "ResultMeasure/MeasureUnitCode": "Unit", "USGSPCode": "USGSPCode"}
166 |
167 | # Rename Data
168 | # df already holds self.results unless a DataFrame was passed in (see above)
169 | df1 = df.rename(columns=ResFieldDict)
170 |
171 | # Remove unwanted and bad times
172 | df1["SampleDate"] = df1[["SampleDate", "SampleTime"]].apply(lambda x: self.datetimefix(x, "%Y-%m-%d %H:%M"), 1)
173 |
174 | # Define unneeded fields to drop
175 | resdroplist = ["ActivityBottomDepthHeightMeasure/MeasureUnitCode",
176 | "ActivityBottomDepthHeightMeasure/MeasureValue",
177 | "ActivityConductingOrganizationText", "ActivityEndDate", "ActivityEndTime/Time",
178 | "ActivityEndTime/TimeZoneCode", "ActivityMediaName", "ActivityStartTime/TimeZoneCode",
179 | "ActivityTopDepthHeightMeasure/MeasureUnitCode", "ActivityTopDepthHeightMeasure/MeasureValue",
180 | "HydrologicCondition", "HydrologicEvent", "PrecisionValue", "PreparationStartDate",
181 | "ProviderName",
182 | "ResultAnalyticalMethod/MethodIdentifierContext", "ResultDepthAltitudeReferencePointText",
183 | "ResultDepthHeightMeasure/MeasureUnitCode", "ResultDepthHeightMeasure/MeasureValue",
184 | "ResultParticleSizeBasisText", "ResultTemperatureBasisText",
185 | "ResultTimeBasisText", "ResultValueTypeName", "ResultWeightBasisText", "SampleAquifer",
186 | "SampleCollectionMethod/MethodIdentifierContext", "SampleTissueAnatomyName",
187 | "StatisticalBaseCode",
188 | "SubjectTaxonomicName", "SampleTime"]
189 |
190 | # Drop fields
191 | df1 = df1.drop(resdroplist, axis=1)
192 |
193 | # convert results and mdl to float
194 | df1['ResultValue'] = pd.to_numeric(df1['ResultValue'], errors='coerce')
195 | df1['MDL'] = pd.to_numeric(df1['MDL'], errors='coerce')
196 |
197 | # match old and new station ids
198 | df1['StationId'] = df1['StationId'].str.replace('_WQX-', '-')
199 |
200 | # standardize all ug/l data to mg/l
201 | df1.Unit = df1.Unit.apply(lambda x: str(x).rstrip(), 1)
202 | df1.ResultValue = df1[["ResultValue", "Unit"]].apply(
203 | lambda x: x[0] / 1000 if str(x[1]).lower() == "ug/l" else x[0], 1)
204 | df1.Unit = df1.Unit.apply(lambda x: self.unitfix(x), 1)
205 |
206 | df1['Param'], df1['ResultValue'], df1['Unit'] = zip(
207 | *df1[['Param', 'ResultValue', 'Unit']].apply(lambda x: self.parnorm(x), 1))
208 |
209 | #self.results = df1
210 |
211 | return df1
212 |
213 | def datetimefix(self, x, form):
214 | """This script cleans date-time errors
215 |
216 | :param x: date-time string
217 | :param form: format of date-time string
218 |
219 | :returns: formatted datetime type
220 | """
221 | d = str(x[0]).lstrip().rstrip()[0:10]
222 | t = str(x[1]).lstrip().rstrip()[0:5].zfill(5)
223 | try:
224 | int(d[0:2])
225 | except(ValueError, TypeError, NameError):
226 | return np.nan
227 | try:
228 | int(t[0:2])
229 | int(t[3:5])
230 | except(ValueError, TypeError, NameError):
231 | t = "00:00"
232 |
233 | if int(t[0:2]) > 23:
234 | t = "00:00"
235 | elif int(t[3:5]) > 59:
236 | t = "00:00"
237 | else:
238 | t = t[0:2].zfill(2) + ":" + t[3:5]
239 | return datetime.strptime(d + " " + t, form)
240 |
241 | def parnorm(self, x):
242 | """Standardizes nutrient species
243 |
244 | - Nitrate as N to Nitrate
245 | - Nitrite as N to Nitrite
246 | - Sulfate as S to Sulfate
247 | """
248 | p = str(x[0]).rstrip().lstrip().lower()
249 | u = str(x[2]).rstrip().lstrip().lower()
250 | if p == 'nitrate' and u == 'mg/l as n':
251 | return 'Nitrate', x[1] * 4.427, 'mg/l'
252 | elif p == 'nitrite' and u == 'mg/l as n':
253 | return 'Nitrite', x[1] * 3.285, 'mg/l'
254 | elif p == 'ammonia-nitrogen' or p == 'ammonia-nitrogen as n' or p == 'ammonia and ammonium':
255 | return 'Ammonium', x[1] * 1.288, 'mg/l'
256 | elif p == 'ammonium' and u == 'mg/l as n':
257 | return 'Ammonium', x[1] * 1.288, 'mg/l'
258 | elif p == 'sulfate as s':
259 | return 'Sulfate', x[1] * 2.996, 'mg/l'
260 | elif p in ('phosphate-phosphorus', 'phosphate-phosphorus as p', 'orthophosphate as p'):
261 | return 'Phosphate', x[1] * 3.066, 'mg/l'
262 | elif (p == 'phosphate' or p == 'orthophosphate') and u == 'mg/l as p':
263 | return 'Phosphate', x[1] * 3.066, 'mg/l'
264 | elif u == 'ug/l':
265 | return x[0], x[1] / 1000, 'mg/l'
266 | else:
267 | return x[0], x[1], str(x[2]).rstrip()
268 |
269 | def unitfix(self, x):
270 | """Standardizes unit labels from ug/l to mg/l
271 |
272 | :param x: unit label to convert
273 | :type x: str
274 |
275 | :returns: unit string as mg/l
276 | .. warning:: must be used with a value conversion tool
277 | """
278 | z = str(x).lower()
279 | if z == "ug/l":
280 | return "mg/l"
281 | elif z == "mg/l":
282 | return "mg/l"
283 | else:
284 | return x
285 |
286 | def massage_stations(self):
287 | """Massage WQP station data for analysis
288 | """
289 | StatFieldDict = {"MonitoringLocationIdentifier": "StationId", "AquiferName": "Aquifer",
290 | "AquiferTypeName": "AquiferType",
291 | "ConstructionDateText": "ConstDate", "CountyCode": "CountyCode",
292 | "WellDepthMeasure/MeasureValue": "Depth",
293 | "WellDepthMeasure/MeasureUnitCode": "DepthUnit", "VerticalMeasure/MeasureValue": "Elev",
294 | "VerticalAccuracyMeasure/MeasureValue": "ElevAcc",
295 | "VerticalAccuracyMeasure/MeasureUnitCode": "ElevAccUnit",
296 | "VerticalCollectionMethodName": "ElevMeth",
297 | "VerticalCoordinateReferenceSystemDatumName": "ElevRef",
298 | "VerticalMeasure/MeasureUnitCode": "ElevUnit", "FormationTypeText": "FmType",
299 | "WellHoleDepthMeasure/MeasureValue": "HoleDepth",
300 | "WellHoleDepthMeasure/MeasureUnitCode": "HoleDUnit",
301 | "HorizontalAccuracyMeasure/MeasureValue": "HorAcc",
302 | "HorizontalAccuracyMeasure/MeasureUnitCode": "HorAccUnit",
303 | "HorizontalCollectionMethodName": "HorCollMeth",
304 | "HorizontalCoordinateReferenceSystemDatumName": "HorRef",
305 | "HUCEightDigitCode": "HUC8", "LatitudeMeasure": "Lat_Y", "LongitudeMeasure": "Lon_X",
306 | "OrganizationIdentifier": "OrgId", "OrganizationFormalName": "OrgName",
307 | "StateCode": "StateCode",
308 | "MonitoringLocationDescriptionText": "StationComment", "MonitoringLocationName": "StationName",
309 | "MonitoringLocationTypeName": "StationType"}
310 |
311 | df = self.stations
312 | df.rename(columns=StatFieldDict, inplace=True)
313 |
314 | statdroplist = ["ContributingDrainageAreaMeasure/MeasureUnitCode",
315 | "ContributingDrainageAreaMeasure/MeasureValue",
316 | "DrainageAreaMeasure/MeasureUnitCode", "DrainageAreaMeasure/MeasureValue", "CountryCode",
317 | "ProviderName",
318 | "SourceMapScaleNumeric"]
319 |
320 | df.drop(statdroplist, inplace=True, axis=1)
321 |
322 | TypeDict = {"River/Stream": "Stream", "Stream: Canal": "Stream",
323 | "Well: Test hole not completed as a well": "Well"}
324 |
325 | # Make station types in the StationType field consistent for easier summary and compilation later on.
326 | df.StationType = df["StationType"].apply(lambda x: TypeDict.get(x, x), 1)
327 | df.Elev = df.Elev.apply(lambda x: np.nan if x == 0.0 else round(x, 1), 1)
328 |
329 | # Remove preceding WQX from StationId field to remove duplicate station data created by legacy database.
330 | df['StationId'] = df['StationId'].str.replace('_WQX-', '-')
331 | df.drop_duplicates(subset=['StationId'], inplace=True)
332 | #self.stations = df
333 | return df
334 |
335 | def piv_chem(self, results='', chems='piper'):
336 | """pivots results DataFrame for input into piper class
337 |
338 | :param results: DataFrame of results data from WQP; default is return from call of :class:`WQP`
339 | :param chems: set of chemistry that must be present to retain row; default are the major ions for a piper plot
340 | :return: pivoted table of result values
341 |
342 | .. warnings:: this method drops < and > signs from values; do not use it for statistics
343 | """
344 |
345 | if not isinstance(results, pd.DataFrame):
346 | results = self.results
347 |
348 | ParAbb = {"Alkalinity": "Alk", "Alkalinity, Carbonate as CaCO3": "Alk", "Alkalinity, total": "Alk",
349 | "Arsenic": "As", "Calcium": "Ca", "Chloride": "Cl", "Carbon dioxide": "CO2", "Carbonate": "CO3",
350 | "Carbonate (CO3)": "CO3", "Specific conductance": "Cond", "Conductivity": "Cond", "Copper": "Cu",
351 | "Depth": "Depth", "Dissolved oxygen (DO)": "DO", "Iron": "Fe",
352 | "Hardness, Ca, Mg": "Hard", "Total hardness -- SDWA NPDWR": "Hard",
353 | "Bicarbonate": "HCO3", "Potassium": "K", "Magnesium": "Mg", "Kjeldahl nitrogen": "N",
354 | "Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)": "N", "Nitrogen": "N", "Sodium": "Na",
355 | "Sodium plus potassium": "NaK", "Ammonia-nitrogen": "NH3_N", "Ammonia-nitrogen as N": "N",
356 | "Nitrite": "NO2",
357 | "Nitrate": "NO3", "Nitrate as N": "N", "pH, lab": "pH", "pH": "pH", "Phosphate-phosphorus": "PO4",
358 | "Orthophosphate": "PO4", "Phosphate": "PO4", "Stream flow, instantaneous": "Q", "Flow": "Q",
359 | "Flow rate, instantaneous": "Q", "Silica": "Si", "Sulfate": "SO4", "Sulfate as SO4": "SO4",
360 | "Boron": "B", "Barium": "Ba", "Bromine": "Br", "Lithium": "Li", "Manganese": "Mn", "Strontium": "Sr",
361 | "Total dissolved solids": "TDS", "Temperature, water": "Temp",
362 | "Total Organic Carbon": "TOC", "delta Dueterium": "d2H", "delta Oxygen 18": "d18O",
363 | "delta Carbon 13 from Bicarbonate": "d13CHCO3", "delta Oxygen 18 from Bicarbonate": "d18OHCO3",
364 | "Total suspended solids": "TSS", "Turbidity": "Turb"}
365 |
366 | results['ParAbb'] = results['Param'].apply(lambda x: ParAbb.get(x, ''), 1)
367 | results.dropna(subset=['SampleId'], how='any', inplace=True)
368 | results = results[pd.isnull(results['DetectCond'])]
369 | results.drop_duplicates(subset=['SampleId', 'ParAbb'], inplace=True)
370 | datap = results.pivot(index='SampleId', columns='ParAbb', values='ResultValue')
371 | if chems == '':
372 | pass
373 | elif chems == 'piper':
374 | datap.dropna(subset=['SO4', 'Cl', 'Ca', 'HCO3', 'pH'], how='any', inplace=True)
375 | else:
376 | datap.dropna(subset=chems, how='any', inplace=True)
377 | return datap
378 |
--------------------------------------------------------------------------------
/wellapplication/usgs.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sun Jan 3 00:30:36 2016
4 |
5 | @author: p
6 | """
7 | from __future__ import absolute_import, division, print_function, unicode_literals
8 | import pandas as pd
9 | from datetime import datetime
10 | from pylab import rcParams
11 |
12 | import matplotlib.pyplot as plt
13 | import numpy as np
14 | import requests
15 |
16 |
17 | class nwisError(Exception):
18 | def __init__(self, error_message):
19 | self.error_message = error_message
20 |
21 | def __str__(self):
22 | r""" This just returns one of the error messages listed in the checkresponse() function"""
23 | return repr(self.error_message)
24 |
25 |
26 | class nwis(object):
27 | """Class to quickly download NWIS data using NWIS_ services
28 | .. _NWIS: https://waterservices.usgs.gov/
29 |
30 | :param service: name of web service to use; options are daily values ('dv'), instantaneous values ('iv'),
31 | site ('site'), and groundwater levels ('gwlevels')
32 | :param values: values for REST query; valid site is '01646500'; valid huc is '02070010'; valid bBox is
33 | '-83.000000,36.500000,-81.000000,38.500000'
34 | :param loc_type: filter type; valid values are 'huc', 'bBox', 'sites', and 'countyCd';
35 | see https://waterservices.usgs.gov/rest/IV-Service.html#MajorFilters for details
36 | :param **kwargs: other query parameters; optional
37 |
38 | """
39 | def __init__(self, service, values, loc_type, **kwargs):
40 | r""" Instantiates an instance of nwis"""
41 | self.service = service
42 | self.loc_type = loc_type
43 | self.values = self.parsesitelist(values)
44 | self.header = {'Accept-encoding': 'gzip'}
45 | self.url = 'https://waterservices.usgs.gov/nwis/'
46 | self.geo_criteria = ['sites', 'stateCd', 'huc', 'countyCd', 'bBox']
47 | self.out_format = 'json'
48 | self.start_date = '1800-01-01'
49 | self.input = kwargs
50 | self.end_date = str(datetime.today().year) + '-' + str(datetime.today().month).zfill(2) + '-' + str(
51 | datetime.today().day).zfill(2)
52 | self.sites, self.data = self.get_nwis(**kwargs)
53 |
54 | @staticmethod
55 | def _checkresponse(response):
56 | r""" Returns the data requested by the other methods assuming the response from the API is ok. If not, provides
57 | error handling for all possible API errors. HTTP errors are handled in the get_response() function.
58 |
59 | :param response: The response from the API as a dictionary if the API code is 200.
60 |
61 | :returns: The response from the API as a dictionary if the API code is 200.
62 |
63 | .. raises:: nwisError; Gives different response messages depending on returned code from API.
64 | .. notes:: https://waterservices.usgs.gov/docs/portable_code.html
65 | """
66 |
67 | if response.status_code == 200:
68 | print('connection successful')
69 | return response
70 | elif response.status_code == 403:
71 | raise nwisError('The USGS has blocked your Internet Protocol (IP) address')
72 | elif response.status_code == 400:
73 | raise nwisError('URL arguments are inconsistent')
74 | elif response.status_code == 404:
75 | raise nwisError('The query expresses a combination of elements where data do not exist.')
76 | elif response.status_code == 500:
77 | raise nwisError('There is a problem with the web service')
78 | elif response.status_code == 503:
79 | raise nwisError('This application is down at the moment')
80 | else:
81 | raise nwisError('Something went wrong.')
82 |
83 | def get_response(self, **kwargs):
84 | """ Returns a dictionary of data requested by each function.
85 |
86 | :returns: response - A dictionary that has been dumped from JSON.
87 |
88 | .. raises:: nwisError - Overrides the exceptions given in the requests library to give more custom error messages.
89 | Connection_error occurs if no internet connection exists. Timeout_error occurs if the request takes too
90 | long and redirect_error is shown if the url is formatted incorrectly.
91 | """
92 | http_error = 'Could not connect to the API. This could be because you have no internet connection, a parameter' \
93 | ' was input incorrectly, or the API is currently down. Please try again.'
94 |
95 | kwargs[self.loc_type] = self.values
96 | kwargs['format'] = self.out_format
97 |
98 | if 'startDT' not in kwargs:
99 | kwargs['startDT'] = self.start_date
100 | if 'endDT' not in kwargs:
101 | kwargs['endDT'] = self.end_date
102 |
103 | total_url = self.url + self.service + '/?'
104 | response_ob = requests.get(total_url, params=kwargs, headers=self.header)
105 | if self.service != 'site':
106 | try:
107 | response_ob.json()
108 | except ValueError:
109 | raise nwisError("Could not decode response from {:} ".format(response_ob.url))
110 |
111 | return self._checkresponse(response_ob)
112 |
113 | def get_nwis(self, **kwargs):
114 | jsn_dict = self.get_response(**kwargs)
115 | nwis_dict = jsn_dict.json()
116 | # dictionary from json object; each value in this dictionary is a station timeseries
117 | dt = nwis_dict['value']['timeSeries']
118 |
119 | station_id, lat, lon, srs, station_type, station_nm = [], [], [], [], [], []
120 | f = {}
121 | for i in range(len(dt)):
122 | station_id.append(dt[i]['sourceInfo']['siteCode'][0]['value'])
123 | lat.append(dt[i]['sourceInfo']['geoLocation'][u'geogLocation']['latitude'])
124 | lon.append(dt[i]['sourceInfo']['geoLocation'][u'geogLocation']['longitude'])
125 | srs.append(dt[i]['sourceInfo']['geoLocation'][u'geogLocation']['srs'])
126 | station_type.append(dt[i]['sourceInfo']['siteProperty'][0]['value'])
127 | station_nm.append(dt[i]['sourceInfo'][u'siteName'])
128 |
129 | df = pd.DataFrame(dt[i]['values'][0]['value'])
130 | if 'dateTime' in df.columns and 'Gage height, feet' not in dt[i]['variable']['variableDescription']:
131 | df.index = pd.to_datetime(df.pop('dateTime'))
132 | df.value = df.value.astype(float)
133 | df.value = df.value.where(df.value > -999, np.nan)
134 | df.index.name = 'datetime'
135 |
136 | f[dt[i]['sourceInfo']['siteCode'][0]['value']] = df
137 | else:
138 | print(dt[i]['variable']['variableDescription'] + " skipped!")
139 | pass
140 |
141 | stat_dict = {'site_no': station_id, 'dec_lat_va': lat, 'dec_long_va': lon, 'dec_coord_datum_cd': srs,
142 | 'station_nm': station_nm, 'data_type_cd': station_type}
143 | stations = pd.DataFrame(stat_dict)
144 | if len(dt) > 1 and len(f) >= 1:
145 | data = pd.concat(f)
146 | data.index.set_names('site_no', level=0, inplace=True)
147 | elif len(dt) == 1 and len(f) >= 1:
148 | data = f[dt[0]['sourceInfo']['siteCode'][0]['value']]
149 | data['site_no'] = dt[0]['sourceInfo']['siteCode'][0]['value']
150 | else:
151 | data = None
152 | print('No Data!')
153 | return stations, data
154 |
155 | def parsesitelist(self, values):
156 | """Takes a list and turns it into a string format that can be used in the html REST format
157 |
158 | :param values:
159 | :param type: list
160 | :returns: sitno (str); string with commas separating values
161 |
162 | :Example:
163 |         >>> parsesitelist([123,576,241])
164 | '123,576,241'
165 | """
166 | siteno = str(values).replace(" ", "")
167 | siteno = siteno.replace("]", "")
168 | siteno = siteno.replace("[", "")
169 | siteno = siteno.replace("','", ",")
170 | siteno = siteno.replace("'", "")
171 | siteno = siteno.replace('"', "")
172 | return siteno
173 |
174 | def get_info(self, **kwargs):
175 | """Downloads data from usgs service as text file; converted to Pandas DataFrame.
176 |
177 |         :param kwargs: additional query parameters passed to the USGS site service
178 |         :type kwargs: dict
179 |
180 | .. returns:: df; Pandas DataFrame containing data downloaded from USGS
181 | """
182 | self.service = 'site'
183 | self.out_format = 'rdb'
184 | kwargs['siteOutput'] = 'expanded'
185 | resp = self.get_response(**kwargs)
186 | print(resp.url)
187 | linefile = resp.iter_lines()
188 | numlist = []
189 | num = 0
190 | for line in linefile:
191 | if line.startswith(b"#"):
192 | numlist.append(num)
193 | num += 1
194 | numlist.append(numlist[-1] + 2)
195 | df = pd.read_table(resp.url, sep="\t", skiprows=numlist)
196 | return df
197 |
198 | @staticmethod
199 | def get_first_string(lst):
200 | """Function to get the first string from each list"""
201 | return lst[0] if isinstance(lst, list) and lst and all(isinstance(item, str) for item in lst) else None
202 |
203 | def cleanGWL(self, df, colm='qualifiers',inplace=False):
204 | """Drops water level data of suspect quality based on lev_status_cd
205 |
206 | :param df: (pandas dataframe) groundwater dataframe
207 | :param colm: column to parse; defaults to 'qualifiers'
208 |
209 | :type colm: str
210 | :returns: sitno (str) - subset of input dataframe as new dataframe
211 | """
212 | if inplace:
213 | data = df
214 | else:
215 | data = df.copy(deep=True)
216 |         data[colm] = data[colm].apply(self.get_first_string)
217 | CleanData = data[~data[colm].isin(['Z', 'R', 'V', 'P', 'O', 'F', 'W', 'G', 'S', 'C', 'E', 'N'])]
218 | return CleanData
219 |
220 | def my_agg(self, x):
221 |
222 | names = {
223 | 'mean': x[self.avgtype].mean(numeric_only=True),
224 | 'std': x[self.avgtype].std(numeric_only=True),
225 | 'min': x[self.avgtype].min(numeric_only=True),
226 | 'max': x[self.avgtype].max(numeric_only=True),
227 | 'median': x[self.avgtype].median(numeric_only=True),
228 | 'cnt': (np.count_nonzero(~np.isnan(x[self.avgtype]))),
229 | 'err_pls': (np.mean(x[self.avgtype]) + (np.std(x[self.avgtype]) * 1.96)),
230 | 'err_min': (np.mean(x[self.avgtype]) - (np.std(x[self.avgtype]) * 1.96))
231 | #'5 percent': np.percentile(x[self.avgtype], 5),
232 | #'95 percent': np.percentile(x[self.avgtype], 95)
233 | }
234 |
235 | return pd.Series(names, index=list(names.keys()))
236 |
237 | def avg_wl(self, numObs=50, avgtype='stdWL', grptype='bytime', grper='12ME'):
238 | """Calculates standardized statistics for a list of stations or a huc from the USGS
239 | avgDiffWL = average difference from mean WL for each station
240 |
241 |
242 |
243 | :param numObs: minimum observations per site required to include site in analysis; default is 50
244 |         :param avgtype: averaging technique for site data; options are 'avgDiffWL', 'stdWL', 'cdm', 'avgDiff_dWL', and 'std_dWL'; default is 'stdWL'
245 |         :param grptype: way to group the averaged data; options are 'bytime' or 'monthly' or user input; default 'bytime'
246 |         :param grper: only used if 'bytime' is called; defaults to '12ME'; other pandas offset aliases can be used
247 |         :return: DataFrame of aggregated water-level statistics (also stored as self.wl_stats)
248 | """
249 | self.avgtype = avgtype
250 | data = self.cleanGWL(self.data)
251 | # stationWL = pd.merge(siteinfo, data, on = 'site_no')
252 | data.reset_index(inplace=True)
253 | data.set_index(['datetime'], inplace=True)
254 | # get averages by year, month, and site number
255 | site_size = data.groupby('site_no').size()
256 | wl_long = data[data['site_no'].isin(list(site_size[site_size >= numObs].index.values))]
257 | # eliminate any duplicate site numbers
258 | siteList = list(wl_long.site_no.unique())
259 | for site in siteList:
260 | mean = wl_long.loc[wl_long.site_no == site, 'value'].mean()
261 | std = wl_long.loc[wl_long.site_no == site, 'value'].std()
262 | meandiff = wl_long.loc[wl_long.site_no == site, 'value'].diff().mean()
263 | stddiff = wl_long.loc[wl_long.site_no == site, 'value'].diff().std()
264 | wl_long.loc[wl_long.site_no == site, 'diff'] = wl_long.loc[wl_long.site_no == site, 'value'].diff()
265 | wl_long.loc[wl_long.site_no == site, 'avgDiffWL'] = wl_long.loc[wl_long.site_no == site, 'value'] - mean
266 | wl_long.loc[wl_long.site_no == site, 'stdWL'] = wl_long.loc[wl_long.site_no == site, 'avgDiffWL'] / std
267 | wl_long.loc[wl_long.site_no == site, 'cdm'] = wl_long.loc[wl_long.site_no == site, 'avgDiffWL'].cumsum()
268 | wl_long.loc[wl_long.site_no == site, 'avgDiff_dWL'] = wl_long.loc[
269 | wl_long.site_no == site, 'diff'] - meandiff
270 | wl_long.loc[wl_long.site_no == site, 'std_dWL'] = wl_long.loc[
271 | wl_long.site_no == site, 'avgDiff_dWL'] / stddiff
272 |
273 | if grptype == 'bytime':
274 | grp = pd.Grouper(freq=grper)
275 | elif grptype == 'monthly':
276 | grp = wl_long.index.month
277 | else:
278 | grp = grptype
279 |
280 | # this statement reduces bias from one station
281 | wllong = wl_long.groupby(['site_no',grp]).mean(numeric_only=True)
282 | wllong.index = wllong.index.droplevel(level=0)
283 | # this statement gets the statistics
284 | wl_stats = wllong.groupby([grp]).apply(self.my_agg)
285 |
286 | self.wl_stats = wl_stats
287 |
288 | return wl_stats
289 |
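A sketch of a typical call sequence (not from the source): the HUC code below is a placeholder, and it assumes the nwis object was built against the groundwater-levels service so that self.data carries a 'qualifiers' column.

    gw = nwis('gwlevels', ['16020204'], 'huc')
    stats = gw.avg_wl(numObs=50, avgtype='stdWL', grptype='bytime', grper='12ME')
    fig, ax, ax2 = gw.pltavgwl()   # plots the table stored in self.wl_stats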
290 |     def pltavgwl(self, maxdate=[0, 0, 0], mindate=[1950, 1, 1]):
291 |
292 | if maxdate[0] == 0:
293 | maxdate = [datetime.today().year,1,1]
294 |
295 | grpd = self.wl_stats
296 | x2 = grpd.index
297 | y3 = grpd['mean']
298 | y2 = grpd['median']
299 |
300 | fig = plt.figure()
301 | ax = fig.add_subplot(111)
302 |
303 | rcParams['figure.figsize'] = 15, 10
304 | rcParams['legend.numpoints'] = 1
305 |         plt.plot(x2, y3, '+-', color='green', label='Average')
306 |         ax.plot(x2, y2, '+-', color='red', label='Median')
307 | ax.fill_between(x2, grpd['err_min'], grpd['err_pls'], alpha=0.2, label='2 Standard Deviations', linewidth=0)
308 |
309 | ax.set_ylabel(self.avgtype, color='red')
310 | ax.invert_yaxis()
311 | ax.grid()
312 | ax2 = ax.twinx()
313 | ax2.plot(x2, grpd['cnt'], label='Number of Wells Observed')
314 | ax2.set_ylim(0, int(grpd['cnt'].max()) * 3)
315 | ax2.set_yticks(range(0, int(grpd['cnt'].max()), int(grpd['cnt'].max() / 10)))
316 | ax2.set_ylabel('Number of Wells Observed', color='blue')
317 | ax2.yaxis.set_label_coords(1.03, 0.2)
318 | ax.set_xlim(datetime(*mindate), datetime(*maxdate))
319 |         date_range = pd.date_range('{:}-{:}-{:}'.format(*mindate), '{:}-{:}-{:}'.format(*maxdate), freq='36ME')
320 |         # tick positions must stay as datetimes; formatting them as strings breaks the date axis
321 |         ax.set_xticks(date_range)
322 | ax.set_xlabel('date')
323 | # ask matplotlib for the plotted objects and their labels
324 | lines, labels = ax.get_legend_handles_labels()
325 | lines2, labels2 = ax2.get_legend_handles_labels()
326 | ax2.legend(lines + lines2, labels + labels2, loc=0)
327 |
328 | return fig,ax,ax2
329 |
330 | def xcheck(self, x):
331 | """Converts empty list to empty string and filled list into string of first value"""
332 | if type(x) == list:
333 | if len(x) == 0:
334 | return ''
335 | else:
336 | return str(x[0])
337 | else:
338 | return x
339 |
340 | def nwis_heat_map(self):
341 | from scipy.interpolate import griddata
342 | import matplotlib.cm as cm
343 | import matplotlib as mpl
344 |
345 | meth = 'linear' # 'nearest'
346 |
347 | data = self.data
348 |
349 |         if isinstance(data.index, pd.MultiIndex):
350 | data.index = data.index.droplevel(0)
351 |
352 | x = data.index.dayofyear
353 | y = data.index.year
354 | z = data.value.values
355 |
356 | xi = np.linspace(x.min(), x.max(), 1000)
357 | yi = np.linspace(y.min(), y.max(), 1000)
358 | zi = griddata((x, y), z, (xi[None, :], yi[:, None]), method=meth)
359 |
360 |         cmap = mpl.colormaps['RdYlBu']
361 | norm = mpl.colors.Normalize(vmin=z.min(), vmax=z.max())
362 | #norm = mpl.colors.LogNorm(vmin=0.1, vmax=100000)
363 | m = cm.ScalarMappable(norm=norm, cmap=cmap)
364 | m.set_array(z)
365 |
366 |         br = plt.contourf(xi, yi, zi, cmap=cmap, norm=norm)
367 | # setup the colorbar
368 |
369 |
370 | cbar = plt.colorbar(m)
371 | cbar.set_label('Discharge (cfs)')
372 |
373 | plt.xlabel('Month')
374 | plt.ylabel('Year')
375 | plt.yticks(range(y.min(), y.max()))
376 |
377 | mons = {'Apr': 90.25, 'Aug': 212.25, 'Dec': 334.25, 'Feb': 31, 'Jan': 1, 'Jul': 181.25, 'Jun': 151.25,
378 | 'Mar': 59.25, 'May': 120.25,
379 | 'Nov': 304.25, 'Oct': 273.25, 'Sep': 243.25}
380 | monnms = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
381 |
382 | plt.title(self.sites.station_nm[0].title())
383 | tickplc = []
384 | plt.xticks([mons[i] for i in monnms], monnms)
385 | plt.grid()
386 |
387 | def get_elev(x, units='Meters'):
388 | """Uses USGS elevation service to retrieve elevation
389 | :param x: longitude and latitude of point where elevation is desired
390 | :type x: list
391 | :param units: units for returned value; defaults to Meters; options are 'Meters' or 'Feet'
392 | :type units: str
393 |
394 | :returns: ned float elevation of location in meters
395 |
396 | :Example:
397 | >>> get_elev([-111.21,41.4])
398 | 1951.99
399 | """
400 |
401 | values = {
402 | 'x': x[0],
403 | 'y': x[1],
404 | 'units': units,
405 | 'output': 'json'
406 | }
407 |
408 | elev_url = 'https://nationalmap.gov/epqs/pqs.php?'
409 |
410 | attempts = 0
411 | while attempts < 4:
412 | try:
413 | response = requests.get(elev_url, params=values).json()
414 | g = float(response['USGS_Elevation_Point_Query_Service']['Elevation_Query']['Elevation'])
415 | break
416 |         except (requests.exceptions.RequestException, KeyError, ValueError):
417 |             print("Connection attempt {:} of 4 failed.".format(attempts + 1))
418 |             attempts += 1
419 |             g = 0
420 | return g
421 |
422 | def get_huc(x):
423 | """Receive the content of ``url``, parse it as JSON and return the object.
424 |
425 | :param x: [longitude, latitude]
426 |
427 | :returns: HUC12, HUC12_Name - 12 digit hydrologic unit code of location and the name associated with that code
428 | """
429 | values = {
430 | 'geometry': '{:},{:}'.format(x[0], x[1]),
431 | 'geometryType': 'esriGeometryPoint',
432 | 'inSR': '4326',
433 | 'spatialRel': 'esriSpatialRelIntersects',
434 | 'returnGeometry': 'false',
435 | 'outFields': 'huc12,name',
436 | 'returnDistinctValues': 'true',
437 | 'f': 'pjson'}
438 |
439 | huc_url = 'https://hydro.nationalmap.gov/arcgis/rest/services/wbd/MapServer/6/query?'
440 | # huc_url = 'https://services.nationalmap.gov/arcgis/rest/services/USGSHydroNHDLarge/MapServer/10/query?'
441 | # huc_url2 = 'https://services.nationalmap.gov/arcgis/rest/services/nhd/mapserver/8/query?'
442 | response = requests.get(huc_url, params=values).json()
443 | return response['features'][0]['attributes']['huc12'], response['features'][0]['attributes']['name']
444 |
445 | def get_fips(x):
446 | """Receive the content of ``url``, parse it as JSON and return the object.
447 | :param x: [longitude, latitude]
448 | :returns: tuple containing five digit county fips and county name
449 | """
450 | values = {
451 | 'latitude': '{:}'.format(x[1]),
452 | 'longitude': '{:}'.format(x[0]),
453 | 'showall': 'true',
454 | 'format': 'json'}
455 |
456 |     fcc_url = "http://data.fcc.gov/api/block/find?"
457 |     response = requests.get(fcc_url, params=values).json()
458 | return response['County']['FIPS'], response['County']['name']
459 |
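A quick sketch of these two point-lookup helpers (arbitrary coordinates; both functions call live web services, so results depend on the services being up):

    pt = [-111.95, 40.76]            # [longitude, latitude]
    huc12, huc_name = get_huc(pt)    # 12-digit hydrologic unit code and its name
    fips, county = get_fips(pt)      # 5-digit county FIPS code and county name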
460 | def USGSID(x):
461 | """Parses decimal latitude and longitude values into DDMMSSDDDMMSS01 USGS site id.
462 | See https://help.waterdata.usgs.gov/faq/sites/do-station-numbers-have-any-particular-meaning for documentation.
463 |
464 | :param x: [longitude,latitude]
465 |     :type x: list
466 | :returns: USGS-style site id (groundwater) DDMMSSDDDMMSS01
467 | """
468 | return dms(x[1]) + dms(x[0]) + '01'
469 |
470 | def dms(dec):
471 | """converts decimal degree coordinates to a usgs station id
472 | :param dec: latitude or longitude value in decimal degrees
473 | :return: usgs id value
474 |
475 | .. note:: https://help.waterdata.usgs.gov/faq/sites/do-station-numbers-have-any-particular-meaning
476 | """
477 | DD = str(int(abs(dec)))
478 | MM = str(int((abs(dec) - int(DD)) * 60)).zfill(2)
479 | SS = str(int(round((((abs(dec) - int(DD)) * 60) - int(MM)) * 60, 0))).zfill(2)
480 | if SS == '60':
481 |         MM = str(int(MM) + 1).zfill(2)
482 | SS = '00'
483 | if MM == '60':
484 | DD = str(int(DD) + 1)
485 | MM = '00'
486 | return DD + MM + SS
487 |
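Worked values for these two helpers, computed directly from the formulas above:

    >>> dms(41.4)
    '412400'
    >>> dms(-111.21)
    '1111236'
    >>> USGSID([-111.21, 41.4])
    '412400111123601'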
488 |
489 | def get_recess(df, Q, freq='1D', inplace=False):
490 | """ Select the data when values are decreasing compared to previous time step
491 |
492 | :param df: DataFrame of hydro data
493 | :param Q: DataFrame field with discharge or water level data
494 | :param freq: Frequency of measurement of data; default is 1D
495 | :param inplace: If True, replace input DataFrame; default is false
496 | :return: DataFrame of all of the decreasing segments of the input DataFrame
497 |
498 | .. note:: from https://github.com/stijnvanhoey/hydropy
499 | """
500 | recess = df[Q].diff() < 0.0
501 | if inplace:
502 | df = df
503 | else:
504 | df = df[recess].copy()
505 | df = df.resample(freq).mean()
506 | return df
507 |
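A small sketch with synthetic data (assuming pandas is imported as pd, as elsewhere in this module):

    rng = pd.date_range('2020-01-01', periods=10, freq='D')
    q = pd.DataFrame({'Q': [5, 8, 12, 10, 9, 7, 6, 9, 8, 7]}, index=rng)
    falling = get_recess(q, 'Q', freq='1D')   # keeps only steps where Q decreased, then resamples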
508 |
509 | def RB_Flashiness(series):
510 | """Richards-Baker Flashiness Index for a series of daily mean discharges.
511 | https://github.com/hydrogeog/hydro/blob/master/hydro/core.py
512 | """
513 | Qsum = np.sum(series) # sum of daily mean discharges
514 | Qpath = 0.0
515 | for i in range(len(series)):
516 | if i == 0:
517 | Qpath = series[i] # first entry only
518 | else:
519 | Qpath += np.abs(series[i] - series[i-1]) # sum the absolute differences of the mean discharges
520 | return Qpath/Qsum
521 |
522 |
523 | def flow_duration(series):
524 | """Creates the flow duration curve for a discharge dataset. Returns a pandas
525 | series whose index is the discharge values and series is exceedance probability.
526 | https://github.com/hydrogeog/hydro/blob/master/hydro/core.py
527 | """
528 | fd = pd.Series(series).value_counts() # frequency of unique values
529 | fd.sort_index(inplace=True) # sort in order of increasing discharges
530 | fd = fd.cumsum() # cumulative sum of frequencies
531 | fd = fd.apply(lambda x: 100 - x/fd.max() * 100) # normalize
532 | return fd
533 |
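A sketch of plotting the resulting curve (synthetic discharges; assuming matplotlib.pyplot is imported as plt, as elsewhere in this module):

    q = [12.0, 30.0, 8.0, 8.0, 45.0, 5.0, 19.0, 22.0]
    fd = flow_duration(q)
    plt.plot(fd.values, fd.index)            # x: percent of time exceeded, y: discharge
    plt.xlabel('Percent of time exceeded')
    plt.ylabel('Discharge')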
534 | def Lyne_Hollick(series, alpha=.925, direction='f'):
535 | """Recursive digital filter for baseflow separation. Based on Lyne and Hollick, 1979.
536 | series = array of discharge measurements
537 | alpha = filter parameter
538 | direction = (f)orward or (r)everse calculation
539 | https://github.com/hydrogeog/hydro/blob/master/hydro/core.py
540 | """
541 | series = np.array(series)
542 | f = np.zeros(len(series))
543 | if direction == 'f':
544 | for t in np.arange(1,len(series)):
545 | f[t] = alpha * f[t-1] + (1 + alpha)/2 * (series[t] - series[t-1])
546 | if series[t] - f[t] > series[t]:
547 | f[t] = 0
548 | elif direction == 'r':
549 | for t in np.arange(len(series)-2, 1, -1):
550 | f[t] = alpha * f[t+1] + (1 + alpha)/2 * (series[t] - series[t+1])
551 | if series[t] - f[t] > series[t]:
552 | f[t] = 0
553 | return np.array(series - f)
554 |
555 | def Eckhardt(series, alpha=.98, BFI=.80):
556 | """Recursive digital filter for baseflow separation. Based on Eckhardt, 2004.
557 | series = array of discharge measurements
558 | alpha = filter parameter
559 | BFI = BFI_max (maximum baseflow index)
560 | https://github.com/hydrogeog/hydro/blob/master/hydro/core.py
561 | """
562 | series = np.array(series)
563 | f = np.zeros(len(series))
564 | f[0] = series[0]
565 | for t in np.arange(1,len(series)):
566 | f[t] = ((1 - BFI) * alpha * f[t-1] + (1 - alpha) * BFI * series[t]) / (1 - alpha * BFI)
567 | if f[t] > series[t]:
568 | f[t] = series[t]
569 | return f
570 |
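A sketch comparing the two filters on the same synthetic record (default parameters; assuming numpy is imported as np):

    q = np.array([10., 12., 30., 55., 40., 28., 20., 16., 13., 11.])
    baseflow_lh = Lyne_Hollick(q, alpha=0.925, direction='f')   # forward pass
    baseflow_eck = Eckhardt(q, alpha=0.98, BFI=0.80)
    quickflow = q - baseflow_eck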
571 | def ratingCurve(discharge, stage):
572 | """Computes rating curve based on discharge measurements coupled with stage
573 | readings.
574 | discharge = array of measured discharges;
575 | stage = array of corresponding stage readings;
576 | Returns coefficients a, b for the rating curve in the form y = a * x**b
577 | https://github.com/hydrogeog/hydro/blob/master/hydro/core.py
578 | """
579 | from scipy.optimize import curve_fit
580 |
581 | exp_curve = lambda x, a, b: (a * x ** b)
582 | popt, pcov = curve_fit(exp_curve, stage, discharge)
583 |
584 |
585 | a = 0.0
586 | b = 0.0
587 |
588 | for i, j in zip(discharge, stage):
589 | a += (i - exp_curve(j, popt[0], popt[1]))**2
590 | b += (i - np.mean(discharge))**2
591 | r_squ = 1 - a / b
592 |
593 |
594 | return popt, r_squ
595 |
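A worked sketch of ratingCurve with made-up stage/discharge pairs:

    stage = [1.2, 1.8, 2.5, 3.1, 3.9]           # gage-height readings
    discharge = [4.0, 11.0, 26.0, 45.0, 78.0]   # measured discharges
    (a, b), r_squ = ratingCurve(discharge, stage)
    q_at_3ft = a * 3.0 ** b                     # discharge estimated at a stage of 3.0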
--------------------------------------------------------------------------------
/wellapplication/mesopy.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function, unicode_literals
2 |
3 | # ==================================================================================================================== #
4 | # MesoPy #
5 | # Version: 2.0.0 #
6 | # Copyright (c) 2015 MesoWest Developers #
7 | # #
8 | # LICENSE: #
9 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated #
10 | # documentation files (the "Software"), to deal in the Software without restriction, including without limitation the #
11 | # rights to use,copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to #
12 | # permit persons to whom the Software is furnished to do so, subject to the following conditions: #
13 | # #
14 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the #
15 | # Software. #
16 | # ==================================================================================================================== #
17 |
18 | try:
19 | import urllib.parse
20 | import urllib.request
21 | import urllib.error
22 | except ImportError:
23 | import urllib2
24 | import urllib
25 |
26 | import json
27 |
28 |
29 | # ==================================================================================================================== #
30 | # MesoPyError class #
31 | # Type: Exception #
32 | # Description: This class is simply the means for error handling when an exception is raised. #
33 | # ==================================================================================================================== #
34 |
35 |
36 | class MesoPyError(Exception):
37 | def __init__(self, error_message):
38 | self.error_message = error_message
39 |
40 | def __str__(self):
41 | r""" This just returns one of the error messages listed in the checkresponse() function"""
42 | return repr(self.error_message)
43 |
44 |
45 | # ==================================================================================================================== #
46 | # Meso class #
47 | # Type: Main #
48 | # Description: This class defines an instance of MesoPy and takes in the user's token #
49 | # ==================================================================================================================== #
50 |
51 |
52 | class Meso(object):
53 | def __init__(self, token):
54 | r""" Instantiates an instance of MesoPy.
55 |
56 | Arguments:
57 | ----------
58 | token: string, mandatory
59 |             Your API token that authenticates you for requests against MesoWest.
60 |
61 | Returns:
62 | --------
63 | None.
64 |
65 | Raises:
66 | -------
67 | None.
68 | """
69 |
70 | self.base_url = 'http://api.mesowest.net/v2/'
71 | self.token = token
72 | self.geo_criteria = ['stid', 'state', 'country', 'county', 'radius', 'bbox', 'cwa', 'nwsfirezone', 'gacc',
73 | 'subgacc']
74 |
75 | # ================================================================================================================ #
76 | # Functions: #
77 | # ================================================================================================================ #
78 |
79 | @staticmethod
80 | def _checkresponse(response):
81 | r""" Returns the data requested by the other methods assuming the response from the API is ok. If not, provides
82 | error handling for all possible API errors. HTTP errors are handled in the get_response() function.
83 |
84 | Arguments:
85 | ----------
86 | None.
87 |
88 | Returns:
89 | --------
90 |         The response from the API as a dictionary if the API response code is 1.
91 |
92 | Raises:
93 | -------
94 | MesoPyError: Gives different response messages depending on returned code from API. If the response is 2,
95 | resultsError is displayed. For a response of 200, an authError message is shown. A ruleError is displayed
96 | if the code is 400, a formatError for -1, and catchError for any other invalid response.
97 |
98 | """
99 |
100 | results_error = 'No results were found matching your query'
101 | auth_error = 'The token or API key is not valid, please contact Josh Clark at joshua.m.clark@utah.edu to ' \
102 | 'resolve this'
103 | rule_error = 'This request violates a rule of the API. Please check the guidelines for formatting a data ' \
104 | 'request and try again'
105 | catch_error = 'Something went wrong. Check all your calls and try again'
106 |
107 | if response['SUMMARY']['RESPONSE_CODE'] == 1:
108 | return response
109 | elif response['SUMMARY']['RESPONSE_CODE'] == 2:
110 | raise MesoPyError(results_error)
111 | elif response['SUMMARY']['RESPONSE_CODE'] == 200:
112 | raise MesoPyError(auth_error)
113 | elif response['SUMMARY']['RESPONSE_CODE'] == 400:
114 | raise MesoPyError(rule_error)
115 | elif response['SUMMARY']['RESPONSE_CODE'] == -1:
116 | format_error = response['SUMMARY']['RESPONSE_MESSAGE']
117 | raise MesoPyError(format_error)
118 | else:
119 | raise MesoPyError(catch_error)
120 |
121 | def _get_response(self, endpoint, request_dict):
122 | """ Returns a dictionary of data requested by each function.
123 |
124 | Arguments:
125 | ----------
126 | endpoint: string, mandatory
127 | Set in all other methods, this is the API endpoint specific to each function.
128 |         request_dict: dict, mandatory
129 | A dictionary of parameters that are formatted into the API call.
130 |
131 | Returns:
132 | --------
133 | response: A dictionary that has been dumped from JSON.
134 |
135 | Raises:
136 | -------
137 | MesoPyError: Overrides the exceptions given in the requests library to give more custom error messages.
138 | Connection_error occurs if no internet connection exists. Timeout_error occurs if the request takes too
139 | long and redirect_error is shown if the url is formatted incorrectly.
140 |
141 | """
142 | http_error = 'Could not connect to the API. This could be because you have no internet connection, a parameter' \
143 | ' was input incorrectly, or the API is currently down. Please try again.'
144 | # For python 3.4
145 | try:
146 | qsp = urllib.parse.urlencode(request_dict, doseq=True)
147 | resp = urllib.request.urlopen(self.base_url + endpoint + '?' + qsp).read()
148 |
149 | # For python 2.7
150 |         except (AttributeError, NameError):
151 | try:
152 | qsp = urllib.urlencode(request_dict, doseq=True)
153 | resp = urllib2.urlopen(self.base_url + endpoint + '?' + qsp).read()
154 | except urllib2.URLError:
155 | raise MesoPyError(http_error)
156 | except urllib.error.URLError:
157 | raise MesoPyError(http_error)
158 | return self._checkresponse(json.loads(resp.decode('utf-8')))
159 |
160 | def _check_geo_param(self, arg_list):
161 | r""" Checks each function call to make sure that the user has provided at least one of the following geographic
162 | parameters: 'stid', 'state', 'country', 'county', 'radius', 'bbox', 'cwa', 'nwsfirezone', 'gacc', or 'subgacc'.
163 |
164 | Arguments:
165 | ----------
166 | arg_list: list, mandatory
167 | A list of kwargs from other functions.
168 |
169 | Returns:
170 | --------
171 | None.
172 |
173 | Raises:
174 | -------
175 | MesoPyError if no geographic search criteria is provided.
176 |
177 | """
178 |
179 | geo_func = lambda a, b: any(i in b for i in a)
180 | check = geo_func(self.geo_criteria, arg_list)
181 | if check is False:
182 | raise MesoPyError('No stations or geographic search criteria specified. Please provide one of the '
183 | 'following: stid, state, county, country, radius, bbox, cwa, nwsfirezone, gacc, subgacc')
184 |
185 |
186 | def precip(self, start, end, **kwargs):
187 | r""" Returns precipitation observations at a user specified location for a specified time. Users must specify at
188 | least one geographic search parameter ('stid', 'state', 'country', 'county', 'radius', 'bbox', 'cwa',
189 | 'nwsfirezone', 'gacc', or 'subgacc') to obtain observation data. Other parameters may also be included. See
190 | below mandatory and optional parameters. Also see the metadata() function for station IDs.
191 |
192 | Arguments:
193 | ----------
194 | start: string, mandatory
195 | Start date in form of YYYYMMDDhhmm. MUST BE USED WITH THE END PARAMETER. Default time is UTC
196 | e.g., start='201306011800'
197 | end: string, mandatory
198 | End date in form of YYYYMMDDhhmm. MUST BE USED WITH THE START PARAMETER. Default time is UTC
199 | e.g., end='201306011800'
200 | obtimezone: string, optional
201 | Set to either UTC or local. Sets timezone of obs. Default is UTC. e.g. obtimezone='local'
202 | showemptystations: string, optional
203 | Set to '1' to show stations even if no obs exist that match the time period. Stations without obs are
204 | omitted by default.
205 | stid: string, optional
206 | Single or comma separated list of MesoWest station IDs. e.g. stid='kden,kslc,wbb'
207 | county: string, optional
208 | County/parish/borough (US/Canada only), full name e.g. county='Larimer'
209 | state: string, optional
210 | US state, 2-letter ID e.g. state='CO'
211 | country: string, optional
212 | Single or comma separated list of abbreviated 2 or 3 character countries e.g. country='us,ca,mx'
213 | radius: list, optional
214 | Distance from a lat/lon pt or stid as [lat,lon,radius (mi)] or [stid, radius (mi)]. e.g. radius="-120,40,20"
215 | bbox: list, optional
216 | Stations within a [lon/lat] box in the order [lonmin,latmin,lonmax,latmax] e.g. bbox="-120,40,-119,41"
217 | cwa: string, optional
218 | NWS county warning area. See http://www.nws.noaa.gov/organization.php for CWA list. e.g. cwa='LOX'
219 | nwsfirezone: string, optional
220 | NWS fire zones. See http://www.nws.noaa.gov/geodata/catalog/wsom/html/firezone.htm for a shapefile
221 | containing the full list of zones. e.g. nwsfirezone='LOX241'
222 | gacc: string, optional
223 | Name of Geographic Area Coordination Center e.g. gacc='EBCC' See http://gacc.nifc.gov/ for a list of GACCs.
224 | subgacc: string, optional
225 | Name of Sub GACC e.g. subgacc='EB07'
226 | vars: string, optional
227 | Single or comma separated list of sensor variables. Will return all stations that match one of provided
228 | variables. Useful for filtering all stations that sense only certain vars. Do not request vars twice in
229 | the query. e.g. vars='wind_speed,pressure' Use the variables function to see a list of sensor vars.
230 | status: string, optional
231 | A value of either active or inactive returns stations currently set as active or inactive in the archive.
232 | Omitting this param returns all stations. e.g. status='active'
233 | units: string, optional
234 | String or set of strings and pipes separated by commas. Default is metric units. Set units='ENGLISH' for
235 | FREEDOM UNITS ;) Valid other combinations are as follows: temp|C, temp|F, temp|K; speed|mps, speed|mph,
236 | speed|kph, speed|kts; pres|pa, pres|mb; height|m, height|ft; precip|mm, precip|cm, precip|in; alti|pa,
237 | alti|inhg. e.g. units='temp|F,speed|kph,metric'
238 | groupby: string, optional
239 | Results can be grouped by key words: state, county, country, cwa, nwszone, mwsfirezone, gacc, subgacc
240 | e.g. groupby='state'
241 | timeformat: string, optional
242 | A python format string for returning customized date-time groups for observation times. Can include
243 | characters. e.g. timeformat='%m/%d/%Y at %H:%M'
244 |
245 | Returns:
246 | --------
247 | Dictionary of precipitation observations.
248 |
249 | Raises:
250 | -------
251 | None.
252 |
253 | """
254 |
255 | self._check_geo_param(kwargs)
256 | kwargs['start'] = start
257 | kwargs['end'] = end
258 | kwargs['token'] = self.token
259 |
260 | return self._get_response('stations/precipitation', kwargs)
261 |
262 | def timeseries(self, start, end, **kwargs):
263 | r""" Returns a time series of observations at a user specified location for a specified time. Users must specify
264 | at least one geographic search parameter ('stid', 'state', 'country', 'county', 'radius', 'bbox', 'cwa',
265 | 'nwsfirezone', 'gacc', or 'subgacc') to obtain observation data. Other parameters may also be included. See
266 | below mandatory and optional parameters. Also see the metadata() function for station IDs.
267 |
268 | Arguments:
269 | ----------
270 | start: string, mandatory
271 | Start date in form of YYYYMMDDhhmm. MUST BE USED WITH THE END PARAMETER. Default time is UTC
272 | e.g., start='201306011800'
273 | end: string, mandatory
274 | End date in form of YYYYMMDDhhmm. MUST BE USED WITH THE START PARAMETER. Default time is UTC
275 | e.g., end='201306011800'
276 | obtimezone: string, optional
277 | Set to either UTC or local. Sets timezone of obs. Default is UTC. e.g. obtimezone='local'
278 | showemptystations: string, optional
279 | Set to '1' to show stations even if no obs exist that match the time period. Stations without obs are
280 | omitted by default.
281 | stid: string, optional
282 | Single or comma separated list of MesoWest station IDs. e.g. stid='kden,kslc,wbb'
283 | county: string, optional
284 | County/parish/borough (US/Canada only), full name e.g. county='Larimer'
285 | state: string, optional
286 | US state, 2-letter ID e.g. state='CO'
287 | country: string, optional
288 | Single or comma separated list of abbreviated 2 or 3 character countries e.g. country='us,ca,mx'
289 | radius: string, optional
290 | Distance from a lat/lon pt or stid as [lat,lon,radius (mi)] or [stid, radius (mi)]. e.g. radius="-120,40,20"
291 | bbox: string, optional
292 | Stations within a [lon/lat] box in the order [lonmin,latmin,lonmax,latmax] e.g. bbox="-120,40,-119,41"
293 | cwa: string, optional
294 | NWS county warning area. See http://www.nws.noaa.gov/organization.php for CWA list. e.g. cwa='LOX'
295 | nwsfirezone: string, optional
296 | NWS fire zones. See http://www.nws.noaa.gov/geodata/catalog/wsom/html/firezone.htm for a shapefile
297 | containing the full list of zones. e.g. nwsfirezone='LOX241'
298 | gacc: string, optional
299 | Name of Geographic Area Coordination Center e.g. gacc='EBCC' See http://gacc.nifc.gov/ for a list of GACCs.
300 | subgacc: string, optional
301 | Name of Sub GACC e.g. subgacc='EB07'
302 | vars: string, optional
303 | Single or comma separated list of sensor variables. Will return all stations that match one of provided
304 | variables. Useful for filtering all stations that sense only certain vars. Do not request vars twice in
305 | the query. e.g. vars='wind_speed,pressure' Use the variables function to see a list of sensor vars.
306 | status: string, optional
307 | A value of either active or inactive returns stations currently set as active or inactive in the archive.
308 | Omitting this param returns all stations. e.g. status='active'
309 | units: string, optional
310 | String or set of strings and pipes separated by commas. Default is metric units. Set units='ENGLISH' for
311 | FREEDOM UNITS ;) Valid other combinations are as follows: temp|C, temp|F, temp|K; speed|mps, speed|mph,
312 | speed|kph, speed|kts; pres|pa, pres|mb; height|m, height|ft; precip|mm, precip|cm, precip|in; alti|pa,
313 | alti|inhg. e.g. units='temp|F,speed|kph,metric'
314 | groupby: string, optional
315 | Results can be grouped by key words: state, county, country, cwa, nwszone, mwsfirezone, gacc, subgacc
316 | e.g. groupby='state'
317 | timeformat: string, optional
318 | A python format string for returning customized date-time groups for observation times. Can include
319 | characters. e.g. timeformat='%m/%d/%Y at %H:%M'
320 |
321 | Returns:
322 | --------
323 | Dictionary of time series observations through the get_response() function.
324 |
325 | Raises:
326 | -------
327 | None.
328 | """
329 |
330 | self._check_geo_param(kwargs)
331 | kwargs['start'] = start
332 | kwargs['end'] = end
333 | kwargs['token'] = self.token
334 |
335 | return self._get_response('stations/timeseries', kwargs)
336 |
337 | def climatology(self, startclim, endclim, **kwargs):
338 | r""" Returns a climatology of observations at a user specified location for a specified time. Users must specify
339 | at least one geographic search parameter ('stid', 'state', 'country', 'county', 'radius', 'bbox', 'cwa',
340 | 'nwsfirezone', 'gacc', or 'subgacc') to obtain observation data. Other parameters may also be included. See
341 | below mandatory and optional parameters. Also see the metadata() function for station IDs.
342 |
343 | Arguments:
344 | ----------
345 | startclim: string, mandatory
346 | Start date in form of MMDDhhmm. MUST BE USED WITH THE ENDCLIM PARAMETER. Default time is UTC
347 | e.g. startclim='06011800' Do not specify a year
348 | endclim: string, mandatory
349 | End date in form of MMDDhhmm. MUST BE USED WITH THE STARTCLIM PARAMETER. Default time is UTC
350 | e.g. endclim='06011800' Do not specify a year
351 | obtimezone: string, optional
352 | Set to either UTC or local. Sets timezone of obs. Default is UTC. e.g. obtimezone='local'
353 | showemptystations: string, optional
354 | Set to '1' to show stations even if no obs exist that match the time period. Stations without obs are
355 | omitted by default.
356 | stid: string, optional
357 | Single or comma separated list of MesoWest station IDs. e.g. stid='kden,kslc,wbb'
358 | county: string, optional
359 | County/parish/borough (US/Canada only), full name e.g. county='Larimer'
360 | state: string, optional
361 | US state, 2-letter ID e.g. state='CO'
362 | country: string, optional
363 | Single or comma separated list of abbreviated 2 or 3 character countries e.g. country='us,ca,mx'
364 | radius: string, optional
365 | Distance from a lat/lon pt or stid as [lat,lon,radius (mi)] or [stid, radius (mi)]. e.g. radius="-120,40,20"
366 | bbox: string, optional
367 | Stations within a [lon/lat] box in the order [lonmin,latmin,lonmax,latmax] e.g. bbox="-120,40,-119,41"
368 | cwa: string, optional
369 | NWS county warning area. See http://www.nws.noaa.gov/organization.php for CWA list. e.g. cwa='LOX'
370 | nwsfirezone: string, optional
371 | NWS fire zones. See http://www.nws.noaa.gov/geodata/catalog/wsom/html/firezone.htm for a shapefile
372 | containing the full list of zones. e.g. nwsfirezone='LOX241'
373 | gacc: string, optional
374 | Name of Geographic Area Coordination Center e.g. gacc='EBCC' See http://gacc.nifc.gov/ for a list of GACCs.
375 | subgacc: string, optional
376 | Name of Sub GACC e.g. subgacc='EB07'
377 | vars: string, optional
378 | Single or comma separated list of sensor variables. Will return all stations that match one of provided
379 | variables. Useful for filtering all stations that sense only certain vars. Do not request vars twice in
380 | the query. e.g. vars='wind_speed,pressure' Use the variables function to see a list of sensor vars.
381 | status: string, optional
382 | A value of either active or inactive returns stations currently set as active or inactive in the archive.
383 | Omitting this param returns all stations. e.g. status='active'
384 | units: string, optional
385 | String or set of strings and pipes separated by commas. Default is metric units. Set units='ENGLISH' for
386 | FREEDOM UNITS ;) Valid other combinations are as follows: temp|C, temp|F, temp|K; speed|mps, speed|mph,
387 | speed|kph, speed|kts; pres|pa, pres|mb; height|m, height|ft; precip|mm, precip|cm, precip|in; alti|pa,
388 | alti|inhg. e.g. units='temp|F,speed|kph,metric'
389 | groupby: string, optional
390 | Results can be grouped by key words: state, county, country, cwa, nwszone, mwsfirezone, gacc, subgacc
391 | e.g. groupby='state'
392 | timeformat: string, optional
393 | A python format string for returning customized date-time groups for observation times. Can include
394 | characters. e.g. timeformat='%m/%d/%Y at %H:%M'
395 |
396 | Returns:
397 | --------
398 | Dictionary of climatology observations through the get_response() function.
399 |
400 | Raises:
401 | -------
402 | None.
403 |
404 | """
405 |
406 | self._check_geo_param(kwargs)
407 | kwargs['startclim'] = startclim
408 | kwargs['endclim'] = endclim
409 | kwargs['token'] = self.token
410 |
411 | return self._get_response('stations/climatology', kwargs)
412 |
413 | def variables(self):
414 | """ Returns a dictionary of a list of variables that could be obtained from the 'vars' param in other functions.
415 | Some stations may not record all variables listed. Use the metadata() function to return metadata on each
416 | station.
417 |
418 | Arguments:
419 | ----------
420 | None.
421 |
422 | Returns:
423 | --------
424 | Dictionary of variables.
425 |
426 | Raises:
427 | -------
428 | None.
429 |
430 | """
431 |
432 | return self._get_response('variables', {'token': self.token})
433 |
434 | def metadata(self, **kwargs):
435 | r""" Returns the metadata for a station or stations. Users must specify at least one geographic search parameter
436 | ('stid', 'state', 'country', 'county', 'radius', 'bbox', 'cwa', 'nwsfirezone', 'gacc', or 'subgacc') to obtain
437 | observation data. Other parameters may also be included. See below for optional parameters.
438 |
439 | Arguments:
440 | ----------
441 | complete: string, optional
442 | A value of 1 or 0. When set to 1, an extended list of metadata attributes for each returned station is
443 | provided. This result is useful for exploring the zones and regions in which a station resides.
444 | e.g. complete='1'
445 | sensorvars: string, optional
446 | A value of 1 or 0. When set to 1, a complete history of sensor variables and period of record is given for
447 | each station. e.g. sensorvars='1'
448 | obrange: string, optional
449 | Filters metadata for stations which were in operation for a specified time period. Users can specify one
450 | date or a date range. Dates are in the format of YYYYmmdd. e.g. obrange='20150101',
451 | obrange='20040101,20060101'
452 | obtimezone: string, optional
453 | Set to either UTC or local. Sets timezone of obs. Default is UTC. e.g. obtimezone='local'
454 | stid: string, optional
455 | Single or comma separated list of MesoWest station IDs. e.g. stid='kden,kslc,wbb'
456 | county: string, optional
457 | County/parish/borough (US/Canada only), full name e.g. county='Larimer'
458 | state: string, optional
459 | US state, 2-letter ID e.g. state='CO'
460 | country: string, optional
461 | Single or comma separated list of abbreviated 2 or 3 character countries e.g. country='us,ca,mx'
462 | radius: string, optional
463 | Distance from a lat/lon pt or stid as [lat,lon,radius (mi)] or [stid, radius (mi)]. e.g. radius="-120,40,20"
464 | bbox: string, optional
465 | Stations within a [lon/lat] box in the order [lonmin,latmin,lonmax,latmax] e.g. bbox="-120,40,-119,41"
466 | cwa: string, optional
467 | NWS county warning area. See http://www.nws.noaa.gov/organization.php for CWA list. e.g. cwa='LOX'
468 | nwsfirezone: string, optional
469 | NWS fire zones. See http://www.nws.noaa.gov/geodata/catalog/wsom/html/firezone.htm for a shapefile
470 | containing the full list of zones. e.g. nwsfirezone='LOX241'
471 | gacc: string, optional
472 | Name of Geographic Area Coordination Center e.g. gacc='EBCC' See http://gacc.nifc.gov/ for a list of GACCs.
473 | subgacc: string, optional
474 | Name of Sub GACC e.g. subgacc='EB07'
475 | vars: string, optional
476 | Single or comma separated list of sensor variables. Will return all stations that match one of provided
477 | variables. Useful for filtering all stations that sense only certain vars. Do not request vars twice in
478 | the query. e.g. vars='wind_speed,pressure' Use the variables function to see a list of sensor vars.
479 | status: string, optional
480 | A value of either active or inactive returns stations currently set as active or inactive in the archive.
481 | Omitting this param returns all stations. e.g. status='active'
482 | units: string, optional
483 | String or set of strings and pipes separated by commas. Default is metric units. Set units='ENGLISH' for
484 | FREEDOM UNITS ;) Valid other combinations are as follows: temp|C, temp|F, temp|K; speed|mps, speed|mph,
485 | speed|kph, speed|kts; pres|pa, pres|mb; height|m, height|ft; precip|mm, precip|cm, precip|in; alti|pa,
486 | alti|inhg. e.g. units='temp|F,speed|kph,metric'
487 | groupby: string, optional
488 | Results can be grouped by key words: state, county, country, cwa, nwszone, mwsfirezone, gacc, subgacc
489 | e.g. groupby='state'
490 | timeformat: string, optional
491 | A python format string for returning customized date-time groups for observation times. Can include
492 | characters. e.g. timeformat='%m/%d/%Y at %H:%M'
493 |
494 | Returns:
495 | --------
496 | A dictionary of metadata.
497 |
498 | Raises:
499 | -------
500 | None.
501 |
502 | """
503 |
504 | self._check_geo_param(kwargs)
505 | kwargs['token'] = self.token
506 |
507 | return self._get_response('stations/metadata', kwargs)
508 |
509 |
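A minimal sketch of the wrapped endpoints; the token is a placeholder and must be replaced with a valid MesoWest API token:

    m = Meso(token='YOUR_API_TOKEN')
    available = m.variables()
    obs = m.timeseries(start='201601010000', end='201601020000', stid='wbb')
    meta = m.metadata(stid='wbb', complete='1')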
--------------------------------------------------------------------------------
/wellapplication/graphs.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Thu Nov 19 12:32:51 2015
4 |
5 | @author: paulinkenbrandt
6 | """
7 | from __future__ import absolute_import, division, print_function, unicode_literals
8 |
9 | import pandas as pd
10 | import matplotlib.pyplot as plt
11 | import matplotlib as mpl
12 | import matplotlib.cm as cm
13 | from scipy.stats import linregress
14 | import numpy as np
15 | from collections import OrderedDict
16 | from datetime import datetime, timedelta
17 |
18 |
19 |
20 | def get_recess_int(df, Q, maxper=18, minper=6, thresh=30, inplace=False):
21 | """Gets intervals of recession from a hydrograph
22 |
23 | :param df: DataFrame with hydrograph data
24 | :param Q: Field in DataFrame with discharge data
25 | :param maxper: Period of record to scan discharge data for maxima; created for daily values; defaults to 18
26 |     :param minper: Period of record to scan discharge data for minima; should be less than maxper; defaults to 6
27 | :param thresh: Threshold of discharge below which maxima are not considered; defaults to 30
28 | :param inplace: Append to input database or create new one; defaults to False
29 | :return: DataFrame of original data and Max and Min, Indexes of maxima, Indexes of minima
30 | """
31 | if inplace:
32 | data = df
33 | else:
34 | data = df.copy()
35 | data['max'] = data[Q].rolling(maxper,center=True).max()
36 |     data['max'] = data.loc[data['max'] == data[Q], 'max']
37 |     data['max'] = data.loc[data['max'] > thresh, 'max']
38 |     data['min'] = data[Q].rolling(minper, center=True).min()
39 |     data['min'] = data.loc[data['min'] == data[Q], 'min']
40 |
41 | maxlist = data.index[data['max'].notnull()]
42 | firstmin = []
43 | for ind in maxlist:
44 |         firstmin.append(data.loc[ind:, 'min'].first_valid_index())
45 |     data['min'] = data.loc[data.index.isin(firstmin), 'min']
46 | return data, maxlist, firstmin
47 |
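A sketch on a synthetic daily record (not from the package's tests; assumes pandas and numpy are imported as pd and np, as at the top of this module):

    idx = pd.date_range('2019-03-01', periods=120, freq='D')
    q = pd.DataFrame({'value': np.abs(np.sin(np.linspace(0, 6, 120))) * 50 + 10}, index=idx)
    data, peaks, troughs = get_recess_int(q, 'value', maxper=18, minper=6, thresh=30)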
48 |
49 | class recess(object):
50 | """Creates recession curve and modeled output to describe spring and streamflow recession.
51 |
52 | :param df: dataframe with spring discharge data
53 | :param Q: string indicating discharge field in df in units of gpm
54 | :param st: start date to examine data in [YYYY, MM, DD] format, where values are integers in an array
55 | :param end: end date to examine data
56 |     :param excs: begin date of exclusion period
57 |     :param excf: end date of exclusion period
58 |
59 | :type st: list
60 | :type end: list
61 | :type Q: str
62 |
63 | :return popt: alpha value for recession curve
64 | :return x1: days from start of recession
65 | :return x2: dates of recession curve analysis
66 | :return y1: points used for recession curve analysis
67 | :return y2: recession curve values
68 | :returns: Plot of recession curve
69 | """
70 |
71 | def __init__(self, df, Q, st, end='', lab='', excs=[0, 0, 0], excf=[0, 0, 0]):
72 | self.ymd = [datetime.now().year, datetime.now().month, datetime.now().day]
73 | if end == '':
74 | end = self.ymd
75 |
76 | if lab == '':
77 | self.Qlab = 'Discharge'
78 | else:
79 | self.Qlab = lab
80 |
81 |         self.Qz = df[Q].iloc[0]
82 |
83 | self.rec_results = self.recession(df, Q, st, end, excs, excf)
84 |
85 | def fitit(self, x, y, Q):
86 | from scipy.optimize import curve_fit
87 |
88 | func = lambda x, c: Q * np.exp(-1 * c * x)
89 |
90 | popt, pcov = curve_fit(func, x, y, p0=(1e-1))
91 | return popt, pcov
92 |
93 |
94 | def recession(self, df, Q, st, end, excs, excf):
95 | """Creates recession curve and modeled output to describe spring and streamflow recession.
96 |
97 | The user puts in a dataframe with discharge data and defines the date range of recession. The Class will return
98 | recession values of the date range given.
99 |
100 | :param df: DataFrame with spring discharge data
101 | :type df: pandas.core.frame.DataFrame
102 | :param Q: discharge field in df in units of gpm
103 | :type Q: str
104 | :param st: start date to examine data in [YYYY, MM, DD] format, where values are integers in an array
105 | :type st: list
106 | :param end: end date to examine data
107 | :type end: list
108 | :param excs: begin date of exclusion period
109 | :param excf: end date of exclusion period
110 | :type excs: list
111 |         :type excf: list
112 |
113 | :returns: popt1, x1, x2, y1, y2
114 | :return popt1: alpha value for recession curve
115 | :return x1: days from start of recession
116 | :return x2: dates of recession curve analysis
117 | :return y1: points used for recession curve analysis
118 | :return y2: recession curve values
119 | """
120 | # account for hours in time input
121 | if len(st) == 3 and len(end) == 3:
122 |             df1 = df[(df.index >= datetime(st[0], st[1], st[2])) & (df.index <= datetime(end[0], end[1], end[2]))]
123 |         else:
124 |             df1 = df[(df.index >= datetime(st[0], st[1], st[2], st[3], st[4])) & (
125 |                 df.index <= datetime(end[0], end[1], end[2], end[3], end[4]))]
126 |
127 | # account for hours in time input
128 | if excs[0] == 0:
129 | pass
130 | else:
131 | if len(excs) == 3:
132 |                 df1 = df1[(df1.index < datetime(excs[0], excs[1], excs[2])) | (
133 |                     df1.index > datetime(excf[0], excf[1], excf[2]))]
134 |             else:
135 |                 df1 = df1[(df1.index < datetime(excs[0], excs[1], excs[2], excs[3], excs[4])) | (
136 |                     df1.index > datetime(excf[0], excf[1], excf[2], excf[3], excf[4]))]
137 |
138 | df2 = df1.dropna(subset=[Q])
139 |
140 | y1 = df2[Q]
141 | x1 = (df2.index.to_julian_date() - df2.index.to_julian_date()[0]) # convert to numeric days for opt. function
142 |         popt1, pcov1 = self.fitit(x1, y1, y1.iloc[0])  # fit curve
143 |         x2 = [df2.index[0] + timedelta(i) for i in x1]  # convert back to dates for labels
144 |         y2 = [y1.iloc[0] * np.exp(-1 * popt1[0] * i) for i in x1]  # run function with optimized variables
145 | plt.plot(x2, y2, label='Recession (alpha = %.3f)' % popt1[0]) # report alpha value
146 | plt.scatter(x2, y1, label='Discharge')
147 | plt.ylabel(self.Qlab)
148 | plt.legend(scatterpoints=1)
149 | plt.show()
150 | return popt1, x1, x2, y1, y2
151 |
152 |
153 | class piper(object):
154 | """Class that generates rectangular piper diagrams.
155 |
156 | :param df: DataFrame containing chemistry data; must have fields labeled as abbreviations of the major ions; Na, K,
157 | NaK, Ca, Mg, Cl, HCO3, CO3, and SO4
158 | :type df: pandas.core.frame.DataFrame
159 | :param type_col: Name of field that designates the sample type (optional); defaults to ''
160 | :type type_col: str
161 | :param var_col: Name of field that contains a scalar variable to be designated by color (optional); defaults to ''
162 | :type var_col: str
163 |
164 |
165 | .. note::
166 | Hydrochemistry - Construct Rectangular Piper plot
167 | Adopted from: Ray and Mukherjee, 2008, Groundwater 46(6): 893-896 and from code found at:
168 | http://python.hydrology-amsterdam.nl/scripts/piper_rectangular.py
169 | Based on code by:
170 | B.M. van Breukelen
171 |
172 | """
173 |
174 | def __init__(self, df, type_col='', var_col=''):
175 |
176 | self.fieldnames = [u'Na', u'K', u'NaK', u'Ca', u'Mg', u'Cl', u'HCO3', u'CO3', u'SO4']
177 | self.anions = ['Cl', 'HCO3', 'CO3', 'SO4']
178 | self.cations = ['Na', 'K', 'Ca', 'Mg', 'NaK']
179 | self.piperplot(df, type_col, var_col)
180 |
181 |
182 | def fillMissing(self, df):
183 |
184 | # fill in nulls with 0
185 | for col in df.columns:
186 | if col in self.fieldnames:
187 | for i in df.index:
188 | if df.loc[i, col] is None or df.loc[i, col] == '' or np.isnan(df.loc[i, col]):
189 | df.loc[i, col] = 0
190 | else:
191 |                 pass  # columns that are not major ions are left unchanged
192 |
193 | # add missing columns
194 | for name in self.fieldnames:
195 | if name in df.columns:
196 | pass
197 | else:
198 | print(name)
199 | df[name] = 0
200 |
201 | return df
202 |
203 | def check_nak(self, x):
204 | if x[0] == 0 and x[2] > 0:
205 | return x[2]
206 | else:
207 | return x[0] + x[1]
208 |
209 | def convertIons(self, df):
210 | """Convert major ion concentrations from mg/L to meq
211 |
212 | This function uses conversion factors to convert the concentrations of major ions from mg/L to meq. It also
213 | appends a field to the input database listing the Cation-Anion pair that have the highest meq concentrations.
214 |
215 | :param df: DataFrame containing chemistry data; must have fields labeled as abbreviations of the major ions; Na, K,
216 | NaK, Ca, Mg, Cl, HCO3, CO3, and SO4
217 |         :returns: appends converted fields onto the DataFrame with the suffix `_meq` and adds the fields 'water_type', 'CBE'
218 |             (charge balance error), 'EC' (anions minus cations), and 'total_ions' (anions plus cations)
219 |
220 | """
221 | # Conversion factors from mg/L to meq/L
222 | d = {'Ca': 0.04990269, 'Mg': 0.082287595, 'Na': 0.043497608, 'K': 0.02557656, 'Cl': 0.028206596, 'NaK': 0.043497608,
223 | 'HCO3': 0.016388838, 'CO3': 0.033328223, 'SO4': 0.020833333, 'NO2': 0.021736513, 'NO3': 0.016129032}
224 |
225 | df1 = df
226 |
227 | for name in self.fieldnames:
228 | if name in df.columns:
229 |                 df1[name + '_meq'] = df1[name].apply(lambda x: float(d.get(name, 0)) * x)
230 |
231 |
232 | df1['NaK_meq'] = df1[['Na_meq', 'K_meq','NaK_meq']].apply(lambda x: self.check_nak(x), 1)
233 |
234 | df1['anions'] = 0
235 | df1['cations'] = 0
236 |
237 | for ion in self.anions:
238 | if ion in df.columns:
239 | df1['anions'] += df1[ion + '_meq']
240 | for ion in self.cations:
241 | if ion in df1.columns:
242 | df1['cations'] += df1[ion + '_meq']
243 |
244 | df1['total_ions'] = df1['cations'] + df1['anions']
245 | df1['EC'] = df1['anions'] - df1['cations']
246 | df1['CBE'] = df1['EC'] / (df1['anions'] + df1['cations'])
247 | df1['maj_cation'] = df1[['Ca_meq','Mg_meq','Na_meq','K_meq','NaK_meq']].idxmax(axis=1)
248 | df1['maj_anion'] = df1[['Cl_meq','SO4_meq','HCO3_meq','CO3_meq']].idxmax(axis=1)
249 | df1['water_type'] = df1[['maj_cation','maj_anion']].apply(lambda x: str(x[0])[:-4]+'-'+str(x[1])[:-4],1)
250 | return df1
251 |
252 | def ionPercentage(self, df):
253 | """Determines percentage of charge for each ion for display on the piper plot"""
254 | for ion in self.anions:
255 | df[ion + 'EC'] = df[[ion + '_meq', 'anions']].apply(lambda x: 100 * x[0] / x[1], 1)
256 | for ion in self.cations:
257 | df[ion + 'EC'] = df[[ion + '_meq', 'cations']].apply(lambda x: 100 * x[0] / x[1], 1)
258 |
259 | return df
260 |
261 | def piperplot(self, df, type_col, var_col):
262 | """Generates a rectangular piper diagram"""
263 | self.fillMissing(df)
264 | self.convertIons(df)
265 | self.ionPercentage(df)
266 |
267 | CaEC = df['CaEC'].values
268 | MgEC = df['MgEC'].values
269 | ClEC = df['ClEC'].values
270 | SO4EC = df['SO4EC'].values
271 | NaKEC = df['NaKEC'].values
272 | SO4ClEC = df[['ClEC', 'SO4EC']].apply(lambda x: x[0] + x[1], 1).values
273 |
274 |
275 |
276 | num_samps = len(df)
277 | if var_col == '':
278 | Elev = ''
279 | else:
280 | Elev = df[var_col].values
281 |
282 | if type_col == '':
283 | typ = ['Station']*num_samps
284 | stationtypes = ['Station']
285 | else:
286 | stationtypes = list(df[type_col].unique())
287 | typ = df[type_col].values
288 |
289 | # Change default settings for figures
290 | plt.rc('xtick', labelsize=10)
291 | plt.rc('ytick', labelsize=10)
292 | plt.rc('font', size=12)
293 | plt.rc('legend', fontsize=12)
294 | plt.rc('figure', figsize=(14, 5.5)) # defines size of Figure window orig (14,4.5)
295 |
296 | markSize = 30
297 | lineW = 0.5
298 |
299 | # Make Figure
300 | fig = plt.figure()
301 | # add title
302 | # fig.suptitle(piperTitle, x=0.20,y=.98, fontsize=14 )
303 | # Colormap and Saving Options for Figure
304 |
305 | if len(Elev) > 0:
306 | vart = Elev
307 | else:
308 | vart = [1] * num_samps
309 | cNorm = plt.Normalize(vmin=min(vart), vmax=max(vart))
310 | cmap = plt.cm.coolwarm
311 | # pdf = PdfPages(fileplace)
312 |
313 | mrkrSymbl = ['v', '^', '+', 's', '.', 'o', '*', 'v', '^', '+', 's', ',', '.', 'o', '*', 'v', '^', '+', 's', ',',
314 | '.', 'o', '*', 'v', '^', '+', 's', ',', '.', 'o', '*']
315 |
316 | # count variable for legend (n)
317 | unique, counts = np.unique(typ, return_counts=True)
318 | nstatTypesDict = dict(zip(unique, counts))
319 |
320 | typdict = {}
321 | for i in range(len(stationtypes)):
322 | typdict[stationtypes[i]] = mrkrSymbl[i]
323 |
324 | # CATIONS-----------------------------------------------------------------------------
325 | # 2 lines below needed to create 2nd y-axis (ax1b) for first subplot
326 | ax1 = fig.add_subplot(131)
327 | ax1b = ax1.twinx()
328 |
329 | ax1.fill([100, 0, 100, 100], [0, 100, 100, 0], color=(0.8, 0.8, 0.8))
330 | ax1.plot([100, 0], [0, 100], 'k')
331 | ax1.plot([50, 0, 50, 50], [0, 50, 50, 0], 'k--')
332 | ax1.text(25, 15, 'Na type')
333 | ax1.text(75, 15, 'Ca type')
334 | ax1.text(25, 65, 'Mg type')
335 |
336 | if len(typ) > 0:
337 | for j in range(len(typ)):
338 | ax1.scatter(CaEC[j], MgEC[j], s=markSize, c=vart[j], cmap=cmap, norm=cNorm, marker=typdict[typ[j]],
339 | linewidths=lineW)
340 | else:
341 | ax1.scatter(CaEC, MgEC, s=markSize, c=vart, cmap=cmap, norm=cNorm, linewidths=lineW)
342 |
343 | ax1.set_xlim(0, 100)
344 | ax1.set_ylim(0, 100)
345 | ax1b.set_ylim(0, 100)
346 | ax1.set_xlabel('<= Ca (% meq)')
347 | ax1b.set_ylabel('Mg (% meq) =>')
348 | plt.setp(ax1, yticklabels=[])
349 |
350 | # next line needed to reverse x axis:
351 | ax1.set_xlim(ax1.get_xlim()[::-1])
352 |
353 | # ANIONS----------------------------------------------------------------------------
354 | ax = fig.add_subplot(1, 3, 3)
355 | ax.fill([100, 100, 0, 100], [0, 100, 100, 0], color=(0.8, 0.8, 0.8))
356 | ax.plot([0, 100], [100, 0], 'k')
357 | ax.plot([50, 50, 0, 50], [0, 50, 50, 0], 'k--')
358 | ax.text(55, 15, 'Cl type')
359 | ax.text(5, 15, 'HCO3 type')
360 | ax.text(5, 65, 'SO4 type')
361 |
362 | if len(typ) > 0:
363 | for j in range(len(typ)):
364 | labs = "{:} n= {:}".format(typ[j],nstatTypesDict[typ[j]])
365 | if float(nstatTypesDict[typ[j]]) > 1:
366 | s = ax.scatter(ClEC[j], SO4EC[j], s=markSize, c=vart[j], cmap=cmap, norm=cNorm,
367 | marker=typdict[typ[j]], label=labs, linewidths=lineW)
368 | else:
369 | s = ax.scatter(ClEC[j], SO4EC[j], s=markSize, c=vart[j], cmap=cmap, norm=cNorm,
370 | marker=typdict[typ[j]], label=typ[j], linewidths=lineW)
371 | else:
372 | s = ax.scatter(ClEC, SO4EC, s=markSize, c=vart, cmap=cmap, norm=cNorm, label='Sample', linewidths=lineW)
373 |
374 | ax.set_xlim(0, 100)
375 | ax.set_ylim(0, 100)
376 | ax.set_xlabel('Cl (% meq) =>')
377 | ax.set_ylabel('SO4 (% meq) =>')
378 |
379 | # CATIONS AND ANIONS COMBINED ---------------------------------------------------------------
380 | # 2 lines below needed to create 2nd y-axis (ax1b) for first subplot
381 | ax2 = fig.add_subplot(132)
382 | ax2b = ax2.twinx()
383 |
384 | ax2.plot([0, 100], [10, 10], 'k--')
385 | ax2.plot([0, 100], [50, 50], 'k--')
386 | ax2.plot([0, 100], [90, 90], 'k--')
387 | ax2.plot([10, 10], [0, 100], 'k--')
388 | ax2.plot([50, 50], [0, 100], 'k--')
389 | ax2.plot([90, 90], [0, 100], 'k--')
390 |
391 | if len(typ) > 0:
392 | for j in range(len(typ)):
393 | ax2.scatter(NaKEC[j], SO4ClEC[j], s=markSize, c=vart[j], cmap=cmap, norm=cNorm, marker=typdict[typ[j]],
394 | linewidths=lineW)
395 | else:
396 | ax2.scatter(NaKEC, SO4ClEC, s=markSize, c=vart, cmap=cmap, norm=cNorm, linewidths=lineW)
397 |
398 | ax2.set_xlim(0, 100)
399 | ax2.set_ylim(0, 100)
400 | ax2.set_xlabel('Na+K (% meq) =>')
401 | ax2.set_ylabel('SO4+Cl (% meq) =>')
402 | ax2.set_title('<= Ca+Mg (% meq)', fontsize=12)
403 | ax2b.set_ylabel('<= CO3+HCO3 (% meq)')
404 | ax2b.set_ylim(0, 100)
405 |
406 | # next two lines needed to reverse 2nd y axis:
407 | ax2b.set_ylim(ax2b.get_ylim()[::-1])
408 |
409 | # Align plots
410 | plt.subplots_adjust(left=0.05, bottom=0.35, right=0.95, top=0.90, wspace=0.4, hspace=0.0)
411 |
412 | # Legend-----------------------------------------------------------------------------------------
413 |
414 | # Add colorbar below legend
415 | # [left, bottom, width, height] where all quantities are in fractions of figure width and height
416 |
417 | if len(typ) > 0:
418 | handles, labels = ax.get_legend_handles_labels()
419 | by_label = OrderedDict(zip(labels, handles))
420 |
421 | plt.legend(by_label.values(), by_label.keys(), loc='lower center', ncol=5, shadow=False, fancybox=True,
422 | bbox_to_anchor=(0.5, -0.3), scatterpoints=1)
423 |
424 | if len(Elev) > 0:
425 | cax = fig.add_axes([0.25, 0.10, 0.50, 0.02])
426 | cb1 = plt.colorbar(s, cax=cax, cmap=cmap, norm=cNorm, orientation='horizontal') # use_gridspec=True
427 | cb1.set_label(var_col, size=8)
428 |
429 | self.plot = fig
430 | self.df = df
431 |
432 |
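# Hypothetical usage sketch for the piper-diagram class above.  The class name
# 'piper', its constructor, and the expected input columns (major-ion
# concentrations in mg/L plus optional type and elevation columns) are
# assumptions inferred from the methods shown here, not a documented API, and
# the sample values are made up.
#
#     chem = pd.DataFrame({'Ca': [45.0, 80.0], 'Mg': [12.0, 25.0], 'Na': [20.0, 150.0],
#                          'K': [2.0, 5.0], 'Cl': [15.0, 200.0], 'SO4': [30.0, 90.0],
#                          'HCO3': [180.0, 220.0], 'CO3': [0.0, 0.0],
#                          'type': ['Spring', 'Well'], 'elev_ft': [5200.0, 4400.0]})
#     pc = piper()
#     pc.piperplot(chem, 'type', 'elev_ft')  # adds *_meq, CBE, and water_type columns and builds the figure
#     pc.plot.savefig('piper.png')           # .plot holds the matplotlib Figure; .df the augmented DataFrame
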
433 | def fdc(df, site, begyear=1900, endyear=2015, normalizer=1, plot=True):
434 | """Generate flow duration curve for hydrologic time series data
435 |
436 | :param df: DataFrame with discharge data of interest; must have a date or date-time as the index
437 | :type df: pandas.core.frame.DataFrame
438 | :param site: Name of DataFrame column in df containing discharge data
439 | :type site: str
440 | :param begyear: beginning year of analysis; defaults to 1900
441 | :type begyear: int
442 | :param endyear: end year of analysis; defaults to 2015
443 | :type endyear: int
444 | :param normalizer: value to use to normalize discharge; defaults to 1 (no normalization)
445 | :type normalizer: int
446 |     :param plot: whether to generate the plot or just return the variables for a plot; defaults to True
447 | :type plot: bool
448 |
449 | :returns: matplotlib plot displaying the flow duration curve of the data
450 | :return prob: x field stating the probability of a discharge in data
451 | :rtype prob: list
452 | :return data: y field stating the discharge for probability prob
453 | :rtype data: list
454 |
455 | """
456 | from scipy import stats as sp
457 | # limit dataframe to only the site
458 | df = df[[site]]
459 |
460 | # filter dataframe to only include dates of interest
461 |     data = df[
462 |         (pd.to_datetime(df.index) > pd.Timestamp(begyear, 1, 1)) & (pd.to_datetime(df.index) < pd.Timestamp(endyear, 1, 1))]
463 |
464 | # remove na values from dataframe
465 | data = data.dropna()
466 |
467 | # take average of each day of year (from 1 to 366) over the selected period of record
468 |     data['doy'] = pd.to_datetime(data.index).dayofyear
469 | dailyavg = data[site].groupby(data['doy']).mean()
470 |
471 | data = np.sort(dailyavg)
472 |
473 | ## uncomment the following to use normalized discharge instead of discharge
474 | # mean = np.mean(data)
475 | # std = np.std(data)
476 | # data = [(data[i]-np.mean(data))/np.std(data) for i in range(len(data))]
477 | data = [(data[i]) / normalizer for i in range(len(data))]
478 |
479 | # ranks data from smallest to largest
480 | ranks = sp.rankdata(data, method='average')
481 |
482 | # reverses rank order
483 | ranks = ranks[::-1]
484 |
485 | # calculate probability of each rank
486 | prob = [(ranks[i] / (len(data) + 1)) for i in range(len(data))]
487 |
488 | # plot data via matplotlib
489 | if plot:
490 | plt.plot(prob, data, label=site + ' ' + str(begyear) + '-' + str(endyear))
491 | else:
492 | pass
493 | return prob, data
494 |
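# Hedged usage sketch for fdc(); the station name and the synthetic daily
# discharge record below are made up purely for illustration.
if __name__ == '__main__':
    import numpy as np
    import pandas as pd
    demo_idx = pd.date_range('2005-01-01', '2014-12-31', freq='D')
    demo_q = pd.DataFrame({'demo_site': 10 + 5 * np.sin(2 * np.pi * demo_idx.dayofyear / 365.25)},
                          index=demo_idx)
    prob, data = fdc(demo_q, 'demo_site', begyear=2005, endyear=2015, plot=False)
    # prob[i] is the exceedance probability of the day-of-year mean discharge data[i]
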
495 |
496 | class gantt(object):
497 | """Class to create gantt plots and to summarize pandas timeseries dataframes.
498 |
499 | Finds gaps and measuring duration of data.
500 |     :param df: DataFrame with a datetime index and one column of time-series data per site; each column name
501 |         should be the site name, or matching labels should be supplied through the labels parameter
502 | :param stations: List of columns to include in the chart; defaults to all columns
503 | :param labels: Labels to use in the resulting plot for each station; must be equal to the length of stations list;
504 | defaults to stations
505 |     :param samp_int: regular interval to which the datetime index will be resampled; defaults to daily ('D');
506 | see http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases for all of the options
507 | :type samp_int: str
508 | :type df: pandas.core.frame.DataFrame
509 | :type stations: list
510 | :type labels: list
511 |
512 | .. note::
513 | `.stations` produces a list describing the stations put into the class
514 | `.labels` produces a list describing the labels put into the class
515 | `.dateranges` is a dictionary describing gaps in the dataframe based on the presence of nan values in the frame
516 | `.ganttPlotter()` plots a gantt plot
517 |
518 | """
519 |
520 |     def __init__(self, df, stations=None, labels=None, samp_int='D'):
521 |         if stations is None or len(stations) == 0:
522 |             stations = df.columns
523 |         if labels is None or len(labels) == 0:
524 |             labels = stations
525 |
526 | self.data = df.resample(samp_int).mean()
527 | self.stations = stations
528 | self.labels = labels
529 | self.dateranges = self.markGaps()
530 | self.sitestats = self.site_info()
531 | print(
532 | 'Data Loaded \nType .ganttPlotter() after your defined object to make plot\nType .sitestats after your defined object to get summary stats')
533 |
534 | def markGaps(self):
535 | """Produces dictionary of list of gaps in time series data based on the presence of nan values;
536 | used for gantt plotting
537 |
538 | :returns: dateranges; a dictionary with station names as keys and lists of begin and end dates as values
539 | """
540 | df = self.data
541 | stations = self.stations
542 |
543 | dateranges = {}
544 | for station in stations:
545 | dateranges[station] = []
546 |             first = df.loc[:, station].first_valid_index()
547 |             last = df.loc[:, station].last_valid_index()
548 |             records = df.loc[first:last, station]
549 |             dateranges[station].append(pd.to_datetime(first))
550 |             for i in range(len(records) - 1):
551 |                 if pd.isnull(records.iloc[i + 1]) and pd.notnull(records.iloc[i]):
552 |                     dateranges[station].append(pd.to_datetime(records.index)[i])
553 |                 elif pd.isnull(records.iloc[i]) and pd.notnull(records.iloc[i + 1]):
554 |                     dateranges[station].append(pd.to_datetime(records.index)[i + 1])
555 |             dateranges[station].append(pd.to_datetime(last))
556 | return dateranges
557 |
558 | def site_info(self):
559 | """Creates a table of summary statistics for all of the stations in the stations field defined in the class
560 |
561 | :returns: site_info; a table of summary statistics; first, last, min, max, std, median, avg, 25%tile, 75%tile,
562 | and count
563 |
564 | """
565 | stations = self.stations
566 | df = self.data
567 |
568 | stat, first, last, minum, maxum, stdev, medin, avg, q25, q75, count = [], [], [], [], [], [], [], [], [], [], []
569 | for station in stations:
570 |             stdt = df.loc[:, station]
571 | stat.append(station)
572 | first.append(stdt.first_valid_index())
573 | last.append(stdt.last_valid_index())
574 | minum.append(stdt.min())
575 | maxum.append(stdt.max())
576 | stdev.append(stdt.std())
577 | medin.append(stdt.median())
578 | avg.append(stdt.mean())
579 | q25.append(stdt.quantile(0.25))
580 | q75.append(stdt.quantile(0.75))
581 | count.append(stdt.count())
582 | colm = {'StationId': stat, 'first': first, 'last': last, 'min': minum, 'max': maxum,
583 | 'std': stdev, 'median': medin, 'mean': avg, 'q25': q25, 'q75': q75, 'count': count}
584 | Site_Info = pd.DataFrame(colm)
585 | return Site_Info
586 |
587 | def ganttPlotter(self):
588 | """Plots gantt plot using dictionary of stations and associated start and end dates;
589 | uses output from markGaps function"""
590 |
591 | labs, tickloc, col = [], [], []
592 |
593 | dateranges = self.dateranges
594 | stations = self.stations
595 | labels = self.labels
596 |
597 | # create color iterator for multi-color lines in gantt chart
598 | color = iter(plt.cm.Dark2(np.linspace(0, 1, len(stations))))
599 |
600 |         # create the figure and axes in one call so the figsize is actually applied
601 |         fig, ax = plt.subplots(figsize=(8, 10))
602 |
603 | for i in range(len(stations)):
604 | c = next(color)
605 | for j in range(len(dateranges[stations[i]]) - 1):
606 | if (j + 1) % 2 != 0:
607 | if len(labels) == 0 or len(labels) != len(stations):
608 | plt.hlines(i + 1, dateranges[stations[i]][j], dateranges[stations[i]][j + 1], label=stations[i],
609 | color=c, linewidth=3)
610 | else:
611 | plt.hlines(i + 1, dateranges[stations[i]][j], dateranges[stations[i]][j + 1], label=labels[i],
612 | color=c, linewidth=3)
613 | labs.append(stations[i])
614 | tickloc.append(i + 1)
615 | col.append(c)
616 | plt.ylim(0, len(stations) + 1)
617 |
618 | if len(labels) == 0 or len(labels) != len(stations):
619 | labels = stations
620 | plt.yticks(tickloc, labs)
621 | else:
622 | plt.yticks(tickloc, labels)
623 |
624 | plt.xlabel('Date')
625 | plt.ylabel('Station Name')
626 | plt.grid(linewidth=0.2)
627 |
628 | gytl = plt.gca().get_yticklabels()
629 | for i in range(len(gytl)):
630 | gytl[i].set_color(col[i])
631 | plt.tight_layout()
632 | return fig
633 |
634 | def gantt(self):
635 | """This function runs the other functions in this class."""
636 | stations = self.stations
637 | labels = self.labels
638 | df = self.data
639 |
640 |         df1 = df.loc[:, stations]
641 |         df1 = df1.sort_index()
642 | Site_Info = self.site_info()
643 | dateranges = self.markGaps()
644 | fig = self.ganttPlotter()
645 | return Site_Info, dateranges, fig
646 |
647 |
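# Hedged usage sketch for the gantt class; the station name, dates, and gap
# below are made up.
if __name__ == '__main__':
    import numpy as np
    import pandas as pd
    demo_idx = pd.date_range('2020-01-01', periods=10, freq='D')
    demo_vals = [1.0, 1.2, 1.1, np.nan, np.nan, np.nan, 0.9, 1.0, 1.1, 1.0]
    g = gantt(pd.DataFrame({'site_a': demo_vals}, index=demo_idx))
    print(g.dateranges['site_a'])  # alternating start/end dates of each continuous run of data,
                                   # here roughly [2020-01-01, 2020-01-03, 2020-01-07, 2020-01-10]
    # g.ganttPlotter()             # draws one horizontal bar per (start, end) pair
    # g.sitestats                  # summary-statistics table built by site_info()
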
648 | def scatterColor(x0, y, w):
649 | """Creates scatter plot with points colored by variable.
650 | All input arrays must have matching lengths
651 |
652 | :param x0: x values to plot
653 | :type x0: list
654 | :param y: y values to plot
655 | :type y: list
656 |     :param w: values used to color the points (the colorbar is labeled 'Julian Date')
657 |     :type w: numpy.ndarray
658 | 
659 |     :returns: slope and intercept of the least-squares best-fit line drawn on the plot
660 |     .. warning:: all input arrays must have matching lengths and contain only scalar values
661 |     .. note:: the fit line is computed with scipy.stats.linregress (simple linear regression)
662 | """
663 |
664 |
665 |     cmap = plt.get_cmap('RdYlBu')
666 | norm = mpl.colors.Normalize(vmin=w.min(), vmax=w.max())
667 | m = cm.ScalarMappable(norm=norm, cmap=cmap)
668 | m.set_array(w)
669 |
670 |
671 | plt.scatter(x0, y, label='', color=m.to_rgba(w))
672 |
673 | slope, intercept, r_value, p_value, std_err = linregress(x0, y)
674 |
675 | x1 = np.arange(np.min(x0), np.max(x0), 0.1)
676 | y1 = [i * slope + intercept for i in x1]
677 |
678 | plt.plot(x1, y1, c='g',
679 | label='simple linear regression m = {:.2f} b = {:.0f}, r^2 = {:.2f}'.format(slope, intercept, r_value ** 2))
680 |
681 | plt.legend()
682 |     cbar = plt.colorbar(m, ax=plt.gca())
683 |
684 | cbar.set_label('Julian Date')
685 |
686 | return slope, intercept
687 |
688 |
689 |
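# Hedged usage sketch for scatterColor(); the data are synthetic, with w standing
# in for a Julian-date style coloring variable (the colorbar label is hard-coded
# to 'Julian Date').
if __name__ == '__main__':
    import numpy as np
    rs = np.random.RandomState(0)
    x_demo = np.linspace(0.0, 10.0, 50)
    y_demo = 2.0 * x_demo + 1.0 + rs.normal(scale=1.0, size=50)
    w_demo = np.linspace(2457000.0, 2457050.0, 50)  # made-up Julian dates used only for coloring
    m_fit, b_fit = scatterColor(x_demo, y_demo, w_demo)
    # m_fit and b_fit are the slope and intercept of the least-squares fit line
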
690 |
691 | def peakdet(v, delta, x=None):
692 |     """Detect local maxima and minima in a vector.
693 |     :param v: data vector to search for peaks
694 |     :param delta: minimum change between a peak and its surrounding values required to register it; must be a positive scalar
695 |     :param x: optional x-values corresponding to v; defaults to the indices of v
696 |     :returns: two arrays, maxtab and mintab, of (position, value) pairs for the local maxima and minima
697 | 
698 | .. note:: https://gist.github.com/endolith/250860
699 | Converted from MATLAB script at http://billauer.co.il/peakdet.html
700 | """
701 | import sys
702 |
703 | maxtab = []
704 | mintab = []
705 |
706 | if x is None:
707 | x = np.arange(len(v))
708 |
709 | v = np.asarray(v)
710 |
711 | if len(v) != len(x):
712 | sys.exit('Input vectors v and x must have same length')
713 |
714 | if not np.isscalar(delta):
715 | sys.exit('Input argument delta must be a scalar')
716 |
717 | if delta <= 0:
718 | sys.exit('Input argument delta must be positive')
719 |
720 |     mn, mx = np.inf, -np.inf
721 |     mnpos, mxpos = np.nan, np.nan
722 |
723 | lookformax = True
724 |
725 | for i in np.arange(len(v)):
726 | this = v[i]
727 | if this > mx:
728 | mx = this
729 | mxpos = x[i]
730 | if this < mn:
731 | mn = this
732 | mnpos = x[i]
733 |
734 | if lookformax:
735 | if this < mx - delta:
736 | maxtab.append((mxpos, mx))
737 | mn = this
738 | mnpos = x[i]
739 | lookformax = False
740 | else:
741 | if this > mn + delta:
742 | mintab.append((mnpos, mn))
743 | mx = this
744 | mxpos = x[i]
745 | lookformax = True
746 |
747 | return np.array(maxtab), np.array(mintab)
748 |
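# Hedged usage sketch for peakdet(); a lightly perturbed sine wave is used as a
# simple synthetic test signal.
if __name__ == '__main__':
    import numpy as np
    t = np.linspace(0, 4 * np.pi, 400)
    demo_signal = np.sin(t) + 0.05 * np.cos(17 * t)
    maxtab, mintab = peakdet(demo_signal, 0.3, x=t)
    # maxtab[:, 0] holds the x positions of local maxima and maxtab[:, 1] their values;
    # mintab holds the same for local minima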
--------------------------------------------------------------------------------