├── gis
├── biolok_stn_locs.cpg
├── biolok_stn_locs2.CPG
├── forsuringsregionene.cpg
├── biolok_stn_locs_update_2024-12-23.cpg
├── site_locs.mxd
├── site_locs.png
├── biolok_stn_locs.dbf
├── biolok_stn_locs.sbn
├── biolok_stn_locs.sbx
├── biolok_stn_locs.shp
├── biolok_stn_locs.shx
├── biolok_stn_locs2.dbf
├── biolok_stn_locs2.sbn
├── biolok_stn_locs2.sbx
├── biolok_stn_locs2.shp
├── biolok_stn_locs2.shx
├── forsuringsregionene.dbf
├── forsuringsregionene.sbn
├── forsuringsregionene.sbx
├── forsuringsregionene.shp
├── forsuringsregionene.shx
├── site_locs_2021-08-06.png
├── site_locs_2025-01-01.png
├── biolok_stn_locs_update_2024-12-23.dbf
├── biolok_stn_locs_update_2024-12-23.sbn
├── biolok_stn_locs_update_2024-12-23.sbx
├── biolok_stn_locs_update_2024-12-23.shp
├── biolok_stn_locs_update_2024-12-23.shx
├── biolok_stn_locs.prj
├── biolok_stn_locs2.prj
├── biolok_stn_locs_update_2024-12-23.prj
└── forsuringsregionene.prj
├── README.md
├── .gitattributes
├── station_locs.xlsx
├── biolok_data_2020-08-21.xlsx
├── biolok_data_2020-09-02.xlsx
├── project_overview_from_lbs.xlsx
├── station_locs_update_2024-12-23.xlsx
├── water_samples_to_add_2020-08-21.xlsx
├── notebooks
├── region_plots
│ ├── water_chem_ts_region_i.png
│ ├── water_chem_ts_region_ii.png
│ ├── water_chem_ts_region_iii.png
│ ├── water_chem_ts_region_iv.png
│ ├── water_chem_ts_region_ix.png
│ ├── water_chem_ts_region_v.png
│ ├── water_chem_ts_region_vi.png
│ ├── water_chem_ts_region_vii.png
│ ├── water_chem_ts_region_x.png
│ └── water_chem_ts_region_viii.png
├── regional_mk_trends.csv
├── biolok_figures_2018_report.ipynb
└── update_db_2018_report.ipynb
└── .gitignore
/gis/biolok_stn_locs.cpg:
--------------------------------------------------------------------------------
1 | UTF-8
--------------------------------------------------------------------------------
/gis/biolok_stn_locs2.CPG:
--------------------------------------------------------------------------------
1 | UTF-8
--------------------------------------------------------------------------------
/gis/forsuringsregionene.cpg:
--------------------------------------------------------------------------------
1 | ANSI 1252
--------------------------------------------------------------------------------
/gis/biolok_stn_locs_update_2024-12-23.cpg:
--------------------------------------------------------------------------------
1 | UTF-8
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Biolok
2 |
3 | Data management for the Biolok project.
4 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/gis/site_locs.mxd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/site_locs.mxd
--------------------------------------------------------------------------------
/gis/site_locs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/site_locs.png
--------------------------------------------------------------------------------
/station_locs.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/station_locs.xlsx
--------------------------------------------------------------------------------
/gis/biolok_stn_locs.dbf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/biolok_stn_locs.dbf
--------------------------------------------------------------------------------
/gis/biolok_stn_locs.sbn:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/biolok_stn_locs.sbn
--------------------------------------------------------------------------------
/gis/biolok_stn_locs.sbx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/biolok_stn_locs.sbx
--------------------------------------------------------------------------------
/gis/biolok_stn_locs.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/biolok_stn_locs.shp
--------------------------------------------------------------------------------
/gis/biolok_stn_locs.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/biolok_stn_locs.shx
--------------------------------------------------------------------------------
/gis/biolok_stn_locs2.dbf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/biolok_stn_locs2.dbf
--------------------------------------------------------------------------------
/gis/biolok_stn_locs2.sbn:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/biolok_stn_locs2.sbn
--------------------------------------------------------------------------------
/gis/biolok_stn_locs2.sbx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/biolok_stn_locs2.sbx
--------------------------------------------------------------------------------
/gis/biolok_stn_locs2.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/biolok_stn_locs2.shp
--------------------------------------------------------------------------------
/gis/biolok_stn_locs2.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/biolok_stn_locs2.shx
--------------------------------------------------------------------------------
/biolok_data_2020-08-21.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/biolok_data_2020-08-21.xlsx
--------------------------------------------------------------------------------
/biolok_data_2020-09-02.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/biolok_data_2020-09-02.xlsx
--------------------------------------------------------------------------------
/gis/forsuringsregionene.dbf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/forsuringsregionene.dbf
--------------------------------------------------------------------------------
/gis/forsuringsregionene.sbn:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/forsuringsregionene.sbn
--------------------------------------------------------------------------------
/gis/forsuringsregionene.sbx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/forsuringsregionene.sbx
--------------------------------------------------------------------------------
/gis/forsuringsregionene.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/forsuringsregionene.shp
--------------------------------------------------------------------------------
/gis/forsuringsregionene.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/forsuringsregionene.shx
--------------------------------------------------------------------------------
/gis/site_locs_2021-08-06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/site_locs_2021-08-06.png
--------------------------------------------------------------------------------
/gis/site_locs_2025-01-01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/site_locs_2025-01-01.png
--------------------------------------------------------------------------------
/project_overview_from_lbs.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/project_overview_from_lbs.xlsx
--------------------------------------------------------------------------------
/station_locs_update_2024-12-23.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/station_locs_update_2024-12-23.xlsx
--------------------------------------------------------------------------------
/water_samples_to_add_2020-08-21.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/water_samples_to_add_2020-08-21.xlsx
--------------------------------------------------------------------------------
/gis/biolok_stn_locs_update_2024-12-23.dbf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/biolok_stn_locs_update_2024-12-23.dbf
--------------------------------------------------------------------------------
/gis/biolok_stn_locs_update_2024-12-23.sbn:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/biolok_stn_locs_update_2024-12-23.sbn
--------------------------------------------------------------------------------
/gis/biolok_stn_locs_update_2024-12-23.sbx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/biolok_stn_locs_update_2024-12-23.sbx
--------------------------------------------------------------------------------
/gis/biolok_stn_locs_update_2024-12-23.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/biolok_stn_locs_update_2024-12-23.shp
--------------------------------------------------------------------------------
/gis/biolok_stn_locs_update_2024-12-23.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/gis/biolok_stn_locs_update_2024-12-23.shx
--------------------------------------------------------------------------------
/notebooks/region_plots/water_chem_ts_region_i.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/notebooks/region_plots/water_chem_ts_region_i.png
--------------------------------------------------------------------------------
/notebooks/region_plots/water_chem_ts_region_ii.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/notebooks/region_plots/water_chem_ts_region_ii.png
--------------------------------------------------------------------------------
/notebooks/region_plots/water_chem_ts_region_iii.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/notebooks/region_plots/water_chem_ts_region_iii.png
--------------------------------------------------------------------------------
/notebooks/region_plots/water_chem_ts_region_iv.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/notebooks/region_plots/water_chem_ts_region_iv.png
--------------------------------------------------------------------------------
/notebooks/region_plots/water_chem_ts_region_ix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/notebooks/region_plots/water_chem_ts_region_ix.png
--------------------------------------------------------------------------------
/notebooks/region_plots/water_chem_ts_region_v.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/notebooks/region_plots/water_chem_ts_region_v.png
--------------------------------------------------------------------------------
/notebooks/region_plots/water_chem_ts_region_vi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/notebooks/region_plots/water_chem_ts_region_vi.png
--------------------------------------------------------------------------------
/notebooks/region_plots/water_chem_ts_region_vii.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/notebooks/region_plots/water_chem_ts_region_vii.png
--------------------------------------------------------------------------------
/notebooks/region_plots/water_chem_ts_region_x.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/notebooks/region_plots/water_chem_ts_region_x.png
--------------------------------------------------------------------------------
/notebooks/region_plots/water_chem_ts_region_viii.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JamesSample/biolok/master/notebooks/region_plots/water_chem_ts_region_viii.png
--------------------------------------------------------------------------------
/gis/biolok_stn_locs.prj:
--------------------------------------------------------------------------------
1 | GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
--------------------------------------------------------------------------------
/gis/biolok_stn_locs2.prj:
--------------------------------------------------------------------------------
1 | GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
--------------------------------------------------------------------------------
/gis/biolok_stn_locs_update_2024-12-23.prj:
--------------------------------------------------------------------------------
1 | GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
--------------------------------------------------------------------------------
/gis/forsuringsregionene.prj:
--------------------------------------------------------------------------------
1 | PROJCS["WGS_1984_UTM_Zone_33N",GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",500000.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",15.0],PARAMETER["Scale_Factor",0.9996],PARAMETER["Latitude_Of_Origin",0.0],UNIT["Meter",1.0]]
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # celery beat schedule file
95 | celerybeat-schedule
96 |
97 | # SageMath parsed files
98 | *.sage.py
99 |
100 | # Environments
101 | .env
102 | .venv
103 | env/
104 | venv/
105 | ENV/
106 | env.bak/
107 | venv.bak/
108 |
109 | # Spyder project settings
110 | .spyderproject
111 | .spyproject
112 |
113 | # Rope project settings
114 | .ropeproject
115 |
116 | # mkdocs documentation
117 | /site
118 |
119 | # mypy
120 | .mypy_cache/
121 | .dmypy.json
122 | dmypy.json
123 |
124 | # Pyre type checker
125 | .pyre/
126 |
--------------------------------------------------------------------------------
/notebooks/regional_mk_trends.csv:
--------------------------------------------------------------------------------
1 | region,parameter,s,z,var_s,p,sslp,trend
2 | I,ESO4_uekv/l,-991.0,-11.63525509403131,7239.666666666666,0.0,-0.8412699999999997,decreasing
3 | I,ECa-Mg_uekv/l,-60.0,-0.6933663075078519,7240.666666666666,0.48807970071388085,-0.04429083333333361,no trend
4 | I,ANC_uekv/l,705.0,8.27281655005634,7241.666666666666,2.220446049250313e-16,1.1235575000000004,increasing
5 | I,pH,657.0,7.7133778580770835,7232.999999999999,1.2212453270876722e-14,0.015263157894736845,increasing
6 | I,LAL_ug/l,-8.0,-0.08264130673512297,7174.666666666667,0.9341367555671447,0.0,no trend
7 | I,TOC_mg-C/l,387.0,4.545792767979856,7210.333333333333,5.4728864435915625e-06,0.026666666666666648,increasing
8 | IV,ESO4_uekv/l,-1679.0,-14.151564147572177,14059.666666666666,0.0,-2.06651,decreasing
9 | IV,ECa-Mg_uekv/l,-711.0,-5.987848953978692,14059.666666666666,2.1263439897722947e-09,-0.5111227564102564,decreasing
10 | IV,ANC_uekv/l,1315.0,11.081737359898593,14059.666666666666,0.0,1.9401192857142857,increasing
11 | IV,pH,1137.0,9.586697014296265,14041.666666666666,0.0,0.021764705882353,increasing
12 | IV,LAL_ug/l,-1066.0,-8.985502275273886,14048.000000000002,0.0,-2.6666666666666665,decreasing
13 | IV,TOC_mg-C/l,981.0,8.270606652674687,14040.333333333334,2.220446049250313e-16,0.0918181818181818,increasing
14 | II,ESO4_uekv/l,-2320.0,-16.069846166706817,20824.666666666664,0.0,-2.2696648263888886,decreasing
15 | II,ECa-Mg_uekv/l,-1546.0,-10.705787053760268,20826.666666666664,0.0,-0.8966040760389037,decreasing
16 | II,ANC_uekv/l,1788.0,12.382680559915599,20826.666666666664,0.0,1.7542065614035092,increasing
17 | II,pH,1279.0,8.859702987302262,20807.666666666664,0.0,0.01428571428571431,increasing
18 | II,LAL_ug/l,-1201.0,-8.320436273483097,20800.333333333332,0.0,-2.0000000000000004,decreasing
19 | II,TOC_mg-C/l,1113.0,7.711259617325164,20795.0,1.2434497875801753e-14,0.08614814814814814,increasing
20 | X,ESO4_uekv/l,-720.0,-8.666635310745093,6882.666666666667,0.0,-1.3306742663043472,decreasing
21 | X,ECa-Mg_uekv/l,54.0,0.6388479436293324,6882.666666666667,0.522921855509767,0.0802303846153839,no trend
22 | X,ANC_uekv/l,700.0,8.425560615035911,6882.666666666667,0.0,1.9263091125541125,increasing
23 | X,pH,693.0,8.348667986226507,6870.333333333334,0.0,0.020000000000000316,increasing
24 | X,LAL_ug/l,14.0,0.16038386220948114,6569.999999999999,0.8725787011511899,0.0,no trend
25 | X,TOC_mg-C/l,321.0,3.8745917870024393,6821.0,0.00010680358242098897,0.013137254901960767,increasing
26 | V,ESO4_uekv/l,-1374.0,-12.677147142636008,11730.0,0.0,-1.6488776,decreasing
27 | V,ECa-Mg_uekv/l,-520.0,-4.792017018957092,11730.0,1.6511289293585918e-06,-0.3514439999999999,decreasing
28 | V,ANC_uekv/l,978.0,9.020810457651406,11730.0,0.0,1.8899920000000001,increasing
29 | V,pH,998.0,9.211234385281168,11715.333333333332,0.0,0.021568627450980368,increasing
30 | V,LAL_ug/l,-899.0,-8.295752124677072,11717.666666666666,0.0,-2.833333333333334,decreasing
31 | V,TOC_mg-C/l,950.0,8.77200874309453,11704.0,0.0,0.03611111111111106,increasing
32 | III,ESO4_uekv/l,-671.0,-9.23720648536655,5261.0,0.0,-0.8624024747474749,decreasing
33 | III,ECa-Mg_uekv/l,-87.0,-1.1856712802112288,5261.0,0.23575214060493388,-0.10593597222222106,no trend
34 | III,ANC_uekv/l,524.0,7.209850624536281,5262.0,5.60218538225854e-13,1.0438662037037039,increasing
35 | III,pH,490.0,6.743705065232814,5258.0,1.5439871603462052e-11,0.023333333333333428,increasing
36 | III,LAL_ug/l,-208.0,-2.866900355272018,5213.333333333333,0.004145135015002799,-0.14068825910931176,decreasing
37 | III,TOC_mg-C/l,324.0,4.454430953108791,5258.0,8.411599311086704e-06,0.010350877192982456,increasing
38 | VII,ESO4_uekv/l,-1209.0,-11.506849504471477,11021.0,0.0,-0.42436677419354835,decreasing
39 | VII,ECa-Mg_uekv/l,16.0,0.1428765831706107,11022.0,0.8863876527344121,0.002630833333333508,no trend
40 | VII,ANC_uekv/l,941.0,8.953193070217464,11023.0,0.0,0.6463140350877193,increasing
41 | VII,pH,1020.0,9.70960690839739,11014.0,0.0,0.018888888888888882,increasing
42 | VII,LAL_ug/l,-588.0,-5.6032813815041465,10974.666666666668,2.1033135677583914e-08,-0.3333333333333333,decreasing
43 | VII,TOC_mg-C/l,60.0,0.562389569590987,11006.0,0.5738506240062637,0.0010256410256410321,no trend
44 | VI,ESO4_uekv/l,-892.0,-9.708517041764388,8422.666666666666,0.0,-0.906736,decreasing
45 | VI,ECa-Mg_uekv/l,-150.0,-1.6235342752221031,8422.666666666666,0.10447524250493267,-0.10405453081232498,no trend
46 | VI,ANC_uekv/l,704.0,7.660030842155292,8422.666666666666,1.865174681370263e-14,1.116824431818182,increasing
47 | VI,pH,730.0,7.945218977030653,8418.666666666666,1.9984014443252818e-15,0.026904761904761924,increasing
48 | VI,LAL_ug/l,-651.0,-7.09391144746038,8395.666666666666,1.3038459201197838e-12,-1.5,decreasing
49 | VI,TOC_mg-C/l,565.0,6.155332394411776,8395.666666666666,7.492018117005728e-10,0.020875,increasing
50 | IX,ESO4_uekv/l,-34.0,-0.9964984287699772,1096.6666666666667,0.3190080301151297,-0.0524026362179487,no trend
51 | IX,ECa-Mg_uekv/l,90.0,2.6875260654705446,1096.6666666666667,0.007198348391553422,0.43646352083333295,increasing
52 | IX,ANC_uekv/l,90.0,2.6875260654705446,1096.6666666666667,0.007198348391553422,0.4849736499999999,increasing
53 | IX,pH,66.0,1.964592174394741,1094.6666666666667,0.049461458439756134,0.005634920634920839,increasing
54 | IX,LAL_ug/l,87.0,2.6092572470734536,1086.3333333333333,0.009073900073062857,0.18181818181818182,increasing
55 | IX,TOC_mg-C/l,41.0,1.2084279640112776,1095.6666666666667,0.22688268885268403,0.0032539682539682365,no trend
56 | VIII,ESO4_uekv/l,-469.0,-5.204392390329606,8086.333333333334,1.9463236622385693e-07,-0.20392529411764707,decreasing
57 | VIII,ECa-Mg_uekv/l,265.0,2.9346016451584553,8093.0,0.003339761465631419,0.20926399999999973,increasing
58 | VIII,ANC_uekv/l,613.0,6.802940177412783,8093.0,1.0250467141759145e-11,0.7292633333333335,increasing
59 | VIII,pH,397.0,4.403716637971206,8086.333333333333,1.0641191712457143e-05,0.009230769230769239,increasing
60 | VIII,LAL_ug/l,289.0,3.273502341364007,7740.333333333334,0.001062235106070064,0.03333333333333333,increasing
61 | VIII,TOC_mg-C/l,196.0,2.170421259572428,8072.0,0.029974947708887534,0.004700000000000002,increasing
62 |
--------------------------------------------------------------------------------
/notebooks/biolok_figures_2018_report.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "%matplotlib inline\n",
10 | "\n",
11 | "import datetime as dt\n",
12 | "\n",
13 | "import matplotlib.pyplot as plt\n",
14 | "import numpy as np\n",
15 | "import pandas as pd\n",
16 | "import seaborn as sn\n",
17 | "\n",
18 | "import nivapy3 as nivapy\n",
19 | "\n",
20 | "plt.style.use(\"ggplot\")"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "# Biolok data analysis"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "## 1. Figures for the 2018 report\n",
35 | "\n",
36 | "With help from Liv Bente, I have created a new \"dataset\" named `BIOLOK rapp2018`, which is accessible under `Datasets` in RESA's `Projects` window. The steps to create this dataset are described in *update_db_2018_report.ipynb*.\n",
37 | "\n",
38 | "To export the relevant data from RESA2, select the dataset and remember to check `Use only water samples from selected projects` before extracting the data.\n",
39 | "\n",
40 | "Øyvind would like a set of plots similar to the ones on pages 29 - 38 [here](https://www.miljodirektoratet.no/globalassets/publikasjoner/m503/m503.pdf) (see e-mail received 17.08.2020 at 15:34).\n",
41 | "\n",
42 | "For now, I have manually extracted the relevant data from RESA and saved it to \n",
43 | "\n",
44 | " biolok\\biolok_data_2020-08-21.xlsx"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 2,
50 | "metadata": {},
51 | "outputs": [
52 | {
53 | "data": {
54 | "text/html": [
55 | "
\n",
56 | "\n",
69 | "
\n",
70 | " \n",
71 | " \n",
72 | " | \n",
73 | " station_id | \n",
74 | " station_code | \n",
75 | " biolok_code | \n",
76 | " station_name | \n",
77 | " date | \n",
78 | " depth1 | \n",
79 | " depth2 | \n",
80 | " ESO4_uekv/l | \n",
81 | " ANC_uekv/l | \n",
82 | " pH | \n",
83 | " LAL_ug/l | \n",
84 | " TOC_mg-C/l | \n",
85 | " ECa-Mg_uekv/l | \n",
86 | " region | \n",
87 | " site_no | \n",
88 | " year | \n",
89 | "
\n",
90 | " \n",
91 | " \n",
92 | " \n",
93 | " | 0 | \n",
94 | " 14 | \n",
95 | " 432-1-26 | \n",
96 | " I-3 | \n",
97 | " Måsabutjørna | \n",
98 | " 1995-09-20 | \n",
99 | " 0.0 | \n",
100 | " 0.0 | \n",
101 | " 38.68637 | \n",
102 | " 6.63334 | \n",
103 | " 5.68 | \n",
104 | " 0.0 | \n",
105 | " 1.6 | \n",
106 | " 26.03949 | \n",
107 | " I | \n",
108 | " 3 | \n",
109 | " 1995 | \n",
110 | "
\n",
111 | " \n",
112 | " | 1 | \n",
113 | " 14 | \n",
114 | " 432-1-26 | \n",
115 | " I-3 | \n",
116 | " Måsabutjørna | \n",
117 | " 1996-10-10 | \n",
118 | " 0.0 | \n",
119 | " 0.0 | \n",
120 | " 36.31385 | \n",
121 | " 3.69217 | \n",
122 | " 5.73 | \n",
123 | " 0.0 | \n",
124 | " 1.9 | \n",
125 | " 24.88327 | \n",
126 | " I | \n",
127 | " 3 | \n",
128 | " 1996 | \n",
129 | "
\n",
130 | " \n",
131 | " | 2 | \n",
132 | " 14 | \n",
133 | " 432-1-26 | \n",
134 | " I-3 | \n",
135 | " Måsabutjørna | \n",
136 | " 1997-10-26 | \n",
137 | " 0.0 | \n",
138 | " 0.0 | \n",
139 | " 36.02333 | \n",
140 | " 6.46292 | \n",
141 | " 5.70 | \n",
142 | " 9.0 | \n",
143 | " 1.5 | \n",
144 | " 27.71908 | \n",
145 | " I | \n",
146 | " 3 | \n",
147 | " 1997 | \n",
148 | "
\n",
149 | " \n",
150 | " | 3 | \n",
151 | " 14 | \n",
152 | " 432-1-26 | \n",
153 | " I-3 | \n",
154 | " Måsabutjørna | \n",
155 | " 1998-07-06 | \n",
156 | " 0.0 | \n",
157 | " 0.0 | \n",
158 | " 30.06786 | \n",
159 | " 5.48708 | \n",
160 | " 5.78 | \n",
161 | " 2.0 | \n",
162 | " 1.7 | \n",
163 | " 21.91635 | \n",
164 | " I | \n",
165 | " 3 | \n",
166 | " 1998 | \n",
167 | "
\n",
168 | " \n",
169 | " | 4 | \n",
170 | " 14 | \n",
171 | " 432-1-26 | \n",
172 | " I-3 | \n",
173 | " Måsabutjørna | \n",
174 | " 1998-09-15 | \n",
175 | " 0.0 | \n",
176 | " 0.0 | \n",
177 | " 32.14986 | \n",
178 | " 4.58344 | \n",
179 | " 5.93 | \n",
180 | " 2.0 | \n",
181 | " 1.7 | \n",
182 | " 22.23999 | \n",
183 | " I | \n",
184 | " 3 | \n",
185 | " 1998 | \n",
186 | "
\n",
187 | " \n",
188 | "
\n",
189 | "
"
190 | ],
191 | "text/plain": [
192 | " station_id station_code biolok_code station_name date depth1 \\\n",
193 | "0 14 432-1-26 I-3 Måsabutjørna 1995-09-20 0.0 \n",
194 | "1 14 432-1-26 I-3 Måsabutjørna 1996-10-10 0.0 \n",
195 | "2 14 432-1-26 I-3 Måsabutjørna 1997-10-26 0.0 \n",
196 | "3 14 432-1-26 I-3 Måsabutjørna 1998-07-06 0.0 \n",
197 | "4 14 432-1-26 I-3 Måsabutjørna 1998-09-15 0.0 \n",
198 | "\n",
199 | " depth2 ESO4_uekv/l ANC_uekv/l pH LAL_ug/l TOC_mg-C/l ECa-Mg_uekv/l \\\n",
200 | "0 0.0 38.68637 6.63334 5.68 0.0 1.6 26.03949 \n",
201 | "1 0.0 36.31385 3.69217 5.73 0.0 1.9 24.88327 \n",
202 | "2 0.0 36.02333 6.46292 5.70 9.0 1.5 27.71908 \n",
203 | "3 0.0 30.06786 5.48708 5.78 2.0 1.7 21.91635 \n",
204 | "4 0.0 32.14986 4.58344 5.93 2.0 1.7 22.23999 \n",
205 | "\n",
206 | " region site_no year \n",
207 | "0 I 3 1995 \n",
208 | "1 I 3 1996 \n",
209 | "2 I 3 1997 \n",
210 | "3 I 3 1998 \n",
211 | "4 I 3 1998 "
212 | ]
213 | },
214 | "execution_count": 2,
215 | "metadata": {},
216 | "output_type": "execute_result"
217 | }
218 | ],
219 | "source": [
220 | "# Read data\n",
221 | "df = pd.read_excel(\"../biolok_data_2020-08-21.xlsx\", sheet_name=\"data\")\n",
222 | "\n",
223 | "# Replace negative LAL with 0\n",
224 | "df[\"LAL_ug/l\"] = df[\"LAL_ug/l\"].clip(lower=0)\n",
225 | "\n",
226 | "# Add Ca and Mg\n",
227 | "df[\"ECa-Mg_uekv/l\"] = df[\"EMg_uekv/l\"] + df[\"ECa_uekv/l\"]\n",
228 | "del df[\"EMg_uekv/l\"], df[\"ECa_uekv/l\"]\n",
229 | "\n",
230 | "# Get region and site number\n",
231 | "df[[\"region\", \"site_no\"]] = df[\"biolok_code\"].str.split(\"-\", expand=True)\n",
232 | "\n",
233 | "df[\"year\"] = df[\"date\"].dt.year\n",
234 | "\n",
235 | "df.head()"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": 3,
241 | "metadata": {},
242 | "outputs": [],
243 | "source": [
244 | "# Annual means by station\n",
245 | "agg = df.groupby([\"station_name\", \"year\", \"region\"]).mean().reset_index()\n",
246 | "del agg[\"station_id\"], agg[\"depth1\"], agg[\"depth2\"]"
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "execution_count": 4,
252 | "metadata": {},
253 | "outputs": [],
254 | "source": [
255 | "label_dict = {\n",
256 | " \"ESO4_uekv/l\": \"Ikke-marin $SO_4$ (µekv/l)\",\n",
257 | " \"ECa-Mg_uekv/l\": \"Ikke-marin Ca+Mg (µekv/l)\",\n",
258 | " \"ANC_uekv/l\": \"ANC (µekv/l)\",\n",
259 | " \"pH\": \"pH\",\n",
260 | " \"LAL_ug/l\": \"Labilt Al (µg/l)\",\n",
261 | " \"TOC_mg-C/l\": \"TOC (mg C/l)\",\n",
262 | "}\n",
263 | "\n",
264 | "# Pars to plot\n",
265 | "pars = [\"ESO4_uekv/l\", \"ECa-Mg_uekv/l\", \"ANC_uekv/l\", \"pH\", \"LAL_ug/l\", \"TOC_mg-C/l\"]\n",
266 | "\n",
267 | "# Loop over regions\n",
268 | "for reg in agg[\"region\"].unique():\n",
269 | " reg_df = agg.query(\"region == @reg\")\n",
270 | " del reg_df[\"region\"]\n",
271 | "\n",
272 | " # Setup plot\n",
273 | " fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(16, 9))\n",
274 | " axes = axes.flatten()\n",
275 | "\n",
276 | " # Loop over pars\n",
277 | " # max_list = []\n",
278 | " for idx, par in enumerate(pars):\n",
279 | " par_df = reg_df[[\"station_name\", \"year\", par]].copy()\n",
280 | " par_df.set_index([\"station_name\", \"year\"], inplace=True)\n",
281 | " par_df = par_df.unstack(\"station_name\")\n",
282 | " par_df.columns = par_df.columns.get_level_values(1)\n",
283 | "\n",
284 | " if len(par_df.columns) > 1:\n",
285 | " par_df[\"Snitt\"] = par_df.mean(axis=1, skipna=False)\n",
286 | "\n",
287 | " # if par in [\"ESO4_uekv/l\", \"ECa-Mg_uekv/l\", \"ANC_uekv/l\"]:\n",
288 | " # max_list.append(np.nanmax(par_df.values))\n",
289 | "\n",
290 | " # Plot each station\n",
291 | " for stn in par_df.columns:\n",
292 | " if stn == \"Snitt\":\n",
293 | " par_df[stn].dropna().plot(ax=axes[idx], style=\"-\", lw=3, c=\"k\")\n",
294 | " else:\n",
295 | " par_df[stn].dropna().plot(ax=axes[idx], style=\"--\", lw=2)\n",
296 | "\n",
297 | " # if par == 'pH':\n",
298 | " # axes[idx].set_ylim((4, 7))\n",
299 | " axes[idx].set_xlim((1985, 2020))\n",
300 | " axes[idx].set_xlabel(\"\")\n",
301 | " axes[idx].set_ylabel(label_dict[par])\n",
302 | "\n",
303 | " # for ax in [0, 1]:\n",
304 | " # axes[ax].set_ylim((0, max(max_list)))\n",
305 | " # axes[2].set_ylim(ymax=max(max_list))\n",
306 | "\n",
307 | " axes[-1].legend(loc=\"upper center\", bbox_to_anchor=(0.5, -0.1), ncol=3)\n",
308 | " plt.tight_layout()\n",
309 | "\n",
310 | " out_png = f\"./region_plots/water_chem_ts_region_{reg.lower()}.png\"\n",
311 | " plt.savefig(out_png, dpi=300)\n",
312 | " plt.close()"
313 | ]
314 | },
315 | {
316 | "cell_type": "markdown",
317 | "metadata": {},
318 | "source": [
319 | "## 2. Regional Mann-Kendall"
320 | ]
321 | },
322 | {
323 | "cell_type": "code",
324 | "execution_count": 5,
325 | "metadata": {},
326 | "outputs": [
327 | {
328 | "name": "stdout",
329 | "output_type": "stream",
330 | "text": [
331 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
332 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
333 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
334 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
335 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
336 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
337 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
338 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
339 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
340 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
341 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
342 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
343 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
344 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
345 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
346 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
347 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
348 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
349 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
350 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
351 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
352 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
353 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
354 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
355 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
356 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
357 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
358 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
359 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n",
360 | "WARNING: The data series has fewer than 10 non-null values. Significance estimates may be unreliable.\n"
361 | ]
362 | },
363 | {
364 | "data": {
365 | "text/html": [
366 | "\n",
367 | "\n",
380 | "
\n",
381 | " \n",
382 | " \n",
383 | " | \n",
384 | " region | \n",
385 | " parameter | \n",
386 | " s | \n",
387 | " z | \n",
388 | " var_s | \n",
389 | " p | \n",
390 | " sslp | \n",
391 | " trend | \n",
392 | "
\n",
393 | " \n",
394 | " \n",
395 | " \n",
396 | " | 0 | \n",
397 | " I | \n",
398 | " ESO4_uekv/l | \n",
399 | " -991 | \n",
400 | " -11.6353 | \n",
401 | " 7239.67 | \n",
402 | " 0 | \n",
403 | " -0.84127 | \n",
404 | " decreasing | \n",
405 | "
\n",
406 | " \n",
407 | " | 1 | \n",
408 | " I | \n",
409 | " ECa-Mg_uekv/l | \n",
410 | " -60 | \n",
411 | " -0.693366 | \n",
412 | " 7240.67 | \n",
413 | " 0.48808 | \n",
414 | " -0.0442908 | \n",
415 | " no trend | \n",
416 | "
\n",
417 | " \n",
418 | " | 2 | \n",
419 | " I | \n",
420 | " ANC_uekv/l | \n",
421 | " 705 | \n",
422 | " 8.27282 | \n",
423 | " 7241.67 | \n",
424 | " 2.22045e-16 | \n",
425 | " 1.12356 | \n",
426 | " increasing | \n",
427 | "
\n",
428 | " \n",
429 | " | 3 | \n",
430 | " I | \n",
431 | " pH | \n",
432 | " 657 | \n",
433 | " 7.71338 | \n",
434 | " 7233 | \n",
435 | " 1.22125e-14 | \n",
436 | " 0.0152632 | \n",
437 | " increasing | \n",
438 | "
\n",
439 | " \n",
440 | " | 4 | \n",
441 | " I | \n",
442 | " LAL_ug/l | \n",
443 | " -8 | \n",
444 | " -0.0826413 | \n",
445 | " 7174.67 | \n",
446 | " 0.934137 | \n",
447 | " 0 | \n",
448 | " no trend | \n",
449 | "
\n",
450 | " \n",
451 | " | 5 | \n",
452 | " I | \n",
453 | " TOC_mg-C/l | \n",
454 | " 387 | \n",
455 | " 4.54579 | \n",
456 | " 7210.33 | \n",
457 | " 5.47289e-06 | \n",
458 | " 0.0266667 | \n",
459 | " increasing | \n",
460 | "
\n",
461 | " \n",
462 | " | 6 | \n",
463 | " IV | \n",
464 | " ESO4_uekv/l | \n",
465 | " -1679 | \n",
466 | " -14.1516 | \n",
467 | " 14059.7 | \n",
468 | " 0 | \n",
469 | " -2.06651 | \n",
470 | " decreasing | \n",
471 | "
\n",
472 | " \n",
473 | " | 7 | \n",
474 | " IV | \n",
475 | " ECa-Mg_uekv/l | \n",
476 | " -711 | \n",
477 | " -5.98785 | \n",
478 | " 14059.7 | \n",
479 | " 2.12634e-09 | \n",
480 | " -0.511123 | \n",
481 | " decreasing | \n",
482 | "
\n",
483 | " \n",
484 | " | 8 | \n",
485 | " IV | \n",
486 | " ANC_uekv/l | \n",
487 | " 1315 | \n",
488 | " 11.0817 | \n",
489 | " 14059.7 | \n",
490 | " 0 | \n",
491 | " 1.94012 | \n",
492 | " increasing | \n",
493 | "
\n",
494 | " \n",
495 | " | 9 | \n",
496 | " IV | \n",
497 | " pH | \n",
498 | " 1137 | \n",
499 | " 9.5867 | \n",
500 | " 14041.7 | \n",
501 | " 0 | \n",
502 | " 0.0217647 | \n",
503 | " increasing | \n",
504 | "
\n",
505 | " \n",
506 | " | 10 | \n",
507 | " IV | \n",
508 | " LAL_ug/l | \n",
509 | " -1066 | \n",
510 | " -8.9855 | \n",
511 | " 14048 | \n",
512 | " 0 | \n",
513 | " -2.66667 | \n",
514 | " decreasing | \n",
515 | "
\n",
516 | " \n",
517 | " | 11 | \n",
518 | " IV | \n",
519 | " TOC_mg-C/l | \n",
520 | " 981 | \n",
521 | " 8.27061 | \n",
522 | " 14040.3 | \n",
523 | " 2.22045e-16 | \n",
524 | " 0.0918182 | \n",
525 | " increasing | \n",
526 | "
\n",
527 | " \n",
528 | " | 12 | \n",
529 | " II | \n",
530 | " ESO4_uekv/l | \n",
531 | " -2320 | \n",
532 | " -16.0698 | \n",
533 | " 20824.7 | \n",
534 | " 0 | \n",
535 | " -2.26966 | \n",
536 | " decreasing | \n",
537 | "
\n",
538 | " \n",
539 | " | 13 | \n",
540 | " II | \n",
541 | " ECa-Mg_uekv/l | \n",
542 | " -1546 | \n",
543 | " -10.7058 | \n",
544 | " 20826.7 | \n",
545 | " 0 | \n",
546 | " -0.896604 | \n",
547 | " decreasing | \n",
548 | "
\n",
549 | " \n",
550 | " | 14 | \n",
551 | " II | \n",
552 | " ANC_uekv/l | \n",
553 | " 1788 | \n",
554 | " 12.3827 | \n",
555 | " 20826.7 | \n",
556 | " 0 | \n",
557 | " 1.75421 | \n",
558 | " increasing | \n",
559 | "
\n",
560 | " \n",
561 | " | 15 | \n",
562 | " II | \n",
563 | " pH | \n",
564 | " 1279 | \n",
565 | " 8.8597 | \n",
566 | " 20807.7 | \n",
567 | " 0 | \n",
568 | " 0.0142857 | \n",
569 | " increasing | \n",
570 | "
\n",
571 | " \n",
572 | " | 16 | \n",
573 | " II | \n",
574 | " LAL_ug/l | \n",
575 | " -1201 | \n",
576 | " -8.32044 | \n",
577 | " 20800.3 | \n",
578 | " 0 | \n",
579 | " -2 | \n",
580 | " decreasing | \n",
581 | "
\n",
582 | " \n",
583 | " | 17 | \n",
584 | " II | \n",
585 | " TOC_mg-C/l | \n",
586 | " 1113 | \n",
587 | " 7.71126 | \n",
588 | " 20795 | \n",
589 | " 1.24345e-14 | \n",
590 | " 0.0861481 | \n",
591 | " increasing | \n",
592 | "
\n",
593 | " \n",
594 | " | 18 | \n",
595 | " X | \n",
596 | " ESO4_uekv/l | \n",
597 | " -720 | \n",
598 | " -8.66664 | \n",
599 | " 6882.67 | \n",
600 | " 0 | \n",
601 | " -1.33067 | \n",
602 | " decreasing | \n",
603 | "
\n",
604 | " \n",
605 | " | 19 | \n",
606 | " X | \n",
607 | " ECa-Mg_uekv/l | \n",
608 | " 54 | \n",
609 | " 0.638848 | \n",
610 | " 6882.67 | \n",
611 | " 0.522922 | \n",
612 | " 0.0802304 | \n",
613 | " no trend | \n",
614 | "
\n",
615 | " \n",
616 | " | 20 | \n",
617 | " X | \n",
618 | " ANC_uekv/l | \n",
619 | " 700 | \n",
620 | " 8.42556 | \n",
621 | " 6882.67 | \n",
622 | " 0 | \n",
623 | " 1.92631 | \n",
624 | " increasing | \n",
625 | "
\n",
626 | " \n",
627 | " | 21 | \n",
628 | " X | \n",
629 | " pH | \n",
630 | " 693 | \n",
631 | " 8.34867 | \n",
632 | " 6870.33 | \n",
633 | " 0 | \n",
634 | " 0.02 | \n",
635 | " increasing | \n",
636 | "
\n",
637 | " \n",
638 | " | 22 | \n",
639 | " X | \n",
640 | " LAL_ug/l | \n",
641 | " 14 | \n",
642 | " 0.160384 | \n",
643 | " 6570 | \n",
644 | " 0.872579 | \n",
645 | " 0 | \n",
646 | " no trend | \n",
647 | "
\n",
648 | " \n",
649 | " | 23 | \n",
650 | " X | \n",
651 | " TOC_mg-C/l | \n",
652 | " 321 | \n",
653 | " 3.87459 | \n",
654 | " 6821 | \n",
655 | " 0.000106804 | \n",
656 | " 0.0131373 | \n",
657 | " increasing | \n",
658 | "
\n",
659 | " \n",
660 | " | 24 | \n",
661 | " V | \n",
662 | " ESO4_uekv/l | \n",
663 | " -1374 | \n",
664 | " -12.6771 | \n",
665 | " 11730 | \n",
666 | " 0 | \n",
667 | " -1.64888 | \n",
668 | " decreasing | \n",
669 | "
\n",
670 | " \n",
671 | " | 25 | \n",
672 | " V | \n",
673 | " ECa-Mg_uekv/l | \n",
674 | " -520 | \n",
675 | " -4.79202 | \n",
676 | " 11730 | \n",
677 | " 1.65113e-06 | \n",
678 | " -0.351444 | \n",
679 | " decreasing | \n",
680 | "
\n",
681 | " \n",
682 | " | 26 | \n",
683 | " V | \n",
684 | " ANC_uekv/l | \n",
685 | " 978 | \n",
686 | " 9.02081 | \n",
687 | " 11730 | \n",
688 | " 0 | \n",
689 | " 1.88999 | \n",
690 | " increasing | \n",
691 | "
\n",
692 | " \n",
693 | " | 27 | \n",
694 | " V | \n",
695 | " pH | \n",
696 | " 998 | \n",
697 | " 9.21123 | \n",
698 | " 11715.3 | \n",
699 | " 0 | \n",
700 | " 0.0215686 | \n",
701 | " increasing | \n",
702 | "
\n",
703 | " \n",
704 | " | 28 | \n",
705 | " V | \n",
706 | " LAL_ug/l | \n",
707 | " -899 | \n",
708 | " -8.29575 | \n",
709 | " 11717.7 | \n",
710 | " 0 | \n",
711 | " -2.83333 | \n",
712 | " decreasing | \n",
713 | "
\n",
714 | " \n",
715 | " | 29 | \n",
716 | " V | \n",
717 | " TOC_mg-C/l | \n",
718 | " 950 | \n",
719 | " 8.77201 | \n",
720 | " 11704 | \n",
721 | " 0 | \n",
722 | " 0.0361111 | \n",
723 | " increasing | \n",
724 | "
\n",
725 | " \n",
726 | " | 30 | \n",
727 | " III | \n",
728 | " ESO4_uekv/l | \n",
729 | " -671 | \n",
730 | " -9.23721 | \n",
731 | " 5261 | \n",
732 | " 0 | \n",
733 | " -0.862402 | \n",
734 | " decreasing | \n",
735 | "
\n",
736 | " \n",
737 | " | 31 | \n",
738 | " III | \n",
739 | " ECa-Mg_uekv/l | \n",
740 | " -87 | \n",
741 | " -1.18567 | \n",
742 | " 5261 | \n",
743 | " 0.235752 | \n",
744 | " -0.105936 | \n",
745 | " no trend | \n",
746 | "
\n",
747 | " \n",
748 | " | 32 | \n",
749 | " III | \n",
750 | " ANC_uekv/l | \n",
751 | " 524 | \n",
752 | " 7.20985 | \n",
753 | " 5262 | \n",
754 | " 5.60219e-13 | \n",
755 | " 1.04387 | \n",
756 | " increasing | \n",
757 | "
\n",
758 | " \n",
759 | " | 33 | \n",
760 | " III | \n",
761 | " pH | \n",
762 | " 490 | \n",
763 | " 6.74371 | \n",
764 | " 5258 | \n",
765 | " 1.54399e-11 | \n",
766 | " 0.0233333 | \n",
767 | " increasing | \n",
768 | "
\n",
769 | " \n",
770 | " | 34 | \n",
771 | " III | \n",
772 | " LAL_ug/l | \n",
773 | " -208 | \n",
774 | " -2.8669 | \n",
775 | " 5213.33 | \n",
776 | " 0.00414514 | \n",
777 | " -0.140688 | \n",
778 | " decreasing | \n",
779 | "
\n",
780 | " \n",
781 | " | 35 | \n",
782 | " III | \n",
783 | " TOC_mg-C/l | \n",
784 | " 324 | \n",
785 | " 4.45443 | \n",
786 | " 5258 | \n",
787 | " 8.4116e-06 | \n",
788 | " 0.0103509 | \n",
789 | " increasing | \n",
790 | "
\n",
791 | " \n",
792 | " | 36 | \n",
793 | " VII | \n",
794 | " ESO4_uekv/l | \n",
795 | " -1209 | \n",
796 | " -11.5068 | \n",
797 | " 11021 | \n",
798 | " 0 | \n",
799 | " -0.424367 | \n",
800 | " decreasing | \n",
801 | "
\n",
802 | " \n",
803 | " | 37 | \n",
804 | " VII | \n",
805 | " ECa-Mg_uekv/l | \n",
806 | " 16 | \n",
807 | " 0.142877 | \n",
808 | " 11022 | \n",
809 | " 0.886388 | \n",
810 | " 0.00263083 | \n",
811 | " no trend | \n",
812 | "
\n",
813 | " \n",
814 | " | 38 | \n",
815 | " VII | \n",
816 | " ANC_uekv/l | \n",
817 | " 941 | \n",
818 | " 8.95319 | \n",
819 | " 11023 | \n",
820 | " 0 | \n",
821 | " 0.646314 | \n",
822 | " increasing | \n",
823 | "
\n",
824 | " \n",
825 | " | 39 | \n",
826 | " VII | \n",
827 | " pH | \n",
828 | " 1020 | \n",
829 | " 9.70961 | \n",
830 | " 11014 | \n",
831 | " 0 | \n",
832 | " 0.0188889 | \n",
833 | " increasing | \n",
834 | "
\n",
835 | " \n",
836 | " | 40 | \n",
837 | " VII | \n",
838 | " LAL_ug/l | \n",
839 | " -588 | \n",
840 | " -5.60328 | \n",
841 | " 10974.7 | \n",
842 | " 2.10331e-08 | \n",
843 | " -0.333333 | \n",
844 | " decreasing | \n",
845 | "
\n",
846 | " \n",
847 | " | 41 | \n",
848 | " VII | \n",
849 | " TOC_mg-C/l | \n",
850 | " 60 | \n",
851 | " 0.56239 | \n",
852 | " 11006 | \n",
853 | " 0.573851 | \n",
854 | " 0.00102564 | \n",
855 | " no trend | \n",
856 | "
\n",
857 | " \n",
858 | " | 42 | \n",
859 | " VI | \n",
860 | " ESO4_uekv/l | \n",
861 | " -892 | \n",
862 | " -9.70852 | \n",
863 | " 8422.67 | \n",
864 | " 0 | \n",
865 | " -0.906736 | \n",
866 | " decreasing | \n",
867 | "
\n",
868 | " \n",
869 | " | 43 | \n",
870 | " VI | \n",
871 | " ECa-Mg_uekv/l | \n",
872 | " -150 | \n",
873 | " -1.62353 | \n",
874 | " 8422.67 | \n",
875 | " 0.104475 | \n",
876 | " -0.104055 | \n",
877 | " no trend | \n",
878 | "
\n",
879 | " \n",
880 | " | 44 | \n",
881 | " VI | \n",
882 | " ANC_uekv/l | \n",
883 | " 704 | \n",
884 | " 7.66003 | \n",
885 | " 8422.67 | \n",
886 | " 1.86517e-14 | \n",
887 | " 1.11682 | \n",
888 | " increasing | \n",
889 | "
\n",
890 | " \n",
891 | " | 45 | \n",
892 | " VI | \n",
893 | " pH | \n",
894 | " 730 | \n",
895 | " 7.94522 | \n",
896 | " 8418.67 | \n",
897 | " 1.9984e-15 | \n",
898 | " 0.0269048 | \n",
899 | " increasing | \n",
900 | "
\n",
901 | " \n",
902 | " | 46 | \n",
903 | " VI | \n",
904 | " LAL_ug/l | \n",
905 | " -651 | \n",
906 | " -7.09391 | \n",
907 | " 8395.67 | \n",
908 | " 1.30385e-12 | \n",
909 | " -1.5 | \n",
910 | " decreasing | \n",
911 | "
\n",
912 | " \n",
913 | " | 47 | \n",
914 | " VI | \n",
915 | " TOC_mg-C/l | \n",
916 | " 565 | \n",
917 | " 6.15533 | \n",
918 | " 8395.67 | \n",
919 | " 7.49202e-10 | \n",
920 | " 0.020875 | \n",
921 | " increasing | \n",
922 | "
\n",
923 | " \n",
924 | " | 48 | \n",
925 | " IX | \n",
926 | " ESO4_uekv/l | \n",
927 | " -34 | \n",
928 | " -0.996498 | \n",
929 | " 1096.67 | \n",
930 | " 0.319008 | \n",
931 | " -0.0524026 | \n",
932 | " no trend | \n",
933 | "
\n",
934 | " \n",
935 | " | 49 | \n",
936 | " IX | \n",
937 | " ECa-Mg_uekv/l | \n",
938 | " 90 | \n",
939 | " 2.68753 | \n",
940 | " 1096.67 | \n",
941 | " 0.00719835 | \n",
942 | " 0.436464 | \n",
943 | " increasing | \n",
944 | "
\n",
945 | " \n",
946 | " | 50 | \n",
947 | " IX | \n",
948 | " ANC_uekv/l | \n",
949 | " 90 | \n",
950 | " 2.68753 | \n",
951 | " 1096.67 | \n",
952 | " 0.00719835 | \n",
953 | " 0.484974 | \n",
954 | " increasing | \n",
955 | "
\n",
956 | " \n",
957 | " | 51 | \n",
958 | " IX | \n",
959 | " pH | \n",
960 | " 66 | \n",
961 | " 1.96459 | \n",
962 | " 1094.67 | \n",
963 | " 0.0494615 | \n",
964 | " 0.00563492 | \n",
965 | " increasing | \n",
966 | "
\n",
967 | " \n",
968 | " | 52 | \n",
969 | " IX | \n",
970 | " LAL_ug/l | \n",
971 | " 87 | \n",
972 | " 2.60926 | \n",
973 | " 1086.33 | \n",
974 | " 0.0090739 | \n",
975 | " 0.181818 | \n",
976 | " increasing | \n",
977 | "
\n",
978 | " \n",
979 | " | 53 | \n",
980 | " IX | \n",
981 | " TOC_mg-C/l | \n",
982 | " 41 | \n",
983 | " 1.20843 | \n",
984 | " 1095.67 | \n",
985 | " 0.226883 | \n",
986 | " 0.00325397 | \n",
987 | " no trend | \n",
988 | "
\n",
989 | " \n",
990 | " | 54 | \n",
991 | " VIII | \n",
992 | " ESO4_uekv/l | \n",
993 | " -469 | \n",
994 | " -5.20439 | \n",
995 | " 8086.33 | \n",
996 | " 1.94632e-07 | \n",
997 | " -0.203925 | \n",
998 | " decreasing | \n",
999 | "
\n",
1000 | " \n",
1001 | " | 55 | \n",
1002 | " VIII | \n",
1003 | " ECa-Mg_uekv/l | \n",
1004 | " 265 | \n",
1005 | " 2.9346 | \n",
1006 | " 8093 | \n",
1007 | " 0.00333976 | \n",
1008 | " 0.209264 | \n",
1009 | " increasing | \n",
1010 | "
\n",
1011 | " \n",
1012 | " | 56 | \n",
1013 | " VIII | \n",
1014 | " ANC_uekv/l | \n",
1015 | " 613 | \n",
1016 | " 6.80294 | \n",
1017 | " 8093 | \n",
1018 | " 1.02505e-11 | \n",
1019 | " 0.729263 | \n",
1020 | " increasing | \n",
1021 | "
\n",
1022 | " \n",
1023 | " | 57 | \n",
1024 | " VIII | \n",
1025 | " pH | \n",
1026 | " 397 | \n",
1027 | " 4.40372 | \n",
1028 | " 8086.33 | \n",
1029 | " 1.06412e-05 | \n",
1030 | " 0.00923077 | \n",
1031 | " increasing | \n",
1032 | "
\n",
1033 | " \n",
1034 | " | 58 | \n",
1035 | " VIII | \n",
1036 | " LAL_ug/l | \n",
1037 | " 289 | \n",
1038 | " 3.2735 | \n",
1039 | " 7740.33 | \n",
1040 | " 0.00106224 | \n",
1041 | " 0.0333333 | \n",
1042 | " increasing | \n",
1043 | "
\n",
1044 | " \n",
1045 | " | 59 | \n",
1046 | " VIII | \n",
1047 | " TOC_mg-C/l | \n",
1048 | " 196 | \n",
1049 | " 2.17042 | \n",
1050 | " 8072 | \n",
1051 | " 0.0299749 | \n",
1052 | " 0.0047 | \n",
1053 | " increasing | \n",
1054 | "
\n",
1055 | " \n",
1056 | "
\n",
1057 | "
"
1058 | ],
1059 | "text/plain": [
1060 | " region parameter s z var_s p sslp \\\n",
1061 | "0 I ESO4_uekv/l -991 -11.6353 7239.67 0 -0.84127 \n",
1062 | "1 I ECa-Mg_uekv/l -60 -0.693366 7240.67 0.48808 -0.0442908 \n",
1063 | "2 I ANC_uekv/l 705 8.27282 7241.67 2.22045e-16 1.12356 \n",
1064 | "3 I pH 657 7.71338 7233 1.22125e-14 0.0152632 \n",
1065 | "4 I LAL_ug/l -8 -0.0826413 7174.67 0.934137 0 \n",
1066 | "5 I TOC_mg-C/l 387 4.54579 7210.33 5.47289e-06 0.0266667 \n",
1067 | "6 IV ESO4_uekv/l -1679 -14.1516 14059.7 0 -2.06651 \n",
1068 | "7 IV ECa-Mg_uekv/l -711 -5.98785 14059.7 2.12634e-09 -0.511123 \n",
1069 | "8 IV ANC_uekv/l 1315 11.0817 14059.7 0 1.94012 \n",
1070 | "9 IV pH 1137 9.5867 14041.7 0 0.0217647 \n",
1071 | "10 IV LAL_ug/l -1066 -8.9855 14048 0 -2.66667 \n",
1072 | "11 IV TOC_mg-C/l 981 8.27061 14040.3 2.22045e-16 0.0918182 \n",
1073 | "12 II ESO4_uekv/l -2320 -16.0698 20824.7 0 -2.26966 \n",
1074 | "13 II ECa-Mg_uekv/l -1546 -10.7058 20826.7 0 -0.896604 \n",
1075 | "14 II ANC_uekv/l 1788 12.3827 20826.7 0 1.75421 \n",
1076 | "15 II pH 1279 8.8597 20807.7 0 0.0142857 \n",
1077 | "16 II LAL_ug/l -1201 -8.32044 20800.3 0 -2 \n",
1078 | "17 II TOC_mg-C/l 1113 7.71126 20795 1.24345e-14 0.0861481 \n",
1079 | "18 X ESO4_uekv/l -720 -8.66664 6882.67 0 -1.33067 \n",
1080 | "19 X ECa-Mg_uekv/l 54 0.638848 6882.67 0.522922 0.0802304 \n",
1081 | "20 X ANC_uekv/l 700 8.42556 6882.67 0 1.92631 \n",
1082 | "21 X pH 693 8.34867 6870.33 0 0.02 \n",
1083 | "22 X LAL_ug/l 14 0.160384 6570 0.872579 0 \n",
1084 | "23 X TOC_mg-C/l 321 3.87459 6821 0.000106804 0.0131373 \n",
1085 | "24 V ESO4_uekv/l -1374 -12.6771 11730 0 -1.64888 \n",
1086 | "25 V ECa-Mg_uekv/l -520 -4.79202 11730 1.65113e-06 -0.351444 \n",
1087 | "26 V ANC_uekv/l 978 9.02081 11730 0 1.88999 \n",
1088 | "27 V pH 998 9.21123 11715.3 0 0.0215686 \n",
1089 | "28 V LAL_ug/l -899 -8.29575 11717.7 0 -2.83333 \n",
1090 | "29 V TOC_mg-C/l 950 8.77201 11704 0 0.0361111 \n",
1091 | "30 III ESO4_uekv/l -671 -9.23721 5261 0 -0.862402 \n",
1092 | "31 III ECa-Mg_uekv/l -87 -1.18567 5261 0.235752 -0.105936 \n",
1093 | "32 III ANC_uekv/l 524 7.20985 5262 5.60219e-13 1.04387 \n",
1094 | "33 III pH 490 6.74371 5258 1.54399e-11 0.0233333 \n",
1095 | "34 III LAL_ug/l -208 -2.8669 5213.33 0.00414514 -0.140688 \n",
1096 | "35 III TOC_mg-C/l 324 4.45443 5258 8.4116e-06 0.0103509 \n",
1097 | "36 VII ESO4_uekv/l -1209 -11.5068 11021 0 -0.424367 \n",
1098 | "37 VII ECa-Mg_uekv/l 16 0.142877 11022 0.886388 0.00263083 \n",
1099 | "38 VII ANC_uekv/l 941 8.95319 11023 0 0.646314 \n",
1100 | "39 VII pH 1020 9.70961 11014 0 0.0188889 \n",
1101 | "40 VII LAL_ug/l -588 -5.60328 10974.7 2.10331e-08 -0.333333 \n",
1102 | "41 VII TOC_mg-C/l 60 0.56239 11006 0.573851 0.00102564 \n",
1103 | "42 VI ESO4_uekv/l -892 -9.70852 8422.67 0 -0.906736 \n",
1104 | "43 VI ECa-Mg_uekv/l -150 -1.62353 8422.67 0.104475 -0.104055 \n",
1105 | "44 VI ANC_uekv/l 704 7.66003 8422.67 1.86517e-14 1.11682 \n",
1106 | "45 VI pH 730 7.94522 8418.67 1.9984e-15 0.0269048 \n",
1107 | "46 VI LAL_ug/l -651 -7.09391 8395.67 1.30385e-12 -1.5 \n",
1108 | "47 VI TOC_mg-C/l 565 6.15533 8395.67 7.49202e-10 0.020875 \n",
1109 | "48 IX ESO4_uekv/l -34 -0.996498 1096.67 0.319008 -0.0524026 \n",
1110 | "49 IX ECa-Mg_uekv/l 90 2.68753 1096.67 0.00719835 0.436464 \n",
1111 | "50 IX ANC_uekv/l 90 2.68753 1096.67 0.00719835 0.484974 \n",
1112 | "51 IX pH 66 1.96459 1094.67 0.0494615 0.00563492 \n",
1113 | "52 IX LAL_ug/l 87 2.60926 1086.33 0.0090739 0.181818 \n",
1114 | "53 IX TOC_mg-C/l 41 1.20843 1095.67 0.226883 0.00325397 \n",
1115 | "54 VIII ESO4_uekv/l -469 -5.20439 8086.33 1.94632e-07 -0.203925 \n",
1116 | "55 VIII ECa-Mg_uekv/l 265 2.9346 8093 0.00333976 0.209264 \n",
1117 | "56 VIII ANC_uekv/l 613 6.80294 8093 1.02505e-11 0.729263 \n",
1118 | "57 VIII pH 397 4.40372 8086.33 1.06412e-05 0.00923077 \n",
1119 | "58 VIII LAL_ug/l 289 3.2735 7740.33 0.00106224 0.0333333 \n",
1120 | "59 VIII TOC_mg-C/l 196 2.17042 8072 0.0299749 0.0047 \n",
1121 | "\n",
1122 | " trend \n",
1123 | "0 decreasing \n",
1124 | "1 no trend \n",
1125 | "2 increasing \n",
1126 | "3 increasing \n",
1127 | "4 no trend \n",
1128 | "5 increasing \n",
1129 | "6 decreasing \n",
1130 | "7 decreasing \n",
1131 | "8 increasing \n",
1132 | "9 increasing \n",
1133 | "10 decreasing \n",
1134 | "11 increasing \n",
1135 | "12 decreasing \n",
1136 | "13 decreasing \n",
1137 | "14 increasing \n",
1138 | "15 increasing \n",
1139 | "16 decreasing \n",
1140 | "17 increasing \n",
1141 | "18 decreasing \n",
1142 | "19 no trend \n",
1143 | "20 increasing \n",
1144 | "21 increasing \n",
1145 | "22 no trend \n",
1146 | "23 increasing \n",
1147 | "24 decreasing \n",
1148 | "25 decreasing \n",
1149 | "26 increasing \n",
1150 | "27 increasing \n",
1151 | "28 decreasing \n",
1152 | "29 increasing \n",
1153 | "30 decreasing \n",
1154 | "31 no trend \n",
1155 | "32 increasing \n",
1156 | "33 increasing \n",
1157 | "34 decreasing \n",
1158 | "35 increasing \n",
1159 | "36 decreasing \n",
1160 | "37 no trend \n",
1161 | "38 increasing \n",
1162 | "39 increasing \n",
1163 | "40 decreasing \n",
1164 | "41 no trend \n",
1165 | "42 decreasing \n",
1166 | "43 no trend \n",
1167 | "44 increasing \n",
1168 | "45 increasing \n",
1169 | "46 decreasing \n",
1170 | "47 increasing \n",
1171 | "48 no trend \n",
1172 | "49 increasing \n",
1173 | "50 increasing \n",
1174 | "51 increasing \n",
1175 | "52 increasing \n",
1176 | "53 no trend \n",
1177 | "54 decreasing \n",
1178 | "55 increasing \n",
1179 | "56 increasing \n",
1180 | "57 increasing \n",
1181 | "58 increasing \n",
1182 | "59 increasing "
1183 | ]
1184 | },
1185 | "execution_count": 5,
1186 | "metadata": {},
1187 | "output_type": "execute_result"
1188 | }
1189 | ],
1190 | "source": [
1191 | "# Pars to test for trends\n",
1192 | "pars = [\"ESO4_uekv/l\", \"ECa-Mg_uekv/l\", \"ANC_uekv/l\", \"pH\", \"LAL_ug/l\", \"TOC_mg-C/l\"]\n",
1193 | "\n",
1194 | "res_list = []\n",
1195 | "\n",
1196 | "# Loop over regions\n",
1197 | "for reg in agg[\"region\"].unique():\n",
1198 | " reg_df = agg.query(\"region == @reg\")\n",
1199 | "\n",
1200 | " # Loop over pars\n",
1201 | " for par in pars:\n",
1202 | " res_df = nivapy.stats.seasonal_regional_mk_sen(\n",
1203 | " reg_df, time_col=\"year\", value_col=par, block_col=\"station_name\"\n",
1204 | " ).T.loc['value']\n",
1205 | " res_df['region'] = reg\n",
1206 | " res_df['parameter'] = par\n",
1207 | " res_list.append(res_df)\n",
1208 | "\n",
1209 | "res_df = pd.concat(res_list, axis=1, sort=False).T.reset_index(drop=True)\n",
1210 | "res_df = res_df[['region', 'parameter', 's', 'z', 'var_s', 'p', 'sslp', 'trend']]\n",
1211 | "res_df.to_csv('regional_mk_trends.csv', index=False)\n",
1212 | "\n",
1213 | "res_df"
1214 | ]
1215 | }
1216 | ],
1217 | "metadata": {
1218 | "kernelspec": {
1219 | "display_name": "Python 3",
1220 | "language": "python",
1221 | "name": "python3"
1222 | },
1223 | "language_info": {
1224 | "codemirror_mode": {
1225 | "name": "ipython",
1226 | "version": 3
1227 | },
1228 | "file_extension": ".py",
1229 | "mimetype": "text/x-python",
1230 | "name": "python",
1231 | "nbconvert_exporter": "python",
1232 | "pygments_lexer": "ipython3",
1233 | "version": "3.7.6"
1234 | }
1235 | },
1236 | "nbformat": 4,
1237 | "nbformat_minor": 4
1238 | }
1239 |
--------------------------------------------------------------------------------
/notebooks/update_db_2018_report.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "%matplotlib inline\n",
10 | "\n",
11 | "import pandas as pd\n",
12 | "import nivapy3 as nivapy\n",
13 | "import matplotlib.pyplot as plt\n",
14 | "import datetime as dt\n",
15 | "import numpy as np\n",
16 | "from sqlalchemy import text\n",
17 | "\n",
18 | "plt.style.use('ggplot')"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 6,
24 | "metadata": {},
25 | "outputs": [
26 | {
27 | "name": "stdin",
28 | "output_type": "stream",
29 | "text": [
30 | "Username: ···\n",
31 | "Password: ········\n"
32 | ]
33 | },
34 | {
35 | "name": "stdout",
36 | "output_type": "stream",
37 | "text": [
38 | "Connection successful.\n"
39 | ]
40 | }
41 | ],
42 | "source": [
43 | "# Connect to Oracle\n",
44 | "eng = nivapy.da.connect()"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "metadata": {},
50 | "source": [
51 | "# Biolok: updates for the 2018 report\n",
52 | "\n",
53 | "This notebook describes data processing and RESA2 updates for the 2018 Biolok report. Data sources are as follows:\n",
54 | "\n",
55 | " * All data prior to 2015 are stored in RESA2 as a **dataset** named `BIOLOK rapp2015` (project ID 4030). This dataset comprises 43 stations, but only a subset of water samples from each site is included. Relevant data can be exported via the RESA2 application, but it is important to **remember to check the box** labelled *Use only water samples from selected projects*, otherwise all samples will be returned. Within the RESA2 database, relevant samples are associated with `SAMPLE_SELECTION_ID` 62 in the `RESA2.SAMPLE_SELECTIONS` table.\n",
56 | " \n",
57 | " * From 2015 onwards, the project comprises 42 stations (two have been removed from the original project and one added). Most of the data are stored in both RESA2 and Aquamonitor, but we also receive data from an external laboratory named Faun. Liv Bente has compiled a spreadsheet here\n",
58 | " \n",
59 | " K:\\Prosjekter\\langtransporterte forurensninger\\O-190033-7 BIOLOK_rapp 2014-18\\Samlede prøver ulike kilder_til RESA_JES_nov19.xlsx\n",
60 | " \n",
61 | " listing relevant samples since 2015\n",
62 | " \n",
63 | "The general workflow is as follows:\n",
64 | "\n",
65 | " 1. Create a new dataset for the 2018 report in the `RESA2.PROJECTS` table\n",
66 | " \n",
67 | " 2. Add the 42 stations in the current project to `PROJECTS_STATIONS`\n",
68 | " \n",
69 | " 3. Create a new `SAMPLE_SELECTION_ID` in `RESA2.SAMPLE_SELECTION_DEFINITIONS`\n",
70 | " \n",
71 | " 4. Update the new sample selection in `RESA2.SAMPLE_SELECTIONS` to include pre-2015 water samples from the 41 \"common\" stations\n",
72 | " \n",
73 | " 5. Check whether all non-Faun samples are already present in RESA, or whether some data need to be transferred from AM\n",
74 | " \n",
75 | " 6. Update the new sample selection to include relevant samples from 2015 onwards for the 41 common sites (based on Liv Bente's spreadsheet)\n",
76 | " \n",
77 | " 7. Also add relevant samples from all years for Stakksheitjørna, which is the new station added to the project since 2014 (based on Liv Bente's spreadsheet)\n",
78 | " \n",
79 | " 8. Add data from Faun to RESA and link to the sample selection (based on Liv Bente's spreadsheet)"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "## 1. Create dataset\n",
87 | "\n",
88 | "There is no real distinction between \"projects\" and \"datasets\" in RESA: both are stored in the `PROJECTS` table. The column named `PROJECT_OR_DATASET` in this table determines which table of the *Select project* window in the RESA2 application the project/dataset is shown in.\n",
89 | "\n",
90 | "**The new project created below has `PROJECT_ID` 4490**."
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": 3,
96 | "metadata": {},
97 | "outputs": [],
98 | "source": [
99 | "## Create a new dataset\n",
100 | "#sql = (\"INSERT INTO resa2.projects \"\n",
101 | "# \" (project_name, contact_person, project_or_dataset, \"\n",
102 | "# \" contact_person_initials, project_description) \"\n",
103 | "# \"VALUES \"\n",
104 | "# \" ('BIOLOK rapp2018', 'Liv Bente Skancke', 'Dataset', \"\n",
105 | "# \" 'LBS', 'Stations included in the Biolok report (data up to 2018; written during 2020)')\"\n",
106 | "# )\n",
107 | "#eng.execute(sql)"
108 | ]
109 | },
110 | {
111 | "cell_type": "markdown",
112 | "metadata": {},
113 | "source": [
114 | "## 2. Add stations\n",
115 | "\n",
116 | "The stations involved in the project since 2015 are listed in the `stations` worksheet of `../project_overview_from_lbs.xlsx`."
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": 4,
122 | "metadata": {},
123 | "outputs": [
124 | {
125 | "data": {
126 | "text/html": [
127 | "\n",
128 | "\n",
141 | "
\n",
142 | " \n",
143 | " \n",
144 | " | \n",
145 | " station_id | \n",
146 | " station_code | \n",
147 | " station_name | \n",
148 | " region | \n",
149 | " biolok_code | \n",
150 | " nve_vatn_nr | \n",
151 | " elevation_m | \n",
152 | " fylke | \n",
153 | " kommune | \n",
154 | "
\n",
155 | " \n",
156 | " \n",
157 | " \n",
158 | " | 0 | \n",
159 | " 42 | \n",
160 | " 1003-2-4 | \n",
161 | " Saudlandsvatnet | \n",
162 | " 5 | \n",
163 | " V-1 | \n",
164 | " 21894 | \n",
165 | " 106.0 | \n",
166 | " Vest-Agder | \n",
167 | " Farsund kommune | \n",
168 | "
\n",
169 | " \n",
170 | " | 1 | \n",
171 | " 192 | \n",
172 | " 1014-12 | \n",
173 | " Sognevatn | \n",
174 | " 4 | \n",
175 | " IV-9 | \n",
176 | " 11078 | \n",
177 | " 267.0 | \n",
178 | " Vest-Agder | \n",
179 | " Vennesla kommune | \n",
180 | "
\n",
181 | " \n",
182 | " | 2 | \n",
183 | " 182 | \n",
184 | " 1014-25 | \n",
185 | " Drivenesvatn | \n",
186 | " 4 | \n",
187 | " IV-8 | \n",
188 | " 11147 | \n",
189 | " 176.0 | \n",
190 | " Vest-Agder | \n",
191 | " Vennesla kommune | \n",
192 | "
\n",
193 | " \n",
194 | " | 3 | \n",
195 | " 166 | \n",
196 | " 1018-4 | \n",
197 | " Kleivsetvannet | \n",
198 | " 4 | \n",
199 | " IV-10 | \n",
200 | " 11592 | \n",
201 | " 93.0 | \n",
202 | " Vest-Agder | \n",
203 | " Søgne kommune | \n",
204 | "
\n",
205 | " \n",
206 | " | 4 | \n",
207 | " 118 | \n",
208 | " 1034-19 | \n",
209 | " Indre Espelandsvatnet | \n",
210 | " 5 | \n",
211 | " V-2 | \n",
212 | " 11095 | \n",
213 | " 389.0 | \n",
214 | " Vest-Agder | \n",
215 | " Hægebostad kommune | \n",
216 | "
\n",
217 | " \n",
218 | "
\n",
219 | "
"
220 | ],
221 | "text/plain": [
222 | " station_id station_code station_name region biolok_code \\\n",
223 | "0 42 1003-2-4 Saudlandsvatnet 5 V-1 \n",
224 | "1 192 1014-12 Sognevatn 4 IV-9 \n",
225 | "2 182 1014-25 Drivenesvatn 4 IV-8 \n",
226 | "3 166 1018-4 Kleivsetvannet 4 IV-10 \n",
227 | "4 118 1034-19 Indre Espelandsvatnet 5 V-2 \n",
228 | "\n",
229 | " nve_vatn_nr elevation_m fylke kommune \n",
230 | "0 21894 106.0 Vest-Agder Farsund kommune \n",
231 | "1 11078 267.0 Vest-Agder Vennesla kommune \n",
232 | "2 11147 176.0 Vest-Agder Vennesla kommune \n",
233 | "3 11592 93.0 Vest-Agder Søgne kommune \n",
234 | "4 11095 389.0 Vest-Agder Hægebostad kommune "
235 | ]
236 | },
237 | "execution_count": 4,
238 | "metadata": {},
239 | "output_type": "execute_result"
240 | }
241 | ],
242 | "source": [
243 | "# Read stations\n",
244 | "stn_df = pd.read_excel('../project_overview_from_lbs.xlsx',\n",
245 | " sheet_name='stations')\n",
246 | "stn_df.head()"
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "execution_count": 5,
252 | "metadata": {},
253 | "outputs": [],
254 | "source": [
255 | "## Reformat for adding to database\n",
256 | "#stn_df2 = stn_df.copy()\n",
257 | "#stn_df2['project_id'] = 4490\n",
258 | "#stn_df2['active'] = 'Y'\n",
259 | "#stn_df2 = stn_df2[['station_id', 'project_id', 'active', 'biolok_code']]\n",
260 | "#stn_df2.rename({'biolok_code':'station_code'}, axis=1, inplace=True)\n",
261 | "#\n",
262 | "## Write to db\n",
263 | "#stn_df2.to_sql('projects_stations', \n",
264 | "# eng, \n",
265 | "# schema='RESA2', \n",
266 | "# if_exists='append', \n",
267 | "# index=False,\n",
268 | "# )"
269 | ]
270 | },
271 | {
272 | "cell_type": "markdown",
273 | "metadata": {},
274 | "source": [
275 | "## 3. Create a new sample selection"
276 | ]
277 | },
278 | {
279 | "cell_type": "code",
280 | "execution_count": 6,
281 | "metadata": {},
282 | "outputs": [],
283 | "source": [
284 | "## Create sample selection\n",
285 | "#sql = (\"INSERT INTO resa2.sample_selection_definitions \"\n",
286 | "# \" (sample_selection_id, project_id, name) \"\n",
287 | "# \"VALUES \"\n",
288 | "# \" (66, 4490, 'Biolok (start to 2018)')\"\n",
289 | "# )\n",
290 | "#eng.execute(sql)"
291 | ]
292 | },
293 | {
294 | "cell_type": "markdown",
295 | "metadata": {},
296 | "source": [
297 | "## 4. Add pre-2015 water samples\n",
298 | "\n",
299 | "The code below first queries all water samples associated with the 2015 report, which includes two stations that are no longer part of the project. Next, the code queries *all* water samples in the database associated with the 42 stations that *are* in the current project, and then calculates the intersection of these two sets. This produces a list of pre-2015 samples to be transferred.\n",
300 | "\n",
301 | "The pre-2015 samples are associated with `SAMPLE_SELECTION_ID` 62."
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": 7,
307 | "metadata": {},
308 | "outputs": [
309 | {
310 | "name": "stdout",
311 | "output_type": "stream",
312 | "text": [
313 | "1566\n"
314 | ]
315 | },
316 | {
317 | "data": {
318 | "text/html": [
319 | "\n",
320 | "\n",
333 | "
\n",
334 | " \n",
335 | " \n",
336 | " | \n",
337 | " water_sample_id | \n",
338 | " sample_selection_id | \n",
339 | "
\n",
340 | " \n",
341 | " \n",
342 | " \n",
343 | " | 0 | \n",
344 | " 273550 | \n",
345 | " 66 | \n",
346 | "
\n",
347 | " \n",
348 | " | 1 | \n",
349 | " 273552 | \n",
350 | " 66 | \n",
351 | "
\n",
352 | " \n",
353 | " | 2 | \n",
354 | " 532542 | \n",
355 | " 66 | \n",
356 | "
\n",
357 | " \n",
358 | " | 3 | \n",
359 | " 532544 | \n",
360 | " 66 | \n",
361 | "
\n",
362 | " \n",
363 | " | 4 | \n",
364 | " 532545 | \n",
365 | " 66 | \n",
366 | "
\n",
367 | " \n",
368 | "
\n",
369 | "
"
370 | ],
371 | "text/plain": [
372 | " water_sample_id sample_selection_id\n",
373 | "0 273550 66\n",
374 | "1 273552 66\n",
375 | "2 532542 66\n",
376 | "3 532544 66\n",
377 | "4 532545 66"
378 | ]
379 | },
380 | "execution_count": 7,
381 | "metadata": {},
382 | "output_type": "execute_result"
383 | }
384 | ],
385 | "source": [
386 | "# Get WS IDs for pre-2015 stations\n",
387 | "sql = (\"SELECT water_sample_id FROM resa2.sample_selections \"\n",
388 | " \"WHERE sample_selection_id = 62\")\n",
389 | "pre2015_df = pd.read_sql(sql, eng)\n",
390 | "\n",
391 | "# Get all WS in db for stations in 2018 project\n",
392 | "bind_pars = ','.join('%d' % i for i in stn_df['station_id'])\n",
393 | "sql = (\"SELECT water_sample_id FROM resa2.water_samples \"\n",
394 | " \"WHERE station_id IN (%s)\" % bind_pars)\n",
395 | "all_df = pd.read_sql(sql, eng)\n",
396 | "\n",
397 | "# Compute intersection\n",
398 | "ws_ids = list(set(pre2015_df['water_sample_id']).intersection(set(all_df['water_sample_id'])))\n",
399 | "\n",
400 | "# Build df\n",
401 | "ws_df = pd.DataFrame({'water_sample_id':ws_ids})\n",
402 | "ws_df['sample_selection_id'] = 66\n",
403 | "print(len(ws_df))\n",
404 | "\n",
405 | "ws_df.head()"
406 | ]
407 | },
408 | {
409 | "cell_type": "code",
410 | "execution_count": 8,
411 | "metadata": {},
412 | "outputs": [],
413 | "source": [
414 | "## Write to db\n",
415 | "#ws_df.to_sql('sample_selections', \n",
416 | "# eng, \n",
417 | "# schema='RESA2', \n",
418 | "# if_exists='append', \n",
419 | "# index=False,\n",
420 | "# )"
421 | ]
422 | },
423 | {
424 | "cell_type": "markdown",
425 | "metadata": {},
426 | "source": [
427 | "## 5. Check whether all non-Faun data are already in RESA\n",
428 | "\n",
429 | "Liv Bente's Excel file here\n",
430 | "\n",
431 | " K:\\Prosjekter\\langtransporterte forurensninger\\O-190033-7 BIOLOK_rapp 2014-18\\Samlede prøver ulike kilder_til RESA_JES_nov19.xlsx\n",
432 | " \n",
433 | "includes a worksheet named `Data fra AquaM`, which lists water chemistry data that should be taken from AM. However, it looks as though most of these samples are already available in RESA. I've created a tidied version of Liv Bente's AM samples list in the `lbs_am_samples` worksheet of `../project_overview_from_lbs.xlsx`. The code below loops over these to see whether they can be identified in RESA instead.\n",
434 | "\n",
435 | "**Note:** Initially, around 30 water samples could not be identified. Most of these were due to missing links in the `NIVADATABASE.DATASOURCE_STATION` table, which I have now added. In addition:\n",
436 | "\n",
437 | " * AM station 26199 (Storbørja) was incorrectly linked to RESA ID 3042 (Svartbørja)\n",
438 | " \n",
439 | " * One sample from Rondvatnet (AM ID 49204; RESA ID 12712) had the date incorrectly assigned in RESA: the date was given as 13.07.2017 in AM, but 13.06.2017 in RESA. I will assume the AM date is correct\n",
440 | " \n",
441 | "Having corrected the above issues, all samples in Liv Bente's `Data fra AquaM` worksheet can now be identified in RESA."
442 | ]
443 | },
444 | {
445 | "cell_type": "code",
446 | "execution_count": 9,
447 | "metadata": {},
448 | "outputs": [
449 | {
450 | "data": {
451 | "text/html": [
452 | "\n",
453 | "\n",
466 | "
\n",
467 | " \n",
468 | " \n",
469 | " | \n",
470 | " am_id | \n",
471 | " resa_code | \n",
472 | " am_name | \n",
473 | " resa_name | \n",
474 | " date | \n",
475 | " depth1 | \n",
476 | " depth2 | \n",
477 | "
\n",
478 | " \n",
479 | " \n",
480 | " \n",
481 | " | 0 | \n",
482 | " 8614 | \n",
483 | " 430-I-1 | \n",
484 | " Atnsjøen | \n",
485 | " Atnsjøen | \n",
486 | " 2018-06-10 00:00:00 | \n",
487 | " 0.5 | \n",
488 | " 0.5 | \n",
489 | "
\n",
490 | " \n",
491 | " | 1 | \n",
492 | " 8614 | \n",
493 | " 430-I-1 | \n",
494 | " Atnsjøen | \n",
495 | " Atnsjøen | \n",
496 | " 2018-08-09 00:00:00 | \n",
497 | " 0.5 | \n",
498 | " 0.5 | \n",
499 | "
\n",
500 | " \n",
501 | " | 2 | \n",
502 | " 8614 | \n",
503 | " 430-I-1 | \n",
504 | " Atnsjøen | \n",
505 | " Atnsjøen | \n",
506 | " 2018-09-04 00:00:00 | \n",
507 | " 0.5 | \n",
508 | " 0.5 | \n",
509 | "
\n",
510 | " \n",
511 | " | 3 | \n",
512 | " 26075 | \n",
513 | " 432-1-26 | \n",
514 | " Måsåbutjønna | \n",
515 | " Måsabutjørna | \n",
516 | " 2018-06-14 13:00:00 | \n",
517 | " 0.5 | \n",
518 | " 0.5 | \n",
519 | "
\n",
520 | " \n",
521 | " | 4 | \n",
522 | " 26075 | \n",
523 | " 432-1-26 | \n",
524 | " Måsåbutjønna | \n",
525 | " Måsabutjørna | \n",
526 | " 2018-08-28 00:00:00 | \n",
527 | " 0.5 | \n",
528 | " 0.5 | \n",
529 | "
\n",
530 | " \n",
531 | "
\n",
532 | "
"
533 | ],
534 | "text/plain": [
535 | " am_id resa_code am_name resa_name date depth1 \\\n",
536 | "0 8614 430-I-1 Atnsjøen Atnsjøen 2018-06-10 00:00:00 0.5 \n",
537 | "1 8614 430-I-1 Atnsjøen Atnsjøen 2018-08-09 00:00:00 0.5 \n",
538 | "2 8614 430-I-1 Atnsjøen Atnsjøen 2018-09-04 00:00:00 0.5 \n",
539 | "3 26075 432-1-26 Måsåbutjønna Måsabutjørna 2018-06-14 13:00:00 0.5 \n",
540 | "4 26075 432-1-26 Måsåbutjønna Måsabutjørna 2018-08-28 00:00:00 0.5 \n",
541 | "\n",
542 | " depth2 \n",
543 | "0 0.5 \n",
544 | "1 0.5 \n",
545 | "2 0.5 \n",
546 | "3 0.5 \n",
547 | "4 0.5 "
548 | ]
549 | },
550 | "execution_count": 9,
551 | "metadata": {},
552 | "output_type": "execute_result"
553 | }
554 | ],
555 | "source": [
556 | "# Read list of AM samples\n",
557 | "am_ws = pd.read_excel('../project_overview_from_lbs.xlsx',\n",
558 | " sheet_name='lbs_am_samples')\n",
559 | "am_ws.head()"
560 | ]
561 | },
562 | {
563 | "cell_type": "code",
564 | "execution_count": 10,
565 | "metadata": {},
566 | "outputs": [
567 | {
568 | "data": {
569 | "text/html": [
570 | "\n",
571 | "\n",
584 | "
\n",
585 | " \n",
586 | " \n",
587 | " | \n",
588 | " am_id | \n",
589 | " resa_code | \n",
590 | " am_name | \n",
591 | " resa_name | \n",
592 | " date | \n",
593 | " depth1 | \n",
594 | " depth2 | \n",
595 | " resa_ws_id | \n",
596 | "
\n",
597 | " \n",
598 | " \n",
599 | " \n",
600 | " | 0 | \n",
601 | " 8614 | \n",
602 | " 430-I-1 | \n",
603 | " Atnsjøen | \n",
604 | " Atnsjøen | \n",
605 | " 2018-06-10 00:00:00 | \n",
606 | " 0.5 | \n",
607 | " 0.5 | \n",
608 | " 661404 | \n",
609 | "
\n",
610 | " \n",
611 | " | 1 | \n",
612 | " 8614 | \n",
613 | " 430-I-1 | \n",
614 | " Atnsjøen | \n",
615 | " Atnsjøen | \n",
616 | " 2018-08-09 00:00:00 | \n",
617 | " 0.5 | \n",
618 | " 0.5 | \n",
619 | " 661558 | \n",
620 | "
\n",
621 | " \n",
622 | " | 2 | \n",
623 | " 8614 | \n",
624 | " 430-I-1 | \n",
625 | " Atnsjøen | \n",
626 | " Atnsjøen | \n",
627 | " 2018-09-04 00:00:00 | \n",
628 | " 0.5 | \n",
629 | " 0.5 | \n",
630 | " 661844 | \n",
631 | "
\n",
632 | " \n",
633 | " | 3 | \n",
634 | " 26075 | \n",
635 | " 432-1-26 | \n",
636 | " Måsåbutjønna | \n",
637 | " Måsabutjørna | \n",
638 | " 2018-06-14 13:00:00 | \n",
639 | " 0.5 | \n",
640 | " 0.5 | \n",
641 | " 661408 | \n",
642 | "
\n",
643 | " \n",
644 | " | 4 | \n",
645 | " 26075 | \n",
646 | " 432-1-26 | \n",
647 | " Måsåbutjønna | \n",
648 | " Måsabutjørna | \n",
649 | " 2018-08-28 00:00:00 | \n",
650 | " 0.5 | \n",
651 | " 0.5 | \n",
652 | " 661679 | \n",
653 | "
\n",
654 | " \n",
655 | "
\n",
656 | "
"
657 | ],
658 | "text/plain": [
659 | " am_id resa_code am_name resa_name date depth1 \\\n",
660 | "0 8614 430-I-1 Atnsjøen Atnsjøen 2018-06-10 00:00:00 0.5 \n",
661 | "1 8614 430-I-1 Atnsjøen Atnsjøen 2018-08-09 00:00:00 0.5 \n",
662 | "2 8614 430-I-1 Atnsjøen Atnsjøen 2018-09-04 00:00:00 0.5 \n",
663 | "3 26075 432-1-26 Måsåbutjønna Måsabutjørna 2018-06-14 13:00:00 0.5 \n",
664 | "4 26075 432-1-26 Måsåbutjønna Måsabutjørna 2018-08-28 00:00:00 0.5 \n",
665 | "\n",
666 | " depth2 resa_ws_id \n",
667 | "0 0.5 661404 \n",
668 | "1 0.5 661558 \n",
669 | "2 0.5 661844 \n",
670 | "3 0.5 661408 \n",
671 | "4 0.5 661679 "
672 | ]
673 | },
674 | "execution_count": 10,
675 | "metadata": {},
676 | "output_type": "execute_result"
677 | }
678 | ],
679 | "source": [
680 | "# Loop over data\n",
681 | "ws_list = []\n",
682 | "for idx, row in am_ws.iterrows():\n",
683 | " # Get station ID\n",
684 | " stn_code = row['resa_code']\n",
685 | " stn_id = stn_df.query('station_code == @stn_code')['station_id'].iloc[0]\n",
686 | " \n",
687 | " # Query RESA db\n",
688 | " par_dict = {'stn_id':stn_id.item(),\n",
689 | " 'sample_date': row['date'].date(),\n",
690 | " 'depth1':row['depth1'],\n",
691 | " 'depth2':row['depth2'],\n",
692 | " }\n",
693 | " \n",
694 | " sql = (\"SELECT * FROM resa2.water_samples \"\n",
695 | " \"WHERE station_id = :stn_id \"\n",
696 | " \"AND TRUNC(sample_date) = :sample_date \"\n",
697 | " \"AND depth1 = :depth1 \"\n",
698 | " \"AND depth2 = :depth2\")\n",
699 | " \n",
700 | " res = pd.read_sql(sql, con=eng, params=par_dict)\n",
701 | " \n",
702 | " if len(res) == 1:\n",
703 | " ws_list.append(res['water_sample_id'].iloc[0])\n",
704 | " elif len(res) == 0:\n",
705 | " ws_list.append(np.nan)\n",
706 | " else:\n",
707 | " print(stn_code, row['date'].date(), len(res))\n",
708 | " ws_list.append(-1)\n",
709 | "\n",
710 | "am_ws['resa_ws_id'] = ws_list\n",
711 | "am_ws.head()"
712 | ]
713 | },
714 | {
715 | "cell_type": "code",
716 | "execution_count": 11,
717 | "metadata": {},
718 | "outputs": [
719 | {
720 | "data": {
721 | "text/plain": [
722 | "am_id 85\n",
723 | "resa_code 85\n",
724 | "am_name 85\n",
725 | "resa_name 85\n",
726 | "date 85\n",
727 | "depth1 85\n",
728 | "depth2 85\n",
729 | "resa_ws_id 85\n",
730 | "dtype: int64"
731 | ]
732 | },
733 | "execution_count": 11,
734 | "metadata": {},
735 | "output_type": "execute_result"
736 | }
737 | ],
738 | "source": [
739 | "am_ws.count()"
740 | ]
741 | },
742 | {
743 | "cell_type": "code",
744 | "execution_count": 12,
745 | "metadata": {},
746 | "outputs": [
747 | {
748 | "data": {
749 | "text/html": [
750 | "\n",
751 | "\n",
764 | "
\n",
765 | " \n",
766 | " \n",
767 | " | \n",
768 | " am_id | \n",
769 | " resa_code | \n",
770 | " am_name | \n",
771 | " resa_name | \n",
772 | " date | \n",
773 | " depth1 | \n",
774 | " depth2 | \n",
775 | " resa_ws_id | \n",
776 | "
\n",
777 | " \n",
778 | " \n",
779 | " \n",
780 | "
\n",
781 | "
"
782 | ],
783 | "text/plain": [
784 | "Empty DataFrame\n",
785 | "Columns: [am_id, resa_code, am_name, resa_name, date, depth1, depth2, resa_ws_id]\n",
786 | "Index: []"
787 | ]
788 | },
789 | "execution_count": 12,
790 | "metadata": {},
791 | "output_type": "execute_result"
792 | }
793 | ],
794 | "source": [
795 | "not_found_df = am_ws[pd.isna(am_ws['resa_ws_id'])]\n",
796 | "not_found_df"
797 | ]
798 | },
799 | {
800 | "cell_type": "code",
801 | "execution_count": 13,
802 | "metadata": {},
803 | "outputs": [
804 | {
805 | "name": "stdout",
806 | "output_type": "stream",
807 | "text": [
808 | "[]\n"
809 | ]
810 | }
811 | ],
812 | "source": [
813 | "am_list = list(not_found_df['am_id'].unique().astype(str))\n",
814 | "print(am_list)\n",
815 | "if len(am_list) > 0:\n",
816 | " am_ids = ','.join(am_list)\n",
817 | " sql = text(f\"SELECT * \"\n",
818 | " f\"FROM nivadatabase.datasource_station \"\n",
819 | " f\"WHERE station_id IN ({am_ids})\")\n",
820 | " pd.read_sql(sql, eng)"
821 | ]
822 | },
823 | {
824 | "cell_type": "markdown",
825 | "metadata": {},
826 | "source": [
827 | "## 6. Add post-2014 water samples (non-FAUN)\n",
828 | "\n",
829 | "**Note:** This code has not yet been run fully. Once the issues with Storbørja and Svartbørja have been solved, it can be run again.\n",
830 | "\n",
831 | "All of the non-FAUN samples from 2015 to 2018 should now be available in RESA (i.e. everything from Liv Bente's `Data fra RESA 2015-2018 mm` and `Data fra AquaM` worksheets). All these samples are listed in tidied form in the `am_resa_2015-2018` worksheet of `../project_overview_from_lbs.xlsx`. The code below identifies RESA sample IDs for each sample and checks that all samples can be found. They are then associated with `SAMPLE_SELECTION_ID` 66 in RESA."
832 | ]
833 | },
834 | {
835 | "cell_type": "code",
836 | "execution_count": 14,
837 | "metadata": {},
838 | "outputs": [
839 | {
840 | "data": {
841 | "text/html": [
842 | "\n",
843 | "\n",
856 | "
\n",
857 | " \n",
858 | " \n",
859 | " | \n",
860 | " station_id | \n",
861 | " date | \n",
862 | " depth1 | \n",
863 | " depth2 | \n",
864 | "
\n",
865 | " \n",
866 | " \n",
867 | " \n",
868 | " | 0 | \n",
869 | " 16131 | \n",
870 | " 2015-06-11 08:00:00 | \n",
871 | " 0.0 | \n",
872 | " 0.0 | \n",
873 | "
\n",
874 | " \n",
875 | " | 1 | \n",
876 | " 16131 | \n",
877 | " 2015-08-04 08:30:00 | \n",
878 | " 0.0 | \n",
879 | " 0.0 | \n",
880 | "
\n",
881 | " \n",
882 | " | 2 | \n",
883 | " 16131 | \n",
884 | " 2015-08-30 08:30:00 | \n",
885 | " 0.0 | \n",
886 | " 0.0 | \n",
887 | "
\n",
888 | " \n",
889 | " | 3 | \n",
890 | " 16131 | \n",
891 | " 2015-10-04 09:00:00 | \n",
892 | " 0.0 | \n",
893 | " 0.0 | \n",
894 | "
\n",
895 | " \n",
896 | " | 4 | \n",
897 | " 16131 | \n",
898 | " 2016-06-08 00:00:00 | \n",
899 | " 0.0 | \n",
900 | " 10.0 | \n",
901 | "
\n",
902 | " \n",
903 | "
\n",
904 | "
"
905 | ],
906 | "text/plain": [
907 | " station_id date depth1 depth2\n",
908 | "0 16131 2015-06-11 08:00:00 0.0 0.0\n",
909 | "1 16131 2015-08-04 08:30:00 0.0 0.0\n",
910 | "2 16131 2015-08-30 08:30:00 0.0 0.0\n",
911 | "3 16131 2015-10-04 09:00:00 0.0 0.0\n",
912 | "4 16131 2016-06-08 00:00:00 0.0 10.0"
913 | ]
914 | },
915 | "execution_count": 14,
916 | "metadata": {},
917 | "output_type": "execute_result"
918 | }
919 | ],
920 | "source": [
921 | "# Read list of post-2014 non-FAUN samples\n",
922 | "samp_df = pd.read_excel('../project_overview_from_lbs.xlsx',\n",
923 | " sheet_name='am_resa_2015-2018')\n",
924 | "samp_df.head()"
925 | ]
926 | },
927 | {
928 | "cell_type": "code",
929 | "execution_count": 15,
930 | "metadata": {},
931 | "outputs": [
932 | {
933 | "data": {
934 | "text/html": [
935 | "\n",
936 | "\n",
949 | "
\n",
950 | " \n",
951 | " \n",
952 | " | \n",
953 | " station_id | \n",
954 | " date | \n",
955 | " depth1 | \n",
956 | " depth2 | \n",
957 | " water_sample_id | \n",
958 | "
\n",
959 | " \n",
960 | " \n",
961 | " \n",
962 | " | 0 | \n",
963 | " 16131 | \n",
964 | " 2015-06-11 08:00:00 | \n",
965 | " 0.0 | \n",
966 | " 0.0 | \n",
967 | " 638920 | \n",
968 | "
\n",
969 | " \n",
970 | " | 1 | \n",
971 | " 16131 | \n",
972 | " 2015-08-04 08:30:00 | \n",
973 | " 0.0 | \n",
974 | " 0.0 | \n",
975 | " 642959 | \n",
976 | "
\n",
977 | " \n",
978 | " | 2 | \n",
979 | " 16131 | \n",
980 | " 2015-08-30 08:30:00 | \n",
981 | " 0.0 | \n",
982 | " 0.0 | \n",
983 | " 640953 | \n",
984 | "
\n",
985 | " \n",
986 | " | 3 | \n",
987 | " 16131 | \n",
988 | " 2015-10-04 09:00:00 | \n",
989 | " 0.0 | \n",
990 | " 0.0 | \n",
991 | " 642399 | \n",
992 | "
\n",
993 | " \n",
994 | " | 4 | \n",
995 | " 16131 | \n",
996 | " 2016-06-08 00:00:00 | \n",
997 | " 0.0 | \n",
998 | " 10.0 | \n",
999 | " 652766 | \n",
1000 | "
\n",
1001 | " \n",
1002 | "
\n",
1003 | "
"
1004 | ],
1005 | "text/plain": [
1006 | " station_id date depth1 depth2 water_sample_id\n",
1007 | "0 16131 2015-06-11 08:00:00 0.0 0.0 638920\n",
1008 | "1 16131 2015-08-04 08:30:00 0.0 0.0 642959\n",
1009 | "2 16131 2015-08-30 08:30:00 0.0 0.0 640953\n",
1010 | "3 16131 2015-10-04 09:00:00 0.0 0.0 642399\n",
1011 | "4 16131 2016-06-08 00:00:00 0.0 10.0 652766"
1012 | ]
1013 | },
1014 | "execution_count": 15,
1015 | "metadata": {},
1016 | "output_type": "execute_result"
1017 | }
1018 | ],
1019 | "source": [
1020 | "# Match each listed sample to a RESA water sample (same station, calendar day and depths)\n",
1021 | "ws_list = []\n",
1022 | "for idx, row in samp_df.iterrows(): \n",
1023 | "    # Bind parameters for the RESA query\n",
1024 | "    par_dict = {'stn_id':row['station_id'],\n",
1025 | "                'sample_date': row['date'].date(),\n",
1026 | "                'depth1':row['depth1'],\n",
1027 | "                'depth2':row['depth2'],\n",
1028 | "               }\n",
1029 | "    \n",
1030 | "    sql = (\"SELECT * FROM resa2.water_samples \"\n",
1031 | "           \"WHERE station_id = :stn_id \"\n",
1032 | "           \"AND TRUNC(sample_date) = :sample_date \"\n",
1033 | "           \"AND depth1 = :depth1 \"\n",
1034 | "           \"AND depth2 = :depth2\")\n",
1035 | "    \n",
1036 | "    res = pd.read_sql(sql, con=eng, params=par_dict)\n",
1037 | "    \n",
1038 | "    if len(res) == 1:  # unique match: keep its ID\n",
1039 | "        ws_list.append(res['water_sample_id'].iloc[0])\n",
1040 | "    elif len(res) == 0:  # no match: NaN, so pd.isna() finds it later\n",
1041 | "        ws_list.append(np.nan)\n",
1042 | "    else:  # ambiguous match: report this row's own station ID and flag with -1\n",
1043 | "        print(row['station_id'], row['date'].date(), len(res))\n",
1044 | "        ws_list.append(-1)\n",
1045 | "\n",
1046 | "samp_df['water_sample_id'] = ws_list\n",
1047 | "samp_df.head()"
1048 | ]
1049 | },
1050 | {
1051 | "cell_type": "code",
1052 | "execution_count": 16,
1053 | "metadata": {},
1054 | "outputs": [
1055 | {
1056 | "data": {
1057 | "text/plain": [
1058 | "station_id 346\n",
1059 | "date 346\n",
1060 | "depth1 346\n",
1061 | "depth2 346\n",
1062 | "water_sample_id 346\n",
1063 | "dtype: int64"
1064 | ]
1065 | },
1066 | "execution_count": 16,
1067 | "metadata": {},
1068 | "output_type": "execute_result"
1069 | }
1070 | ],
1071 | "source": [
1072 | "samp_df.count()"
1073 | ]
1074 | },
1075 | {
1076 | "cell_type": "code",
1077 | "execution_count": 17,
1078 | "metadata": {},
1079 | "outputs": [
1080 | {
1081 | "data": {
1082 | "text/html": [
1083 | "\n",
1084 | "\n",
1097 | "
\n",
1098 | " \n",
1099 | " \n",
1100 | " | \n",
1101 | " station_id | \n",
1102 | " date | \n",
1103 | " depth1 | \n",
1104 | " depth2 | \n",
1105 | " water_sample_id | \n",
1106 | "
\n",
1107 | " \n",
1108 | " \n",
1109 | " \n",
1110 | "
\n",
1111 | "
"
1112 | ],
1113 | "text/plain": [
1114 | "Empty DataFrame\n",
1115 | "Columns: [station_id, date, depth1, depth2, water_sample_id]\n",
1116 | "Index: []"
1117 | ]
1118 | },
1119 | "execution_count": 17,
1120 | "metadata": {},
1121 | "output_type": "execute_result"
1122 | }
1123 | ],
1124 | "source": [
1125 | "not_found_df = samp_df[pd.isna(samp_df['water_sample_id'])]\n",
1126 | "not_found_df"
1127 | ]
1128 | },
1129 | {
1130 | "cell_type": "code",
1131 | "execution_count": 18,
1132 | "metadata": {},
1133 | "outputs": [],
1134 | "source": [
1135 | "## Add to sample selection\n",
1136 | "#samp_df['sample_selection_id'] = 66\n",
1137 | "#samp_df = samp_df[['water_sample_id', 'sample_selection_id']]\n",
1138 | "#\n",
1139 | "## Write to db\n",
1140 | "#samp_df.to_sql('sample_selections', \n",
1141 | "# eng, \n",
1142 | "# schema='RESA2', \n",
1143 | "# if_exists='append', \n",
1144 | "# index=False,\n",
1145 | "# )"
1146 | ]
1147 | },
1148 | {
1149 | "cell_type": "markdown",
1150 | "metadata": {},
1151 | "source": [
1152 | "## 7. Data from Faun\n",
1153 | "\n",
1154 | "The worksheet `faun` of `../project_overview_from_lbs.xlsx` lists all the FAUN samples in Liv Bente's spreadsheet. I have also created a list of new methods with `LABORATORY=FAUN` in the worksheet `faun_methods`, which have been added to the database."
1155 | ]
1156 | },
1157 | {
1158 | "cell_type": "markdown",
1159 | "metadata": {},
1160 | "source": [
1161 | "### 7.1. Link new method to parameters"
1162 | ]
1163 | },
1164 | {
1165 | "cell_type": "code",
1166 | "execution_count": 19,
1167 | "metadata": {},
1168 | "outputs": [],
1169 | "source": [
1170 | "# Read methods from Excel\n",
1171 | "meth_df = pd.read_excel('../project_overview_from_lbs.xlsx',\n",
1172 | " sheet_name='faun_methods')\n",
1173 | "\n",
1174 | "# Get new method IDs from database\n",
1175 | "sql = text(\"SELECT wc_method_id, name AS method_name \"\n",
1176 | " \"FROM resa2.wc_method_definitions \"\n",
1177 | " \"WHERE laboratory = 'FAUN'\")\n",
1178 | "meth_id_df = pd.read_sql(sql, eng)\n",
1179 | "\n",
1180 | "# Join\n",
1181 | "meth_df = pd.merge(meth_df, meth_id_df, how='left', on='method_name')\n",
1182 | "\n",
1183 | "# Add corr_fac = 1\n",
1184 | "meth_df['conversion_factor'] = 1\n",
1185 | "\n",
1186 | "# Add to db\n",
1187 | "par_meth_df = meth_df[['wc_parameter_id', 'wc_method_id', 'conversion_factor']]"
1188 | ]
1189 | },
1190 | {
1191 | "cell_type": "code",
1192 | "execution_count": 20,
1193 | "metadata": {},
1194 | "outputs": [],
1195 | "source": [
1196 | "#par_meth_df.to_sql('wc_parameters_methods', \n",
1197 | "# eng, \n",
1198 | "# schema='RESA2', \n",
1199 | "# if_exists='append', \n",
1200 | "# index=False,\n",
1201 | "# )"
1202 | ]
1203 | },
1204 | {
1205 | "cell_type": "markdown",
1206 | "metadata": {},
1207 | "source": [
1208 | "### 7.2. Add water samples"
1209 | ]
1210 | },
1211 | {
1212 | "cell_type": "code",
1213 | "execution_count": 21,
1214 | "metadata": {},
1215 | "outputs": [],
1216 | "source": [
1217 | "# Read FAUN data\n",
1218 | "faun_df = pd.read_excel('../project_overview_from_lbs.xlsx',\n",
1219 | " sheet_name='faun')\n",
1220 | "\n",
1221 | "# Add water samples\n",
1222 | "ws_df = faun_df[['station_id', 'sample_date', 'depth1', 'depth2']]"
1223 | ]
1224 | },
1225 | {
1226 | "cell_type": "code",
1227 | "execution_count": 22,
1228 | "metadata": {},
1229 | "outputs": [],
1230 | "source": [
1231 | "#ws_df.to_sql('water_samples', \n",
1232 | "# eng, \n",
1233 | "# schema='RESA2', \n",
1234 | "# if_exists='append', \n",
1235 | "# index=False,\n",
1236 | "# )"
1237 | ]
1238 | },
1239 | {
1240 | "cell_type": "markdown",
1241 | "metadata": {},
1242 | "source": [
1243 | "### 7.3. Add water chemistry"
1244 | ]
1245 | },
1246 | {
1247 | "cell_type": "code",
1248 | "execution_count": 23,
1249 | "metadata": {},
1250 | "outputs": [],
1251 | "source": [
1252 | "def f(row):\n",
1253 | " \"\"\" Function to deal with flags.\n",
1254 | " \"\"\"\n",
1255 | " if '<' in row['value']:\n",
1256 | " val = '<'\n",
1257 | " elif '>' in row['value']:\n",
1258 | " val = '>'\n",
1259 | " else:\n",
1260 | " val = np.nan\n",
1261 | " return val"
1262 | ]
1263 | },
1264 | {
1265 | "cell_type": "code",
1266 | "execution_count": 24,
1267 | "metadata": {},
1268 | "outputs": [
1269 | {
1270 | "data": {
1271 | "text/html": [
1272 | "\n",
1273 | "\n",
1286 | "
\n",
1287 | " \n",
1288 | " \n",
1289 | " | \n",
1290 | " sample_id | \n",
1291 | " method_id | \n",
1292 | " value | \n",
1293 | " flag1 | \n",
1294 | " approved | \n",
1295 | "
\n",
1296 | " \n",
1297 | " \n",
1298 | " \n",
1299 | " | 0 | \n",
1300 | " 872225 | \n",
1301 | " 10959 | \n",
1302 | " 4.9 | \n",
1303 | " NaN | \n",
1304 | " YES | \n",
1305 | "
\n",
1306 | " \n",
1307 | " | 1 | \n",
1308 | " 872226 | \n",
1309 | " 10959 | \n",
1310 | " 5.4 | \n",
1311 | " NaN | \n",
1312 | " YES | \n",
1313 | "
\n",
1314 | " \n",
1315 | " | 2 | \n",
1316 | " 872227 | \n",
1317 | " 10959 | \n",
1318 | " 5.0 | \n",
1319 | " NaN | \n",
1320 | " YES | \n",
1321 | "
\n",
1322 | " \n",
1323 | " | 3 | \n",
1324 | " 872245 | \n",
1325 | " 10959 | \n",
1326 | " 5.2 | \n",
1327 | " NaN | \n",
1328 | " YES | \n",
1329 | "
\n",
1330 | " \n",
1331 | " | 4 | \n",
1332 | " 872246 | \n",
1333 | " 10959 | \n",
1334 | " 5.4 | \n",
1335 | " NaN | \n",
1336 | " YES | \n",
1337 | "
\n",
1338 | " \n",
1339 | "
\n",
1340 | "
"
1341 | ],
1342 | "text/plain": [
1343 | " sample_id method_id value flag1 approved\n",
1344 | "0 872225 10959 4.9 NaN YES\n",
1345 | "1 872226 10959 5.4 NaN YES\n",
1346 | "2 872227 10959 5.0 NaN YES\n",
1347 | "3 872245 10959 5.2 NaN YES\n",
1348 | "4 872246 10959 5.4 NaN YES"
1349 | ]
1350 | },
1351 | "execution_count": 24,
1352 | "metadata": {},
1353 | "output_type": "execute_result"
1354 | }
1355 | ],
1356 | "source": [
1357 | "# Get ws_ids\n",
1358 | "stn_list = list(ws_df['station_id'].unique().astype(str))\n",
1359 | "stn_txt = ','.join(stn_list)\n",
1360 | "\n",
1361 | "sql = text(\"SELECT water_sample_id, station_id, sample_date, depth1, depth2 \"\n",
1362 | " \"FROM resa2.water_samples \"\n",
1363 | " \"WHERE station_id in (%s)\" % stn_txt)\n",
1364 | "\n",
1365 | "samp_df = pd.read_sql(sql, eng)\n",
1366 | "\n",
1367 | "# Link to faun data\n",
1368 | "df = pd.merge(faun_df, \n",
1369 | " samp_df, \n",
1370 | " how='left', \n",
1371 | " on=['station_id', 'sample_date', 'depth1', 'depth2'],\n",
1372 | " )\n",
1373 | "\n",
1374 | "df.drop(['station_id', 'station_code', 'sample_date', 'depth1', 'depth2'], \n",
1375 | " axis=1, \n",
1376 | " inplace=True)\n",
1377 | "\n",
1378 | "# Convert to long format\n",
1379 | "df = df.melt(id_vars='water_sample_id', \n",
1380 | " var_name='method_name').dropna()\n",
1381 | "\n",
1382 | "# Join method IDs\n",
1383 | "df = pd.merge(df, \n",
1384 | " meth_df[['method_name', 'wc_method_id']],\n",
1385 | " how='left',\n",
1386 | " on='method_name',\n",
1387 | " )\n",
1388 | "\n",
1389 | "# Deal with flags\n",
1390 | "df['value'] = df['value'].astype(str)\n",
1391 | "df['flag1'] = df.apply(f, axis=1)\n",
1392 | "\n",
1393 | "# Extract the numeric part of each value, keeping a leading sign on integers as well as decimals\n",
1394 | "df['value'] = df['value'].str.extract(r\"([-+]?(?:\\d*\\.\\d+|\\d+))\", expand=True)\n",
1395 | "df['value'] = df['value'].astype(float)\n",
1396 | "\n",
1397 | "# Reorder\n",
1398 | "df = df[['water_sample_id', 'wc_method_id', 'value', 'flag1']]\n",
1399 | "df.columns = ['sample_id', 'method_id', 'value', 'flag1']\n",
1400 | "df['approved'] = 'YES'\n",
1401 | "\n",
1402 | "df.head()"
1403 | ]
1404 | },
1405 | {
1406 | "cell_type": "code",
1407 | "execution_count": 25,
1408 | "metadata": {},
1409 | "outputs": [],
1410 | "source": [
1411 | "## Add to db\n",
1412 | "#df.to_sql('water_chemistry_values2', \n",
1413 | "# eng, \n",
1414 | "# schema='RESA2', \n",
1415 | "# if_exists='append', \n",
1416 | "# index=False,\n",
1417 | "# )"
1418 | ]
1419 | },
1420 | {
1421 | "cell_type": "markdown",
1422 | "metadata": {},
1423 | "source": [
1424 | "### 7.4. Add to sample selection"
1425 | ]
1426 | },
1427 | {
1428 | "cell_type": "code",
1429 | "execution_count": 26,
1430 | "metadata": {},
1431 | "outputs": [],
1432 | "source": [
1433 | "# Build df\n",
1434 | "ws_df = pd.DataFrame({'water_sample_id':df['sample_id'].unique()})\n",
1435 | "ws_df['sample_selection_id'] = 66\n",
1436 | "\n",
1437 | "## Write to db\n",
1438 | "#ws_df.to_sql('sample_selections', \n",
1439 | "# eng, \n",
1440 | "# schema='RESA2', \n",
1441 | "# if_exists='append', \n",
1442 | "# index=False,\n",
1443 | "# )"
1444 | ]
1445 | },
1446 | {
1447 | "cell_type": "markdown",
1448 | "metadata": {},
1449 | "source": [
1450 | "## 8. Additions from Liv Bente\n",
1451 | "\n",
1452 | "After some further checking, Liv Bente has identified some additional samples that should be included (see e-mail received 20.08.2020 at 16:30). These samples are listed in \n",
1453 | "\n",
1454 | " biolok\\water_samples_to_add_2020-08-21.xlsx"
1455 | ]
1456 | },
1457 | {
1458 | "cell_type": "code",
1459 | "execution_count": 5,
1460 | "metadata": {},
1461 | "outputs": [
1462 | {
1463 | "data": {
1464 | "text/html": [
1465 | "\n",
1466 | "\n",
1479 | "
\n",
1480 | " \n",
1481 | " \n",
1482 | " | \n",
1483 | " station_id | \n",
1484 | " station_code | \n",
1485 | " station_name | \n",
1486 | " date | \n",
1487 | " depth1 | \n",
1488 | " depth2 | \n",
1489 | "
\n",
1490 | " \n",
1491 | " \n",
1492 | " \n",
1493 | " | 0 | \n",
1494 | " 195 | \n",
1495 | " 1502-602 | \n",
1496 | " Lunddalsvatnet | \n",
1497 | " 2015-05-26 | \n",
1498 | " 0 | \n",
1499 | " 12.5 | \n",
1500 | "
\n",
1501 | " \n",
1502 | " | 1 | \n",
1503 | " 195 | \n",
1504 | " 1502-602 | \n",
1505 | " Lunddalsvatnet | \n",
1506 | " 2015-06-30 | \n",
1507 | " 0 | \n",
1508 | " 15.0 | \n",
1509 | "
\n",
1510 | " \n",
1511 | " | 2 | \n",
1512 | " 195 | \n",
1513 | " 1502-602 | \n",
1514 | " Lunddalsvatnet | \n",
1515 | " 2015-07-27 | \n",
1516 | " 0 | \n",
1517 | " 0.0 | \n",
1518 | "
\n",
1519 | " \n",
1520 | " | 3 | \n",
1521 | " 195 | \n",
1522 | " 1502-602 | \n",
1523 | " Lunddalsvatnet | \n",
1524 | " 2015-08-19 | \n",
1525 | " 0 | \n",
1526 | " 12.0 | \n",
1527 | "
\n",
1528 | " \n",
1529 | " | 4 | \n",
1530 | " 195 | \n",
1531 | " 1502-602 | \n",
1532 | " Lunddalsvatnet | \n",
1533 | " 2015-09-21 | \n",
1534 | " 0 | \n",
1535 | " 14.0 | \n",
1536 | "
\n",
1537 | " \n",
1538 | "
\n",
1539 | "
"
1540 | ],
1541 | "text/plain": [
1542 | " station_id station_code station_name date depth1 depth2\n",
1543 | "0 195 1502-602 Lunddalsvatnet 2015-05-26 0 12.5\n",
1544 | "1 195 1502-602 Lunddalsvatnet 2015-06-30 0 15.0\n",
1545 | "2 195 1502-602 Lunddalsvatnet 2015-07-27 0 0.0\n",
1546 | "3 195 1502-602 Lunddalsvatnet 2015-08-19 0 12.0\n",
1547 | "4 195 1502-602 Lunddalsvatnet 2015-09-21 0 14.0"
1548 | ]
1549 | },
1550 | "execution_count": 5,
1551 | "metadata": {},
1552 | "output_type": "execute_result"
1553 | }
1554 | ],
1555 | "source": [
1556 | "# Load the 'to_add' worksheet of samples to be added\n",
1557 | "samp_df = pd.read_excel(\n",
1558 | "    io='../water_samples_to_add_2020-08-21.xlsx', sheet_name='to_add')\n",
1559 | "samp_df.head()"
1560 | ]
1561 | },
1562 | {
1563 | "cell_type": "code",
1564 | "execution_count": 8,
1565 | "metadata": {},
1566 | "outputs": [
1567 | {
1568 | "data": {
1569 | "text/html": [
1570 | "\n",
1571 | "\n",
1584 | "
\n",
1585 | " \n",
1586 | " \n",
1587 | " | \n",
1588 | " station_id | \n",
1589 | " station_code | \n",
1590 | " station_name | \n",
1591 | " date | \n",
1592 | " depth1 | \n",
1593 | " depth2 | \n",
1594 | " water_sample_id | \n",
1595 | "
\n",
1596 | " \n",
1597 | " \n",
1598 | " \n",
1599 | " | 0 | \n",
1600 | " 195 | \n",
1601 | " 1502-602 | \n",
1602 | " Lunddalsvatnet | \n",
1603 | " 2015-05-26 | \n",
1604 | " 0 | \n",
1605 | " 12.5 | \n",
1606 | " 872164 | \n",
1607 | "
\n",
1608 | " \n",
1609 | " | 1 | \n",
1610 | " 195 | \n",
1611 | " 1502-602 | \n",
1612 | " Lunddalsvatnet | \n",
1613 | " 2015-06-30 | \n",
1614 | " 0 | \n",
1615 | " 15.0 | \n",
1616 | " 872160 | \n",
1617 | "
\n",
1618 | " \n",
1619 | " | 2 | \n",
1620 | " 195 | \n",
1621 | " 1502-602 | \n",
1622 | " Lunddalsvatnet | \n",
1623 | " 2015-07-27 | \n",
1624 | " 0 | \n",
1625 | " 0.0 | \n",
1626 | " 872166 | \n",
1627 | "
\n",
1628 | " \n",
1629 | " | 3 | \n",
1630 | " 195 | \n",
1631 | " 1502-602 | \n",
1632 | " Lunddalsvatnet | \n",
1633 | " 2015-08-19 | \n",
1634 | " 0 | \n",
1635 | " 12.0 | \n",
1636 | " 872158 | \n",
1637 | "
\n",
1638 | " \n",
1639 | " | 4 | \n",
1640 | " 195 | \n",
1641 | " 1502-602 | \n",
1642 | " Lunddalsvatnet | \n",
1643 | " 2015-09-21 | \n",
1644 | " 0 | \n",
1645 | " 14.0 | \n",
1646 | " 872156 | \n",
1647 | "
\n",
1648 | " \n",
1649 | "
\n",
1650 | "
"
1651 | ],
1652 | "text/plain": [
1653 | " station_id station_code station_name date depth1 depth2 \\\n",
1654 | "0 195 1502-602 Lunddalsvatnet 2015-05-26 0 12.5 \n",
1655 | "1 195 1502-602 Lunddalsvatnet 2015-06-30 0 15.0 \n",
1656 | "2 195 1502-602 Lunddalsvatnet 2015-07-27 0 0.0 \n",
1657 | "3 195 1502-602 Lunddalsvatnet 2015-08-19 0 12.0 \n",
1658 | "4 195 1502-602 Lunddalsvatnet 2015-09-21 0 14.0 \n",
1659 | "\n",
1660 | " water_sample_id \n",
1661 | "0 872164 \n",
1662 | "1 872160 \n",
1663 | "2 872166 \n",
1664 | "3 872158 \n",
1665 | "4 872156 "
1666 | ]
1667 | },
1668 | "execution_count": 8,
1669 | "metadata": {},
1670 | "output_type": "execute_result"
1671 | }
1672 | ],
1673 | "source": [
1674 | "# Look up the RESA2 water_sample_id matching each row of samp_df\n",
1675 | "# (NaN = no matching sample; -1 = more than one matching sample)\n",
1676 | "sql = (\"SELECT * FROM resa2.water_samples \"\n",
1677 | "       \"WHERE station_id = :stn_id \"\n",
1678 | "       \"AND TRUNC(sample_date) = :sample_date \"\n",
1679 | "       \"AND depth1 = :depth1 \"\n",
1680 | "       \"AND depth2 = :depth2\")\n",
1681 | "\n",
1682 | "ws_list = []\n",
1683 | "for idx, row in samp_df.iterrows():\n",
1684 | "    # Bind parameters for this row (station, calendar date, depth range)\n",
1685 | "    par_dict = {'stn_id': row['station_id'],\n",
1686 | "                'sample_date': row['date'].date(),\n",
1687 | "                'depth1': row['depth1'],\n",
1688 | "                'depth2': row['depth2'],\n",
1689 | "               }\n",
1690 | "    res = pd.read_sql(sql, con=eng, params=par_dict)\n",
1691 | "\n",
1692 | "    if len(res) == 1:\n",
1693 | "        ws_list.append(res['water_sample_id'].iloc[0])\n",
1694 | "    else:\n",
1695 | "        # Report the problem row using its own station code; the name\n",
1696 | "        # 'stn_code' used here previously is not defined in this cell\n",
1697 | "        print(row['station_code'], par_dict['sample_date'], len(res))\n",
1698 | "        # No match -> NaN; several matches -> -1\n",
1699 | "        ws_list.append(np.nan if len(res) == 0 else -1)\n",
1700 | "\n",
1701 | "samp_df['water_sample_id'] = ws_list\n",
1702 | "samp_df.head()"
1703 | ]
1704 | },
1705 | {
1706 | "cell_type": "code",
1707 | "execution_count": 9,
1708 | "metadata": {},
1709 | "outputs": [],
1710 | "source": [
1711 | "## Add to sample selection\n",
1712 | "#samp_df['sample_selection_id'] = 66\n",
1713 | "#samp_df = samp_df[['water_sample_id', 'sample_selection_id']]\n",
1714 | "#\n",
1715 | "## Write to db\n",
1716 | "#samp_df.to_sql('sample_selections', \n",
1717 | "# eng, \n",
1718 | "# schema='RESA2', \n",
1719 | "# if_exists='append', \n",
1720 | "# index=False,\n",
1721 | "# )"
1722 | ]
1723 | }
1724 | ],
1725 | "metadata": {
1726 | "kernelspec": {
1727 | "display_name": "Python 3",
1728 | "language": "python",
1729 | "name": "python3"
1730 | },
1731 | "language_info": {
1732 | "codemirror_mode": {
1733 | "name": "ipython",
1734 | "version": 3
1735 | },
1736 | "file_extension": ".py",
1737 | "mimetype": "text/x-python",
1738 | "name": "python",
1739 | "nbconvert_exporter": "python",
1740 | "pygments_lexer": "ipython3",
1741 | "version": "3.7.6"
1742 | }
1743 | },
1744 | "nbformat": 4,
1745 | "nbformat_minor": 4
1746 | }
1747 |
--------------------------------------------------------------------------------