├── Zenith LiteT Diffuse Reflectance Target - 95%R.csv
├── Zenith LiteT Diffuse Reflectance Target - 95%R.pdf
├── .github
│   ├── ISSUE_TEMPLATE
│   │   ├── data-error.md
│   │   ├── feature_request.md
│   │   ├── bug-report.md
│   │   └── generic-issue-template.md
│   └── ISSUE_TEMPLATE.md
├── meeting-notes
│   ├── 2015-12-11_algroithm_standards.md
│   ├── 2015-12-09_sensor_standards.md
│   ├── 2016_08_03_algorithms_standards.md
│   ├── 2015-12-11_genomics_standards.md
│   ├── 2016-02-18-traits-committee.md
│   ├── 2016_02_11_sensor_standards.md
│   ├── 2016_03_31_sensor_standards.md
│   ├── 2016_08_02_traits_standards.md
│   ├── 2016_08_01_genomics_standards.md
│   └── 2015-10-28_all_standards.md
├── scripts
│   ├── terra_dataset_counts.sh
│   ├── data_storage_estimates.Rmd
│   ├── experimental_design.Rmd
│   ├── maricopa_field_data.Rmd
│   ├── MAC Season 5.Rmd
│   ├── maricopa_field_metadata.Rmd
│   └── Season_Upload_Functions.R
├── LICENSE
├── README.md
└── CONTRIBUTING.md
/Zenith LiteT Diffuse Reflectance Target - 95%R.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terraref/reference-data/HEAD/Zenith LiteT Diffuse Reflectance Target - 95%R.csv
--------------------------------------------------------------------------------
/Zenith LiteT Diffuse Reflectance Target - 95%R.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/terraref/reference-data/HEAD/Zenith LiteT Diffuse Reflectance Target - 95%R.pdf
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/data-error.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Data Error
3 | about: Report errors found in published TERRA REF data
4 | title: "[Data Error]"
5 | labels: data/review
6 | assignees: dlebauer
7 |
8 | ---
9 |
10 | ## Dataset
11 |
12 | ## Suggested Correction
13 |
--------------------------------------------------------------------------------
/meeting-notes/2015-12-11_algroithm_standards.md:
--------------------------------------------------------------------------------
1 | # December 11, 2015 5pm CST
2 |
3 | In attendance:
4 |
5 | Barnabas Poczos (CMU)
6 |
7 | David Guarrera (DOE)
8 |
9 | David LeBauer (UIUC)
10 |
11 | 1. ICASA database collaboration
12 |
13 |     1. Simulation and mechanistic model
14 |
15 |     2. Shared simulated dataset to allow people to see the format
16 |
17 | 2. Discussion about what data comes from PlantCV
18 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug-report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 |
26 | **Desktop (please complete the following information):**
27 | - OS: [e.g. iOS]
28 | - Browser [e.g. chrome, safari]
29 | - Version [e.g. 22]
30 |
31 | **Smartphone (please complete the following information):**
32 | - Device: [e.g. iPhone6]
33 | - OS: [e.g. iOS8.1]
34 | - Browser [e.g. stock browser, safari]
35 | - Version [e.g. 22]
36 |
37 | **Additional context**
38 | Add any other context about the problem here.
39 |
--------------------------------------------------------------------------------
/scripts/terra_dataset_counts.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | output_file="/gpfs/largeblockFS/projects/arpae/terraref/users/dlebauer/dataset_count_report.csv"
4 |
5 | echo "$(date)" > "${output_file}"
6 |
7 | ## UA-MAC Level 1 ##
8 | mac_lev1=(envlog_netcdf fullfield ir_geotiff laser3d_heightmap laser3d_ply2las rgb_geotiff scanner3DTop vnir_netcdf)
9 |
10 | for s in "${mac_lev1[@]}"
11 | do
12 |   dates=$(ls /gpfs/largeblockFS/projects/arpae/terraref/sites/ua-mac/Level_1/${s}/)
13 |   for d in ${dates}
14 |   do
15 |     # count directories under the sensor/date path (find includes the date directory itself)
16 |     count=$(find /gpfs/largeblockFS/projects/arpae/terraref/sites/ua-mac/Level_1/${s}/${d}/ -type d | wc -l)
17 |     echo "ua-mac,level_1,${s},${d},${count}" >> "${output_file}"
18 |   done
19 | done
20 |
21 | ## UA-MAC RAW ##
22 |
23 | mac_raw=(EnvironmentLogger SWIR VNIR flirIrCamera irrigation lightning scanner3DTop stereoTop weather)
24 |
25 | for s in "${mac_raw[@]}"
26 | do
27 |   dates=$(ls /gpfs/largeblockFS/projects/arpae/terraref/sites/ua-mac/raw_data/${s}/)
28 |   for d in ${dates}
29 |   do
30 |     count=$(find /gpfs/largeblockFS/projects/arpae/terraref/sites/ua-mac/raw_data/${s}/${d}/ -type d | wc -l)
31 |     echo "ua-mac,raw,${s},${d},${count}" >> "${output_file}"
32 |   done
33 | done
34 | chown 47852 "${output_file}"
35 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2018, TERRA REF
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | * Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | * Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/scripts/data_storage_estimates.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Budget Estimate"
3 | author: "David LeBauer"
4 | date: "October 7, 2015"
5 | output: html_document
6 | ---
7 |
8 |
9 |
10 | ```{r}
11 | library(data.table)
12 | library(udunits2)
13 |
14 | ### Rates
15 |
16 | ## Amazon Glacier:
17 | ## $0.007 / GB / mo = $84 / TB / y storage +
18 | ## $0.07 / GB transfer = $70,000 / PB transfer
19 | amazon_glacier <- ud.convert(0.007, "month-1", "year-1") * 1000  # $ / TB / y
20 |
21 | ## Scratch volume totals (TB) from early estimates
22 | FOA <- 1 * 365/2 * 4
23 | fourtb_d <- 1 * 365 * 4
24 |
25 | ## Daily accumulation over four years: `total` assumes 4 TB / day year-round;
26 | ## `min` assumes 1 TB / day during the first half of each year only
27 | time <- data.table(day = 1:(365*4), doy = rep(1:365, 4), year = rep(1:4, each = 365), rate = 4)
28 | time[, `:=` (min = cumsum(ifelse(doy <= 182, rate/4, 0)), total = cumsum(rate))]
29 |
30 | ## Cost of storing the accumulated volume at $145 / TB / y, accrued daily
31 | time[, `:=` (cost = total * (145/365))]
32 | time[, `:=` (total_cost = cumsum(cost))]
33 |
34 | format(time$total_cost, scientific = FALSE, digits = 0)
35 | time[, list(max(total_cost)), by = year]
36 |
37 | ## Split the $145 / TB / y rate into storage ($95) and backup ($50) components
38 | time[, list(storage = max(total_cost) * 95/145, backup = max(total_cost) * 50/145), by = year]
39 | ```
40 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TERRA-REF Reference Data
2 |
3 | Reference data encompasses clear definitions of data formats, semantics, and interfaces, as well as representations of space, time, and genetic identity. These definitions are based on existing standards, commonly used file formats, and user needs, and are intended to make it easier to analyze and exchange data and results.
4 |
5 |
6 | ### Contact:
7 |
8 | * [Website](https://terraref.org)
9 | * email: dlebauer@arizona.edu
10 | * [GitHub Issues](https://github.com/terraref/reference-data/issues)
11 | * [Slack Channel](https://terra-ref.slack.com) ... [register here](https://terraref-slack-invite.herokuapp.com/).
12 |
13 | To provide input on the computing pipeline, please visit the [Computing Pipeline GitHub repository](https://github.com/terraref/computing-pipeline).
14 |
15 | * [GitHub Issues](https://github.com/terraref/computing-pipeline/issues)
16 |
17 |
18 |
19 | ### Terms of Use
20 |
21 | * Project wide - if any code or data do not have clear terms of reuse, please request that the author provide one. We use [BSD 3 Clause](https://opensource.org/licenses/BSD-3-Clause) by default.
22 |
23 |
24 | To the extent possible under law, TERRA REF has waived all copyright and related or neighboring rights to the TERRA Reference Phenotyping Data.
25 |
26 | Scientists are expected to adhere to conventions for citing our data and software in publications.
27 |
--------------------------------------------------------------------------------
/meeting-notes/2015-12-09_sensor_standards.md:
--------------------------------------------------------------------------------
1 | # TERRA sensor data standards teleconference
2 |
3 | December 9, 2015
4 |
5 | David LeBauer (UIUC)
6 |
7 | Christine Laney (NEON)
8 |
9 | Melba Crawford (Purdue)
10 |
11 | Shawn Serbin (BNL)
12 |
13 | Yewin Shi (Texas A&M)
14 |
15 | Charlie Zender (UCI)
16 |
17 | Josh Elliot (NEON)
18 |
19 | Ed Delp (Purdue)
20 |
21 | Larry Biel
22 |
23 |
24 |
25 | [issue #2](https://github.com/terraref/reference-data/issues/2)
26 |
27 | * Metadata format for LemnaTec raw data – is there anything that we’re missing?
28 | * Dependent on instrument? Yes, there is a code for each sensor.
29 | * What is “raw”? Has a calibration been applied? Is the hyperspectral data in radiance? There are two sets of files - one has been corrected.
30 | * Integers can save space relative to floating point, and values can be unpacked on the fly
31 | * Where is the metadata for the conversion / integer scale (scale_factor, add_offset)? It may be safer to save raw data in case there is a conversion error. This may result in the need for multiple data releases. (A minimal packing sketch appears at the end of these notes.)
32 | * Keep raw reflectance data and convert to radiance data
33 | * David to find out how this is handled for the system at Lincoln
34 | * We will have a three-dimensional surface of plants from the laser scanner for normalization. This seems like a very difficult research-level problem
35 | * BRDF
36 | * TERRA Ref should create protocols for doing corrections so that a common panel can be shared? (Christine Laney)
37 | * Calibration for light, distortion needed – is Lemnatec doing that? Can the calibration data be distributed? David to develop a list
38 | * Feedback for sampling and frequency - none
39 | * David to develop mailing lists in January.
40 |
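41 | ---
42 |
43 | *A minimal R sketch of the `scale_factor`/`add_offset` integer packing discussed above. The NetCDF convention is `unpacked = packed * scale_factor + add_offset`; the parameter choices here are illustrative, not the pipeline's actual values.*
44 |
45 | ```r
46 | # Pack reflectance values in [0, 1] into 16-bit integers (illustrative parameters)
47 | reflectance  <- c(0.0213, 0.4478, 0.9991)
48 | scale_factor <- 1 / 65535
49 | add_offset   <- 0
50 |
51 | packed   <- as.integer(round((reflectance - add_offset) / scale_factor))
52 | unpacked <- packed * scale_factor + add_offset
53 |
54 | max(abs(unpacked - reflectance))  # worst-case round-trip error ~ scale_factor / 2
55 | ```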
--------------------------------------------------------------------------------
/scripts/experimental_design.Rmd:
--------------------------------------------------------------------------------
1 | # Experimental Design
2 |
3 |
4 | ```{r setup, echo=FALSE, message = FALSE}
5 | library(googlesheets)
6 | library(knitr)
7 | library(dplyr)
8 |
9 | opts_chunk$set(echo = FALSE, message = FALSE, cache = TRUE)
10 |
11 | ```
12 | ## Accessions
13 |
14 | ```{r accessions, echo = FALSE, message = FALSE}
15 | ss <- gs_key("1Nfabx_n1rNlO6NW3olD8MAibJ3KHnOMmMwOYYw4wwGc")
16 | accessions <- gs_read(ss, ws = "Accessions")
17 | # replace NA with "" in every column (older mutate_each/funs_ dplyr idiom), then keep the key columns
18 | a <- accessions %>% mutate_each( funs_( lazyeval::interp( ~replace(., is.na(.), "") ) ) ) %>% select(Entry, Code, Source, Pedigree, Pedigree2)
19 | a[1:193,] %>% kable
20 |
21 | a[195:201,] %>% kable(caption = a[194,'Entry'])
22 |
23 | a[204:252,] %>% kable(caption = a[203,'Entry'])
24 |
25 | ```
26 |
27 | https://docs.google.com/spreadsheets/d/1Nfabx_n1rNlO6NW3olD8MAibJ3KHnOMmMwOYYw4wwGc/pubhtml?gid=239932660&single=true
28 |
29 | ## Experiments
30 |
31 | ```{r experiments, echo = FALSE, message = FALSE}
32 | experiments <- gs_read(ss, ws = "Experiments")
33 | experiments %>% kable
34 | ```
35 |
36 | https://docs.google.com/spreadsheets/d/1Nfabx_n1rNlO6NW3olD8MAibJ3KHnOMmMwOYYw4wwGc/pubhtml?gid=890543376&single=true
37 |
38 | ## 2016 Field Layout
39 |
40 | ### Under Gantry
41 |
42 | ```{r gantry-plot-layout, echo = FALSE, message = FALSE}
43 | gantry_plot_layout <- gs_read(ss, ws = "Gantry Plot Layout")
44 | gantry_plot_layout %>% kable
45 | ```
46 |
47 | https://docs.google.com/spreadsheets/d/1Nfabx_n1rNlO6NW3olD8MAibJ3KHnOMmMwOYYw4wwGc/pubhtml?gid=1231399646&single=true
48 |
49 | ### West of Gantry
50 |
51 | ```{r west-of-gantry-plot-layout, echo = FALSE, message = FALSE}
52 | west_of_gantry_plot_layout <- gs_read(ss, ws = "West of Gantry Plot Layout")
53 | west_of_gantry_plot_layout %>% kable
54 | ```
55 |
56 | https://docs.google.com/spreadsheets/d/1Nfabx_n1rNlO6NW3olD8MAibJ3KHnOMmMwOYYw4wwGc/pubhtml?gid=728631369&single=true
57 |
58 |
--------------------------------------------------------------------------------
/meeting-notes/2016_08_03_algorithms_standards.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: post
3 | title: "August 2016 Algorithms Standards Committee Meeting Notes"
4 | modified:
5 | categories: blog
6 | excerpt:
7 | tags: []
8 | image:
9 | feature:
10 | date: 2016-08-03T20:43:38-05:00
11 | ---
12 | # August 2016 Algorithms Standards Committee Meeting Notes
13 |
14 | ## **Participants**
15 |
16 | David LeBauer, Max Burnette ([mburnet2@illinois.edu](mailto:mburnet2@illinois.edu)), Cheryl Porter, Barnabas Poczos, Justin McGrath, Robert Pless
17 |
18 | REGRETS:
19 | David Guarrera, Roman Garnett
20 |
21 | ## **Agenda**
22 |
23 | ### **Infrastructure Overview**
24 |
25 | 
26 |
27 | ### **Using Clowder: Developing Extractors**
28 |
29 | - Development instance: [https://terraref.ncsa.illinois.edu/clowder-dev/](https://terraref.ncsa.illinois.edu/clowder-dev/)
30 | - Production instance: [https://terraref.ncsa.illinois.edu/clowder/](https://terraref.ncsa.illinois.edu/clowder/)
31 |
32 | - Slides and video for installation/deployment: [https://opensource.ncsa.illinois.edu/confluence/display/CATS/Documents](https://opensource.ncsa.illinois.edu/confluence/display/CATS/Documents)
33 |
34 | - Demonstrate tool launcher
35 |
36 | - General discussion of workflow opportunities
37 |
38 | ### **Computing Resources at NCSA**
39 |
40 | ROGER: [https://wiki.ncsa.illinois.edu/display/ROGER/ROGER+User+Guide](https://wiki.ncsa.illinois.edu/display/ROGER/ROGER+User+Guide)
41 |
42 | XSEDE: [https://www.xsede.org/](https://www.xsede.org/)
43 |
44 | ### **Suggestions**
45 |
46 | Associating data with software versions
47 |
48 | Supporting reproducibility
49 |
50 | Standard process for updating software versions and re-running code
51 |
52 | Provide ability for end-users to test out different algorithms
53 |
54 | - e.g. provide a list that users can choose from
55 | - access algorithms via git repositories
56 | - configure new docker 'tools'
57 |
--------------------------------------------------------------------------------
/meeting-notes/2015-12-11_genomics_standards.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: post
3 | title: "December 2015 Genomic Standards Committee Meeting Notes"
4 | modified:
5 | categories: blog
6 | excerpt:
7 | tags: []
8 | image:
9 | feature:
10 | date: 2015-12-11T20:43:38-05:00
11 | ---
12 |
13 | # Genomic Data Meeting
14 | December 11, 2015
15 |
16 | Rachel Shekar
17 | David LeBauer
18 | Christine Laney (Neon)
19 | Eric Lyons (AZ)
20 | Mike Gore (Cornell)
21 | David Lee (DOE)
22 | Elodie (Cornell)
23 |
24 | - Eric has a similar pipeline already developed and can share it with Mike
25 |
26 | - Maps reads using bowtie, tophat…
27 |
28 | - Can call SNPs multiple ways
29 |
30 | - Leads to JBrowse
31 |
32 | - Use cases and speed need to be known before we will know if this works
33 |
34 | - Mike has 500 lines with 3-4x coverage
35 |
36 | - David LeBauer thinks in total, TERRA will have 200 lines in year 1 and 2 with 40x coverage and some with 100x coverage. Ask David Lee if this is correct.
37 |
38 | - Elodie doesn’t think 200x is correct; 40x is a lot. What is it compared to the human genome? What is the return on that volume? The processing and computing time would be too high. If someone wants to use 100x, we need to prepare for that. (A back-of-envelope volume estimate appears at the end of these notes.)
39 |
40 | - Eric can run benchmarking with Elodie’s test data sets before sending them through.
41 |
42 | - Eric’s pipeline can likely be put in repository – Open Source
43 |
44 | - Pipeline = major tools
45 |
46 | - Do you want integration system too?
47 |
48 | - They can do customization
49 |
50 | - [http://genomevolution.org](http://genomevolution.org/)
51 |
52 | - They have bulk upload capabilities and bulk metadata upload and management
53 |
54 | - They can co-develop with UofI team
55 |
56 | - Data can be transferred with API
57 |
58 | - David to meet with Eric in Arizona
59 |
60 | Installation for CoGe: https://genomevolution.org/wiki/index.php/Install_coge
61 |
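62 | ---
63 |
64 | *A back-of-envelope estimate (not from the meeting) of raw sequence volume, assuming a ~730 Mb sorghum genome and roughly 2 bytes of gzipped FASTQ per sequenced base:*
65 |
66 | ```r
67 | # Rough raw-data volume: lines x genome size x coverage x bytes per base
68 | genome_bp      <- 730e6   # approximate Sorghum bicolor genome size
69 | bytes_per_base <- 2       # rough gzipped-FASTQ cost per sequenced base (assumption)
70 |
71 | volume_tb <- function(lines, coverage) lines * genome_bp * coverage * bytes_per_base / 1e12
72 | volume_tb(500, 4)    # Mike's 500 lines at 3-4x: ~3 TB
73 | volume_tb(200, 40)   # 200 lines at 40x: ~12 TB
74 | volume_tb(200, 100)  # if 100x were used: ~29 TB
75 | ```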
--------------------------------------------------------------------------------
/meeting-notes/2016-02-18-traits-committee.md:
--------------------------------------------------------------------------------
1 | # February 2016 Trait Data Standards Committee Meeting
2 |
3 | ## Participants
4 |
5 | Max Burnette, Noah Fahlgren, David LeBauer, David Lee, Cheryl Porter, Geoff Morris, Mitch Tuinstra, Jeffrey White, Jack Gardiner
6 |
7 | ## Agenda
8 |
9 | ### ICASA / NARDN
10 |
11 | Cheryl Porter will present information about ICASA, NARDN, and USDA efforts at database harmonization
12 |
13 | * [Core experimental meta-data for NARDN (draft)]( https://github.com/terraref/reference-data/files/15501/Core.Harmonized.Crop.Experiment.Data_JWW_chp.docx)
14 |
15 | ### Genomes2Fields
16 |
17 | * Darwin Campbell will give quick overview of genomes2fields project
18 | * G2F has simple protocols for a core set of five traits that everyone measures.
19 | * Also spreadsheet templates and instructions for collecting data.
20 | * Integration with [BMS](https://www.integratedbreeding.net/) and [GOBII](http://www.gobiiproject.org/) (Jack Gardiner if available)
21 |
22 | ### FieldBook and BRAPI
23 |
24 | FieldBook provides a framework for collecting field measurement data
25 |
26 | ## Discussion
27 |
28 | ### Shared Field measurement protocols
29 |
30 | Shared protocols have been requested by many of the TERRA teams. Who has protocols to start with? What are the priority traits?
31 |
32 |
33 | Some being discussed within the TERRA Ref team:
34 |
35 | * Biomass (time series)
36 | * Canopy Architecture (emergence, % cover, LAI, leaf angle, leaf length, leaf number, leaf width)
37 | * Transpiration (and leaf temperature)
38 | * Tissue chemistry (carbohydrates, lignin, N)
39 | * Photosynthesis: pigments
40 |
41 | ### Many databases: Interoperability
42 |
43 | * [BrAPI](http://docs.brapi.apiary.io/#introduction/structure-of-the-response-object)? used by FieldBook, BMS?
44 | * [BETYdb](https://www.betydb.org)
45 | * BMS
46 | * GOBII
47 |
48 | What is the framework for linking these resources?
49 |
50 | ### References:
51 |
52 | [Github issue 18: Agronomic data / meta-data formats](https://github.com/terraref/reference-data/issues/18)
53 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
1 |
2 | _PLEASE USE THIS TEMPLATE when submitting a new issue_
3 |
4 | ### Description (REQUIRED)
5 |
6 | ### Completion Criteria
7 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/generic-issue-template.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Generic issue template
3 | about: Generic template for issues that do not fit the other categories.
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 |
11 | _PLEASE USE THIS TEMPLATE when submitting a new issue_
12 |
13 | ### Description (REQUIRED)
14 |
15 | ### Completion Criteria
16 |
--------------------------------------------------------------------------------
/scripts/maricopa_field_data.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "MAC Field Data"
3 | author: "David LeBauer"
4 | date: "June 17, 2016"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, cache = TRUE)
14 | ```
15 |
16 | ## Height
17 |
18 | ```{r}
19 | library(googlesheets)
20 | library(dplyr)
21 | (my_sheets <- gs_ls())
22 |
23 | metadata_gs <- googlesheets::gs_url("https://docs.google.com/spreadsheets/d/1s-kQc3K0h4-hfwMzKOU_zpRXN5loByIpKW8Rvw76mh0")
24 |
31 | entries <- gs_read(metadata_gs, 'cultivars') %>%
32 | select(genotype = name, Entry)
33 |
34 | data_gs <- gs_url("https://docs.google.com/spreadsheets/d/1FQ8_ualom7ZUtr5iKiEn80vHlh9zIYpDxawpaek20Og")
35 |
36 | height <- gs_read(data_gs, 'Heights_Full_field_LongFormat') %>%
37 | left_join(entries, by = "Entry") %>%
38 | mutate(Experiment = replace(Experiment, Experiment == "Nght illum", "Night Illumination")) %>%
39 | mutate(Experiment = replace(Experiment, Experiment == "Row#", "Row #"))
40 |
41 | unique(height$Experiment)
42 |
43 | ht <- height %>%
44 | mutate(site = paste("MAC Field Scanner Field Plot ", Plot, Row_subplot),
45 | date = lubridate::mdy(Date), species = 'Sorghum bicolor',
46 | canopy_height = as.numeric(HtCanopy_cm),
47 | spike_height = as.numeric(HtSpike_cm),
48 | treatment = 'Control') %>%
49 | select(site, date, species,
50 | treatment,
51 | cultivar = genotype,
52 | canopy_height,
53 | spike_height) %>%
54 | filter(!(is.na(canopy_height) & is.na(spike_height)))
55 |
56 | ht1 <- ht %>% filter(is.na(spike_height)) %>% select(-spike_height)
57 | ht2 <- ht %>% filter(!is.na(spike_height))
58 |
59 | write.csv(ht1, file = 'ht1.csv', row.names = FALSE)
60 | write.csv(ht2[1:100,], file = 'ht2.csv', row.names = FALSE)
61 |
62 |
63 | ```
64 |
65 | ## Emergence
66 |
67 | ```{r}
68 | emergence <- gs_read(data_gs, 'Emergence_StandCounts_BAP')
69 | final_stand_counts <- gs_read(data_gs, 'Final_StandCounts_Full_field')
70 |
71 | # %>%
72 | #   left_join(entries, by = "Entry")
73 | ```
74 |
--------------------------------------------------------------------------------
/meeting-notes/2016_02_11_sensor_standards.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: post
3 | title: "March 11, 2016 Sensor Standards Committee Meeting Notes"
4 | modified:
5 | categories: blog
6 | excerpt:
7 | tags: []
8 | image:
9 | feature:
10 | date: 2016-03-11T20:43:38-05:00
11 | ---
12 |
13 | # March 11, 2016 Sensor Data Standards Committee Meeting
14 |
15 | ## **Participants**
16 |
17 | David LeBauer, Solmaz Hajmohammadi, Charlie Zender, Ben Niehaus, Max Burnette, Markus Radermacher
18 |
19 | ## **Agenda**
20 |
21 | Objective: define v0 data products, priorities, what we have, what is needed.
22 |
23 | Introductions, updates, roadblocks, milestones
24 |
25 | 1. Ben & Markus
26 | 2. Charlie
27 | 3. Solmaz
28 |
29 | Sample data
30 |
31 | 1. Data we do and don't have
32 | 2. Review existing meta-data files. [https://goo.gl/QH6PQv](https://goo.gl/QH6PQv)
33 |     a. these are taken from sample datasets [https://uofi.box.com/s/c9ngkwi3xvtfr0ei5zfzgf4jya0dmrlc](https://uofi.box.com/s/c9ngkwi3xvtfr0ei5zfzgf4jya0dmrlc)
34 | 3. What data products will Lemnatec generate?
35 | 4. Calibration: what is known, what will Lemnatec do? What is left to us? SOPs? What is the accuracy and sensitivity of each sensor?
36 |
37 | **References:**
38 |
39 | 1. System Specification (List of sensors p 9-11) [https://goo.gl/Unt0cv](https://goo.gl/Unt0cv)
40 | 2. (Some) Sensor data sheets [https://goo.gl/1zcQ0t](https://goo.gl/1zcQ0t)
41 | 3. Github issue describing pipeline for hyperspectral data: [https://github.com/terraref/computing-pipeline/issues/38](https://github.com/terraref/computing-pipeline/issues/38)
42 |
43 | ## **Discussion**
44 |
45 | What additional information is required to calibrate sensors and interpret data?
46 |
47 | - Hyperspectral / multispectral cameras
48 | - will use white target time series
49 | - Bandwidth, spectral resolution, FWHM, spectral response?
50 | - time of start and end of image capture
51 | - Downwelling solar radiation?
52 | - External color, geometric calibration
53 | - "3x3 K matrix" (I think this is what R. Pless mentioned for geometric correction / transformation from sensor to reality)
54 | - What does time stamp mean, for sensors that are open for a long time, including
55 | - imaging spectrometers
56 | - laser scanner
57 | - PSII timing of flash / response
58 | - Geospatial information
59 |
60 | ### **Sensors Missing From Sample Data**
61 |
62 | - PAR sensor
63 | - Skye dedicated NDVI
64 | - Color Sensor STS-VIS
65 | - Laser Scanners
66 | - 8 MP RGB Cameras
67 | - Environmental Sensors: Rain, Temp, Humidity, Light, Wind
68 | - Others?
69 |
70 | Updates:
71 |
72 | Fiber optic cable in transit, should be installed by end of next week
73 |
74 | VIS
75 | 
76 |
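77 | ---
78 |
79 | *A toy R sketch of the pinhole-camera "3x3 K matrix" idea mentioned in the discussion; the matrix values are made up for illustration, not taken from the gantry cameras.*
80 |
81 | ```r
82 | # K maps 3D camera coordinates to homogeneous pixel coordinates:
83 | # [u, v, w]' = K %*% [X, Y, Z]'; pixel = (u/w, v/w)
84 | K <- matrix(c(1200,    0, 640,
85 |                  0, 1200, 480,
86 |                  0,    0,   1), nrow = 3, byrow = TRUE)
87 | X_cam <- c(0.5, -0.2, 2.0)   # a 3D point in camera coordinates (metres)
88 | uvw   <- K %*% X_cam
89 | pixel <- uvw[1:2] / uvw[3]   # perspective divide -> (u, v) = (940, 360)
90 | pixel
91 | ```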
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to contribute [DRAFT]
2 |
3 | This repository is for discussing the format and content of data products that the TERRA Reference team will produce.
4 |
5 | Most of the discussions occur in the form of [GitHub issues](https://github.com/terraref/reference-data/issues). You can ask questions, request new or updated data products, and propose new formats to support there by creating a [new issue](https://github.com/terraref/reference-data/issues/new).
6 |
7 | ## Proposing new data products
8 |
9 | Ideally these will be accompanied by open source scripts, code snippets, or pull requests to modify / extend scripts in the https://github.com/terraref/computing-pipeline repository.
10 |
11 | ## Requesting support for / integration with existing data formats
12 |
13 | We have a long list of APIs, databases, and data formats that we plan to ingest, process, and provide access to through the TERRA Ref platform. These are listed in the documentation.
14 |
15 | ## Requesting changes to existing data products
16 |
17 | If using our data, please let us know how you access it, and what works / doesn't work.
18 |
19 |
20 | ## Code of Conduct
21 |
22 | Harassment in code and discussion or violation of physical boundaries is completely unacceptable anywhere in TERRA-REF’s project codebases, issue trackers, chatrooms, mailing lists, meetups, and other events. Violators will be warned by the core team. Repeat violations will result in being blocked or banned by the core team at or before the 3rd violation.
23 |
24 | ### In detail
25 | Harassment includes offensive verbal comments related to gender identity, gender expression, sexual orientation, disability, physical appearance, body size, race, religion, sexual images, deliberate intimidation, stalking, sustained disruption, and unwelcome sexual attention.
26 |
27 | Individuals asked to stop any harassing behavior are expected to comply immediately.
28 |
29 | Maintainers are also subject to the anti-harassment policy.
30 |
31 | If anyone engages in harassing behavior, including maintainers, we may take appropriate action, up to and including warning the offender, deletion of comments, removal from the project’s codebase and communication systems, and escalation to GitHub support.
32 |
33 | If you are being harassed, notice that someone else is being harassed, or have any other concerns, please contact a member of the core team or email dlebauer@illinois.edu immediately.
34 |
35 | We expect everyone to follow these rules anywhere in TERRA-REF's project codebases, issue trackers, chatrooms, and mailing lists.
36 |
37 | Finally, don't forget that it is human to make mistakes! We all do. Let’s work together to help each other, resolve issues, and learn from the mistakes that we will all inevitably make from time to time.
38 |
39 | ### Thanks
40 | Thanks to the [Fedora Code of Conduct](https://getfedora.org/code-of-conduct) and [JSConf Code of Conduct](http://jsconf.com/codeofconduct.html).
41 |
--------------------------------------------------------------------------------
/meeting-notes/2016_03_31_sensor_standards.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: post
3 | title: "March 31, Sensor Standards Committee Meeting Notes"
4 | modified:
5 | categories: blog
6 | excerpt:
7 | tags: []
8 | image:
9 | feature:
10 | date: 2016-03-31T20:43:38-05:00
11 | ---
12 |
13 | # March 31, 2016 Sensor Data Standards Committee Meeting
14 |
15 | ## Participants
16 |
17 | David LeBauer, Chris Boomsma, Larry Biehl, Ed Delp, Melba Crawford, Solmaz Hajmohammadi
18 |
19 | Charlie Zender, Matt Colgan, Alex Thomasson
20 |
21 | ## Agenda
22 |
23 | ### Updates on Lemnatec Field Scanner in Arizona
24 |
25 | Sample data from wheat is available
26 |
27 | Sorghum planting scheduled for mid-April
28 |
29 | ### Review of initial data sets and products
30 |
31 | Raw data sets available
32 |
33 | - How to access raw data: Globus, Clowder (by request, not yet public - Maloney, John Daniel <malone12@illinois.edu>)
34 | - Plans for v0 sensor data products
35 |
36 | - Data stream for environmental sensors (including meteorology and downwelling spectra)
37 | - Sample data set [2016-02-15\_21-20-08\_enviromentlogger.json.txt](https://github.com/terraref/reference-data/files/178402/2016-02-15_21-20-08_enviromentlogger.json.txt) and discussion on [Github issue 26](https://github.com/terraref/reference-data/issues/26):
38 | - [Draft format specification](https://github.com/terraref/documentation/blob/master/environmental_data.md)
39 | - Hyperspectral and imagery data - Charlie Zender
40 | - Ugly workflow [script](https://github.com/terraref/computing-pipeline/tree/master/scripts/hyperspectral/terraref.sh) (convert raw->netCDF, compress, add metadata)
41 | - [Help screen](https://gist.github.com/czender/39b5249dd92c3b8f471beda7bbc121c5)
42 | - [Run output](https://gist.github.com/czender/bdd93e4d67aeca8b5c11bd916c655db9#file-terraref-out)
43 | - Review sample [data](http://dust.ess.uci.edu/tmp/foo.nc) (warning: 134 Mb) and/or [metadata](http://dust.ess.uci.edu/tmp/foo.cdl)
44 | - People will be most interested in looking at band-specific data so the default script is simplified for this.
45 | - Lossless DEFLATE compression saves 20-25% with no loss of data and takes less than 5 s to decompress; higher compression levels increase decompression time
46 | - Lossy compression (bit rounding) is an option, followed by lossless compression, gaining roughly an extra 10% per decimal digit rounded (a toy bit-rounding sketch appears at the end of these notes)
47 | - Gantry sensors
48 | - How to correct if velocity is not constant? Also problems with GPS if moving very slowly.
49 | - All LemnaTec GFE cameras operate independently and data will be combined later (image stitching)
50 | - Gantry speed and start time/positions are known. Timestamps in the text file are accurate to the millisecond, and to the second while the mirror angle is changing and the gantry is not moving.
51 | - Any info on reflectance calibration? What should we do for calibration?
52 |
53 | ### Discussion
54 |
55 | - What are the priority use cases to support?
56 | - (How and where) might you use software components?
57 | - Are you interested in either contributing data or adopting common standards?
58 | - Calibration
59 | - Light
60 | - Space
61 | - Time
62 |
63 | ### GitHub issues
64 |
65 | - Complete list of [Open Github Issues related to Sensor Data](https://github.com/search?q=org%3Aterraref++label%3A%22sensor+data%22&ref=searchresults&state=open&type=Issues&utf8=%E2%9C%93)
66 | - Issue 64: [First step of processing and annotating sensor output](https://github.com/terraref/computing-pipeline/issues/64)
67 | - Issue 14: [Formats for image and other hyperspectral data](https://github.com/terraref/reference-data/issues/14)
68 |
--------------------------------------------------------------------------------
/meeting-notes/2016_08_02_traits_standards.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: post
3 | title: "August 2016 Traits Standards Committee Meeting Notes"
4 | modified:
5 | categories: blog
6 | excerpt:
7 | tags: []
8 | image:
9 | feature:
10 | date: 2016-08-02T20:43:38-05:00
11 | ---
12 | # August 2016 Traits Standards Committee Meeting Notes
13 |
14 | ## Participants
15 |
16 | David LeBauer, Max Burnette, Cheryl Porter, Shawn Serbin, David Lee, Noah Fahlgren, Mitch Tuinstra, Yuhao Chen (Delp lab), Justin McGrath, Jeff White, Craig Willis, Eric Lyons
17 |
18 | REGRETS:
19 | Geoff Morris
20 |
21 | ## Agenda
22 |
23 | ### Introductions
24 |
25 | ### Data Pipeline overview
26 |
27 | 
28 |
29 | #### **Accessing data**
30 |
31 | Production data: [https://terraref.ncsa.illinois.edu/bety/](https://terraref.ncsa.illinois.edu/bety/)
32 |
33 | Test data: [https://terraref.ncsa.illinois.edu/bety-test/](https://terraref.ncsa.illinois.edu/bety-test/)
34 |
35 | Documentation: [https://pecan.gitbooks.io/betydb-data-access/content/](https://pecan.gitbooks.io/betydb-data-access/content/)
36 |
37 | API documentation: [https://pecan.gitbooks.io/betydb-data-access/content/API/beta\_API.html](https://pecan.gitbooks.io/betydb-data-access/content/API/beta_API.html)
38 |
39 | Examples
40 |
41 | Traits:
42 |
43 | Web interface: [https://terraref.ncsa.illinois.edu/bety-test/traits](https://terraref.ncsa.illinois.edu/bety-test/traits)
44 |
45 | API: [https://terraref.ncsa.illinois.edu/bety-test/api/beta/traits?key=9999999999999999999999999999999999999999&limit=5](https://terraref.ncsa.illinois.edu/bety-test/api/beta/traits?key=9999999999999999999999999999999999999999&limit=5) (a minimal R example of calling this endpoint appears at the end of these notes)
46 |
47 | ### Shared Field Measurement Protocols
48 |
49 | - Review [draft document of sampling protocols](https://docs.google.com/document/d/1iP8b97kmOyPmETQI_aWbgV_1V6QiKYLblq1jIqXLJ84)
50 | - Interest in sharing across teams? Dave Lee can circulate among the other TERRA teams and work to get feedback after the Traits Standards Committee and the Cat5 team are satisfied with the document.
51 | - Interest in additional measurements at MAC?
52 |
53 | ### Data and meta-data formats
54 |
55 | - Planning to support ICASA / NARDN, BRAPI
56 | - Developing a cross-vocabulary thesaurus
57 | - Review / comment on drafts:
58 | - [list of available ontologies, vocabularies and meta-data formats](https://docs.google.com/spreadsheets/d/1qu3LYomGIiC4Lmzler-bH8jvyfR-GA6lDl5Go0NhO2c/edit#gid=0)
59 | - [Requirements for Standard Data Formats, Ontologies/Vocabularies](https://docs.google.com/document/d/13gXD_OVLffm0hqahDZ3tUvru8IV1fRfM6DiuOcfjr3s/edit?usp=sharing).
60 | - Mapping variable information between systems and vocabularies ( [github issue 31](https://github.com/terraref/reference-data/issues/31))
61 |
62 | NIH protocol database - Zoe Lacroix
63 |
64 | Example pub at: [http://www.ncbi.nlm.nih.gov/pubmed/26673793](http://www.ncbi.nlm.nih.gov/pubmed/26673793):
65 |
66 | Scientific legacy workflows are often developed over many years, poorly documented and implemented with scripting languages. In the context of our cross-disciplinary projects we face the problem of maintaining such scientific workflows. This paper presents the Workflow Instrumentation for Structure Extraction (WISE) method used to process several ad-hoc legacy workflows written in Python and automatically produce their workflow structural skeleton. Unlike many existing methods, WISE does not assume input workflows to be preprocessed in a known workflow formalism. It is also able to identify and analyze calls to external tools. We present the method and report its results on several scientific workflows.
67 |
68 | Prometheus wiki for plant protocols: [http://prometheuswiki.publish.csiro.au/tiki-custom\_home.php](http://prometheuswiki.publish.csiro.au/tiki-custom_home.php)
69 |
70 | Pérez-Harguindeguy et al (2013) New handbook for standardised measurement of plant functional traits worldwide. Australian Journal of Botany, 61, 167-234. [https://doi.org/10.1071/BT12225](https://doi.org/10.1071/BT12225) http://www.nucleodiversus.org/index.php?mod=page&id=79
71 |
72 | [http://earthcube.org/group/geosemantics](http://earthcube.org/group/geosemantics)
73 |
74 | Australian group has workflow software + standards
75 |
76 | Rothamsted data
77 |
78 | INRA France
79 |
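80 | ---
81 |
82 | *A minimal R example (a sketch, not official client code) of calling the bety-test traits endpoint shown above; the key is the placeholder from the agenda.*
83 |
84 | ```r
85 | library(httr)
86 | library(jsonlite)
87 |
88 | # Query the beta traits API; swap in a real API key
89 | resp <- GET("https://terraref.ncsa.illinois.edu/bety-test/api/beta/traits",
90 |             query = list(key = "9999999999999999999999999999999999999999", limit = 5))
91 | stop_for_status(resp)
92 | traits <- fromJSON(content(resp, as = "text", encoding = "UTF-8"))
93 | str(traits, max.level = 2)
94 | ```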
--------------------------------------------------------------------------------
/meeting-notes/2016_08_01_genomics_standards.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: post
3 | title: "August 2016 Genomic Standards Committee Meeting Notes"
4 | modified:
5 | categories: blog
6 | excerpt:
7 | tags: []
8 | image:
9 | feature:
10 | date: 2016-08-01T20:43:38-05:00
11 | ---
12 |
13 |
14 | # TERRA Ref Genomics Standards Committee Meeting
15 |
16 | ## **Participants**
17 |
18 | David LeBauer, Christine Laney, Michael Gore, Carolyn Lawrence-Dill, Eric Lyons, Noah Fahlgren
19 |
20 | REGRETS:
21 | Todd Mockler, Max Burnette, David Lee, Geoff Morris, Craig Willis
22 |
23 | ## **Agenda**
24 |
25 | Introductions
26 |
27 | Objective: review current status of pipeline and plans for first data release in November.
28 |
29 | Overview (Noah)
30 |
31 | Sequencing
32 |
33 | - what has been done
34 | - 192 resequenced genomes (~20-30x coverage each) from Steve K. bioenergy assoc. panel (BAP)
35 | - 192 additional samples sent to HudsonAlpha one week ago (20-30x)
36 | - External funding
37 | - Illumina for additional ~1000 sequences
38 | - DOE CSP for de novo
39 | - Data quality control and analysis to date done on the Danforth Center cluster
40 | - Trimmomatic => bwa => GATK => CNVator
41 | - By November: user will upload raw sequencing data and metadata to TERRAref pipeline using CoGe (below)
42 | - what is in pipeline
43 | - Raw data and experimental metadata added to Clowder
44 | - Clowder extractor
45 | - Upload data to the CyVerse data store (TERRA-REF)
46 | - Launch CoGe workflow using the API
47 | - Synchronize results back to Clowder/BETYdb
48 |
49 | - Clowder: a database that can hold data of any format. Data imported into Clowder will automatically trigger extractors that move data to the correct location for discovery and analysis
50 | - Data will be uploaded to NCBI, SRA
51 | - Can we link from the SRA to CyVerse and Clowder easily and robustly?
52 |
53 | CoGe pipeline
54 |
55 | - A sample analysis: [https://genomevolution.org/coge/NotebookView.pl?nid=1344](https://genomevolution.org/coge/NotebookView.pl?nid=1344)
56 | - Draft implementation: [https://github.com/terraref/computing-pipeline/blob/f94a87f851b37ff74ded5b7b6b3b0c1e13107720/scripts/coge/coge\_upload.json](https://github.com/terraref/computing-pipeline/blob/f94a87f851b37ff74ded5b7b6b3b0c1e13107720/scripts/coge/coge_upload.json) (a rough sketch of launching such a job appears at the end of these notes)
57 |
58 | Downstream Analyses
59 |
60 | - GOBII
61 | - Other downstream tools?
62 | - SNP calling via CoGe
63 | - What is already within CoGe
64 | - Putting proprietary GATK on CyVerse (Mike G will send more info)
65 |
66 | Data Sharing
67 |
68 | - when, where, and with what will we share as of November
69 | - Currently using CyVerse data store ( [https://de.iplantcollaborative.org/de/](https://de.iplantcollaborative.org/de/))
70 | - [terraref/reference-data/19](https://github.com/terraref/reference-data/issues/19)
71 | - Phytozome (a DOE database) - is this appropriate for our data? Perhaps not for raw reads (Mike G)
72 | - Maybe we can submit variation information from the CoGe pipeline and update it as the reference genome is updated
73 | - Is Phytozome interested in hosting a pangenome resource?
74 | - NCBI SRA: raw data + experimental metadata
75 | - NEON has worked with SRA on data/metadata sharing, keep in touch with them
76 | - Others?
77 |
78 | Other questions / ideas
79 |
80 | - How to get from GenBank to related
81 |
82 | NEON: providing metagenomic data, processed and made available to the public w/ mgrast; marker gene sequences will be hosted in SRA / not available w/in NEON portal but available from external repository. Genomic standard meeting next week, working on environmental soil meta-data package for Mixs [http://gensc.org/mixs/submit-mixs-metadata/](http://gensc.org/mixs/submit-mixs-metadata/)
83 |
84 | NEON has started using EML to begin documenting sensor and observational data (currently online at [http://data.neonscience.org](http://data.neonscience.org) but not pretty). May begin doing this w/ soil samples.
85 |
86 | Action items:
87 |
88 | ### **References**
89 |
90 | - Genomics pipeline documentation [https://github.com/terraref/documentation/blob/master/genomics\_pipeline.md](https://github.com/terraref/documentation/blob/master/genomics_pipeline.md)
91 | - Genomics data formats: [terraref/reference-data/19](https://github.com/terraref/reference-data/issues/19)
92 | - Pipeline implementation: [terraref/computing-pipeline/issues/37](https://github.com/terraref/computing-pipeline/issues/37)
93 | - Using CoGe [terraref/computing-pipeline/issues/41](https://github.com/terraref/computing-pipeline/issues/41)
94 |
--------------------------------------------------------------------------------
/scripts/MAC Season 5.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "MAC Season 5 Metadata Upload to bety"
3 | output: html_document
4 | ---
5 |
6 |
7 | ########### Connect to bety database
8 | ```{r dbcon-1, message = FALSE}
9 | library(RPostgreSQL)
10 | dbcon <- dbConnect(RPostgreSQL::PostgreSQL(),
11 | dbname = "bety",
12 | password = 'bety',
13 | host = 'localhost',
14 | user = 'bety',
15 | port = 5432)
16 | ```
17 |
18 |
19 | ########### Insert new experiment to experiments table
20 | ```{sql connection = dbcon}
21 | insert into experiments (name, start_date, end_date, description, design, user_id)
22 | values ('MAC Season 5: Durum Wheat', '2017-12-01', '2018-03-31', '', '', 6000000004)
23 | ```
24 |
25 | ########### Associate new experiment id with site ids
26 | ```{sql connection = dbcon}
27 | insert into experiments_sites (experiment_id, site_id)
28 | select e.experiment_id, s.site_id
29 | from (select id as experiment_id from experiments where name = 'MAC Season 5: Durum Wheat') as e
30 | cross join
31 | (select id as site_id from sites where sitename like 'MAC Field Scanner Season 5%') as s
32 | ```
33 |
34 | ########### Check to see if season 5 sitenames have already been uploaded
35 | ```{r s5_site_query, message = FALSE}
36 | #get data frame containing all sitenames already present
37 | sitename_query <- dbGetQuery(dbcon, 'select sitename from sites')
38 | #convert to vector
39 | query_sites <- sitename_query$sitename
40 |
41 | #read in the Season 5 Durum Wheat sheet 3 subplots file
42 | season_5 <- read.csv('/Users/kimberlyh/Desktop/season_metadata_upload/season5_wheat/input_data/Season5_DurumWheat_SubPlots.csv', stringsAsFactors = FALSE)
43 | #get vector containing season 5 sitenames
44 | season_5_sites <- season_5$site
45 |
46 | #determine which season 5 sites have already been inputted
47 | in_bety <- query_sites[which(query_sites %in% season_5_sites)]
48 | #looks like all sitenames for season 5 have already been inputted
49 | identical(sort(in_bety), sort(season_5_sites)) #returns TRUE
50 |
51 | ```
52 |
53 |
54 | ########### Check to see which season 4 sitenames overlap with season 5 sitenames
55 | ```{r s4_s5_overlap, message=FALSE}
56 | ################################################
57 | #want to check if both season 4 and season 5 have same sitenames
58 | #get vector containing all season 4 MAC Field Scanner sitenames
59 | season_4_query <- dbGetQuery(dbcon, "select sitename from sites where sitename like 'MAC Field Scanner Season 4%'")
60 | season_4_sites <- season_4_query$sitename
61 | #write to csv
62 | write.csv(season_4_sites, 'season4_sitenames_bety.csv', row.names=FALSE)
63 |
64 | #replace Season 4 in site names to Season 5 to make comparisons
65 | season_4_sites_sub <- gsub('Season 4', 'Season 5', season_4_sites)
66 |
67 | ###################################
68 | ###################################
69 | #determine which sites are available for season 4 but not 5
70 | in_both <- which(season_4_sites_sub %in% season_5_sites) #season 4 has all site names in season 5 subplot excel sheet
71 |
72 | season_4_unique <- gsub('Season 5', 'Season 4', season_4_sites_sub[which(!(season_4_sites_sub %in% season_5_sites))]) #sitenames unique to season 4
73 | write.csv(season_4_unique, 'season4_unique.csv', row.names=FALSE) #this file will contain sitenames present for season 4 but not in season 5 subplot excel sheet
74 | ###################################
75 | ###################################
76 |
77 | ###################################
78 | ###################################
79 | #determine which sites have been uploaded for season 5 but are not present in the excel sheet
80 | season_5_query <- dbGetQuery(dbcon, "select sitename from sites where sitename like 'MAC Field Scanner Season 5%'")
81 | #get vector
82 | s5_bety_sites <- season_5_query$sitename
83 |
84 | #write to csv
85 | write.csv(s5_bety_sites, 'season5_bety.csv', row.names=FALSE) #this csv will contain all season 5 sitenames already in bety
86 |
87 | #compare with season 5 sitenames in subplot excel sheet
88 | s5_bety_only <- s5_bety_sites[which(!(s5_bety_sites %in% season_5_sites))] #sitenames for season 5 not present in subplots excel sheet #looks like these are the sitenames without the W and E appended
89 |
90 | #write to csv
91 | write.csv(s5_bety_only, 'season5_betyonly.csv', row.names=FALSE) # this csv will contain all season 5 sitenames in bety, but not included in the subplots excel sheet
92 | ```
93 |
94 |
95 | ########### Check to see which season 5 cultivars to upload into the cultivars table
96 | ```{r cultivars_query}
97 | s5_DurumWheat <- read.csv("/Users/kimberlyh/Desktop/season_metadata_upload/season5_wheat/input_data/Season5_DurumWheat_SubPlots.csv", stringsAsFactors = FALSE)
98 |
99 | s5_unq_cultivars <- unique(s5_DurumWheat$cultivar) #unique values of cultivars for season 5 durum wheat
100 |
101 | bety_cultivars <- dbGetQuery(dbcon, 'select name from cultivars')$name #cultivars already inputted in bety
102 |
103 | #see which of season 5 durum wheat cultivars have already been uploaded into database
104 | s5_bety_cultivars <- bety_cultivars[which(bety_cultivars %in% s5_unq_cultivars)] #looks like season 5 durum wheat cultivars have already been uploaded
105 | ```
106 |
107 |
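108 | ########### (Sketch) Upload any cultivars not yet in bety
109 | A hedged sketch of the step that would follow if `s5_unq_cultivars` contained names missing from bety; the `specie_id` 2588 mirrors the value used elsewhere in this repo's scripts and is an assumption here.
110 | ```{r cultivars_upload, eval = FALSE}
111 | to_upload <- setdiff(s5_unq_cultivars, bety_cultivars)
112 | if (length(to_upload) > 0) {
113 |   stmts <- paste0("insert into cultivars (specie_id, name, created_at, updated_at) values (2588, '",
114 |                   to_upload, "', now(), now());")
115 |   writeLines(stmts, 'season5_cultivars.sql')  # then: psql -d bety < season5_cultivars.sql
116 | }
117 | ```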
--------------------------------------------------------------------------------
/scripts/maricopa_field_metadata.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "MAC Field Metadata"
3 | author: "David LeBauer"
4 | date: "June 17, 2016"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE)
10 | ```
11 |
12 | # Sites
13 |
14 | ## Site names
15 |
16 | ```{r}
17 | library(googlesheets)
18 | library(dplyr)
19 | (my_sheets <- gs_ls())
20 | metadata_gs <- googlesheets::gs_url("https://docs.google.com/spreadsheets/d/1s-kQc3K0h4-hfwMzKOU_zpRXN5loByIpKW8Rvw76mh0")
21 |
22 | sites <- gs_read(metadata_gs, 'sites') %>%
23 | select(sitename)
24 |
25 | sitenames <- with(sites, c(sitename, paste0(sitename, " E"), paste0(sitename, " W")))
26 |
27 | insert <- paste0("insert into sites (sitename, created_at, updated_at) values('", sitenames, "', now(), now());")
28 |
29 | writeLines(insert, 'sites.sql')
30 | ```
31 |
32 | ```sh
33 | psql -d bety < sites.sql
34 | ```
35 |
36 | Link sites to citations
37 |
38 | ```sql
39 | insert into citations_sites (citation_id, site_id, created_at, updated_at) select 6000000001, id, now(), now() from sites where sitename like 'MAC%';
40 |
41 | insert into citations_sites (citation_id, site_id, created_at, updated_at) select 6000000002, id, now(), now() from sites where sitename like 'MAC%';
42 | ```
43 |
44 | ## Elevation
45 |
46 | ```{r elevation}
47 | library(data.table)
48 |
49 | elevation <- fread("data/MAC-Gantry_foundation-lines_elevation-survey_9-15-2015.csv",
50 | skip = 4, header = FALSE, drop = c(1:6, 9:10, 12:14),
51 | col.names = c("latitude", "longitude", "elevation"))
52 | elevation <- elevation[!is.na(latitude) | !is.na(longitude)]
53 | library(ggplot2)
54 | ggplot(data = elevation) +
55 | geom_point(aes(longitude, latitude, color = elevation))
56 |
57 | boundingbox <- rbind(elevation[longitude > -111.975][latitude == max(latitude)],
58 | elevation[longitude > -111.975][latitude == min(latitude)],
59 | elevation[longitude < -111.975][latitude == min(latitude)],
60 | elevation[longitude < -111.975][latitude == max(latitude)],
61 | elevation[longitude > -111.975][latitude == max(latitude)])
62 |
63 | write.csv(boundingbox, "macboundingbox.csv", row.names = FALSE)
64 |
65 | ```
66 |
67 | ## Plot Layout
68 |
69 |
70 | ```{r plot-layout, echo=FALSE}
71 | # plot_boxes <- function(x_origin = 0, y_origin = 0,
72 | # length = 3.5,
73 | # alley = 0.5,
74 | # row_spacing = 30,
75 | # ranges = 54,
76 | # columns = 16){
77 |
78 | require(udunits2)
79 | library(dplyr)
80 | library(tidyr)
81 | library(ggplot2)
82 |
83 | x_origin = 0
84 | y_origin = 0
85 | length = 3.5
86 | alley = 0.5
87 | row_spacing = 30
88 | ranges = 54
89 | columns = 16
90 |
91 | row_spacing_m <- ud.convert(row_spacing, 'in', 'm')  # row spacing given in inches
92 |
93 | dx <- 2 * row_spacing_m
94 | dy <- length
95 |
96 | x <- x_origin + (1:columns - 1) * dx
97 | y <- y_origin + (1:ranges - 1) * (length + alley)
98 |
99 | grid <- data.frame(expand.grid(x,y)) %>%
100 | transmute(plot = 1:(ranges*columns), x1 = Var1, y1 = Var2, x2 = x1 + dx, y2 = y1 + dy) #%>%
101 | #tidyr::gather(key = 'lat', )
102 |
103 |
104 | theme_set(theme_bw())
105 | ggplot(data = grid) +
106 | geom_text(aes(x1+dx/2, y1+dy/2, label = plot), size = 2) +
107 | geom_point(aes(x1,y1), size = 1, alpha = 0.4) +
108 | geom_point(aes(x2,y2), col = 'red', size = 1, alpha = 0.4) +
109 | ggtitle('gantry plot layout') + xlab('x') + ylab('y')
110 |
111 |
112 | grid2 <- grid %>% gather(key, value = x, starts_with("x")) %>% gather(key2, value = y, starts_with("y")) %>% select(-starts_with("key")) %>% arrange(plot) %>% setDT
113 |
114 | library(spatstat)
115 | library(wellknown)
116 | plots <- grid2[,wellknown::polygon(as.data.frame(spatstat::bounding.box.xy(x, y))), by=plot]
117 | write.csv(grid, file = '20160421_grid.csv')
118 | write.csv(plots, file = '20160421_plots.csv')
119 |
120 | ```
121 |
122 | Here is an example of how to add lat1/lon1 (for x1, y1) and lat2/lon2 (for x2, y2) to the grid data frame using the projection provided before.
123 |
124 |
125 | ```r
126 | require(proj4)
127 | options(digits = 12)
128 | x1y1=cbind(grid$x1,grid$y1)
129 | x2y2=cbind(grid$x2,grid$y2)
130 | proj= '+proj=tmerc +lat_0=0 +lon_0=-111.843479 +k=1 +x_0=12285.092664 +y_0=-3661028.344732 +datum=WGS84 +units=m +no_defs '
131 | latlon1=project(x1y1, proj, inverse = T, degrees = TRUE, silent = FALSE, ellps.default="sphere")
132 | latlon2=project(x2y2, proj, inverse = T, degrees = TRUE, silent = FALSE, ellps.default="sphere")
133 | grid$lon1=latlon1[,1]
134 | grid$lat1=latlon1[,2]
135 | grid$lon2=latlon2[,1]
136 | grid$lat2=latlon2[,2]
137 | ```
138 |
139 |
140 | ## Cultivars
141 |
142 | ```{r cultivars}
143 | bety <- src_postgres(dbname = 'bety', host = 'localhost', user = 'bety', password = 'bety')
144 |
145 | cultivars <- tbl(bety, 'cultivars')
146 |
147 | specie_id <- tbl(bety, 'species') %>% filter(scientificname == 'Sorghum bicolor') %>% select(id)
148 |
149 | library(googlesheets)
150 | accessions_gs <- googlesheets::gs_url("https://docs.google.com/spreadsheets/d/1Nfabx_n1rNlO6NW3olD8MAibJ3KHnOMmMwOYYw4wwGc")
151 | accessions <- googlesheets::gs_read(accessions_gs, 'Accessions')
152 |
153 | #accessions <- read.csv("~/Downloads/accessions.csv")
154 |
155 | sorghum_cultivars <- accessions %>%
156 | transmute(specie_id = 2588, name = Source)
157 |
158 | insert <- sorghum_cultivars %>%
159 |   mutate(insert = paste0("insert into cultivars (specie_id, name, created_at, updated_at) values (",
160 |                          specie_id, ", '", name, "', now(), now());")) %>%
161 |   select(insert)
162 |
163 | writeLines(insert$insert, 'sorghum_cultivars.sql')
164 | ```
165 |
166 | then insert cultivars (on bety6 / terraref.ncsa.illinois.edu/bety) using
167 |
168 | ```sh
169 | psql -d bety < sorghum_cultivars.sql
170 | ```
171 |
172 | ## Covariates
173 |
174 | For any traits that we upload, an entry in `trait_covariate_associations` is required. By default, they can all have stand age as a covariate.
175 |
176 |
177 | ```{r}
178 | insert_tca <- paste0("insert into trait_covariate_associations (trait_variable_id, covariate_variable_id, required) values (", 6000000001:6000000008, ", 343, 'f');")
179 | writeLines(insert_tca, con = 'trait_covariate_associations.sql')
180 | ```
181 |
--------------------------------------------------------------------------------
/scripts/Season_Upload_Functions.R:
--------------------------------------------------------------------------------
1 | #############################################################################
2 | ####################### HOW TO ADD A NEW SEASON #############################
3 | #############################################################################
4 |
5 | ##################################################
6 | ########### uploading plot definitions ###########
7 | ##################################################
8 | sitesUpload <- function(file_name, file_dir, sitename_pattern, season_num){
9 | # query sites table for records with sitename that match 'sitename_pattern'
10 | library(RPostgreSQL)
11 | dbcon <- dbConnect(RPostgreSQL::PostgreSQL(),
12 | dbname = "bety",
13 | password = 'bety',
14 | host = 'localhost',
15 | user = 'bety',
16 | port = 5432)
17 | sites_query <- paste0("select * from sites where sitename like '", sitename_pattern, "';")
18 | sites_table <- dbGetQuery(dbcon, sites_query)
19 | bety_sitenames <- sites_table$sitename # sites already uploaded in the bety database that match user search pattern
20 |
21 | #read in subplot data
22 | file_path <- paste0(gsub('/$', '', file_dir), '/', file_name)
23 | sites_data <- read.csv(file_path, stringsAsFactors = FALSE)
24 | subplots <- gsub(" +", " ", sites_data$site) # normalize repeated spaces in sitenames
25 | subplots_to_upload <- subplots[which(!(subplots %in% bety_sitenames))] # sitenames that have not been uploaded
26 | if(length(subplots_to_upload) != 0){
27 | # upload new records to the sites table for new season
28 | # use plot definitions from previous season
29 | prev_season_num <- as.numeric(season_num) - 1
30 | insert_state <- paste0("with season", season_num, " as ( select city, state, replace(sitename, 'Season ", prev_season_num, "', 'Season ",
31 | season_num, "') as sitename, greenhouse, geometry, time_zone from sites where sitename like '%Season ", prev_season_num,
32 | "%') insert into sites (city, state, sitename, greenhouse, geometry, time_zone) select * from season", season_num, ";")
33 | sites_table_upload <- dbSendStatement(dbcon, insert_state)
34 | dbGetRowsAffected(sites_table_upload)
35 | output_message <- paste0(dbGetRowsAffected(sites_table_upload), " rows inserted.")
36 | message(output_message)
37 | }else{ # no new upload necessary # all sites already in database
38 | output_message <- paste0("\n0 rows inserted. All season ", season_num, " sites already in database.")
39 | message(output_message)
40 | }
41 | }
42 |
43 | # example
44 | sitesUpload(file_name = 'S7_SorghumAug_subplots.csv', file_dir = '~/Downloads', sitename_pattern = 'MAC Field Scanner Season 7%', season_num = 7)
45 |
46 | ##########################################################
47 | ########### associating experiments with sites ###########
48 | ##########################################################
49 | expSitesUpload <- function(sitename_pattern, experiment_name){ # assumes an open RPostgreSQL connection 'dbcon' (as created in sitesUpload) exists in the calling environment
50 | exp_site_query <- paste0("select * from experiments_sites where site_id in (select id from sites where sitename like '", sitename_pattern, "') ",
51 | "and experiment_id = (select id from experiments where name = '", experiment_name, "');")
52 | exp_site_table <- dbGetQuery(dbcon, exp_site_query)
53 | if(nrow(exp_site_table) == 0){ # no experiments and sites have been associated
54 | insert_statement <- paste0("insert into experiments_sites (experiment_id, site_id) select e.experiment_id, s.site_id from (select id as experiment_id from experiments where name = '", experiment_name, "') as e cross join (select id as site_id from sites where sitename like '", sitename_pattern, "') as s")
55 | exp_sites_upload <- dbSendStatement(dbcon, insert_statement)
56 | output_message <- paste0("No sites associated with experiment yet. ", dbGetRowsAffected(exp_sites_upload), " rows inserted.")
57 | message(output_message)
58 | }else{ # not all experiments and sites have been associated
59 | sites_query <- paste0("select id from sites where sitename like '", sitename_pattern, "'")
60 | site_ids_table <- dbGetQuery(dbcon, sites_query)
61 | site_ids <- site_ids_table$id # all site ids for sitenames that match the sitename_pattern
62 | exp_site_ids <- exp_site_table$site_id # all site ids associated with experiment so far
63 | # see which site ids have not yet been associated
64 | site_ids_to_upload <- site_ids[which(!(site_ids %in% exp_site_ids))]
65 | # associate these site ids with experiment
66 | insert_statement <- paste0("insert into experiments_sites (experiment_id, site_id) select e.experiment_id, s.site_id from (select id as experiment_id from experiments where name = '", experiment_name, "') as e cross join (select id as site_id from sites where id in (", paste(site_ids_to_upload, collapse = ', '), ")) as s")
67 | exp_sites_insert <- dbSendStatement(dbcon, insert_statement)
68 | output_message <- paste0(dbGetRowsAffected(exp_sites_insert), " rows inserted.")
69 | message(output_message)
70 | }
71 | }
72 |
73 | # example
74 | expSitesUpload(sitename_pattern = 'MAC Field Scanner Season 7%', experiment_name = 'MAC Season 7: Sorghum Aug')
75 |
76 | ##############################################
77 | ########### add cultivars records ###########
78 | ##############################################
79 | # question : is there generally only one species for a season?
80 | cultivarsUpload <- function(file_name, file_dir){
81 | file_path <- paste0(gsub('/$', '', file_dir), '/', file_name) # read in season data
82 | season_data <- read.csv(file_path, stringsAsFactors = FALSE)
83 | unq_season_cultivar <- unique(season_data$cultivar) # unique cultivars for season
84 |
85 | # query cultivars table and get cultivars already uploaded
86 | library(RPostgreSQL)
87 | dbcon <- dbConnect(RPostgreSQL::PostgreSQL(),
88 | dbname = "bety",
89 | password = 'bety',
90 | host = 'localhost',
91 | user = 'bety',
92 | port = 5432)
93 | cultivars_query <- "select name from cultivars;"
94 | cultivars_table <- dbGetQuery(dbcon, cultivars_query)
95 | bety_cultivars <- cultivars_table$name # cultivars already uploaded in bety
96 |
97 | # determine which season cultivars need to be uploaded
98 | cultivars_to_upload <- unq_season_cultivar[which(!(unq_season_cultivar %in% bety_cultivars))]
99 |
100 | if(length(cultivars_to_upload) != 0){
101 | cultivars_to_upload <- gsub("^ ", "", cultivars_to_upload) # remove starting space if present
102 |
103 | # need specie id for insert statement # get id from bety
104 | season_spp <- unique(season_data$species) # get unique species for season
105 | season_spp_gen <- strsplit(season_spp, ' ')[[1]][1] # genus of the first species (assumes one species per season; see question above)
106 | id_query <- paste0("select id from species where genus = '", season_spp_gen, "';" )
107 | id_table <- dbGetQuery(dbcon, id_query)
108 | season_spp_id <- id_table$id
109 |
110 | #create sql insert statements
111 | insert_values <- vector('character', length = length(cultivars_to_upload))
112 | for(i in 1:length(cultivars_to_upload)){
113 | cultivar <- cultivars_to_upload[i]
114 | insert_val <- paste0("(", season_spp_id, ", '", cultivar, "')")
115 | insert_values[i] <- insert_val
116 | }
117 |
118 | sql_insert_full <- paste0("insert into cultivars (specie_id, name) values ", paste(insert_values, collapse = ", "), ";")
119 | cultivar_insert <- dbSendStatement(dbcon, sql_insert_full) #insert new rows into bety
120 | output_message <- paste0(dbGetRowsAffected(cultivar_insert), " rows inserted into cultivars table.")
121 | message(output_message)
122 | }else{ # no new uploads necessary # all cultivars already present
123 | output_message <- '0 rows inserted. All cultivars already uploaded'
124 | message(output_message)
125 | }
126 | }
127 |
128 | # example
129 | cultivarsUpload(file_name = 'S7_SorghumAug_subplots.csv', file_dir = '~/Downloads')
130 |
131 |
132 |
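133 | # Putting the three steps in this script together for a hypothetical new
134 | # season (file name, pattern, and experiment name are illustrative placeholders):
135 | # sitesUpload(file_name = 'S8_subplots.csv', file_dir = '~/Downloads',
136 | #             sitename_pattern = 'MAC Field Scanner Season 8%', season_num = 8)
137 | # expSitesUpload(sitename_pattern = 'MAC Field Scanner Season 8%',
138 | #                experiment_name = 'MAC Season 8: <experiment name>')
139 | # cultivarsUpload(file_name = 'S8_subplots.csv', file_dir = '~/Downloads')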
--------------------------------------------------------------------------------
/meeting-notes/2015-10-28_all_standards.md:
--------------------------------------------------------------------------------
1 | > Reference Standards Committee Virtual Meeting Notes
2 | >
3 | > October 28, 2015 2pm EST
4 | >
5 | > In attendance: David Lee (ARPA-E), David LeBauer (UIUC), Alex Thomasson (TAMU), Barnabas Poczos (CMU), Christer Jansson (PNNL), Dan Northrup (ARPA-E), Ed Delp (Purdue), Elodie Gazave (Cornell), Justin Manzo (ARPA-E), Larry Biehl (Purdue), Matt Colgan (BRT), Melba Crawford (Purdue), Mike Gore (Cornell)
6 |
7 | 1. Review location of the following information:
8 |
9 | 1. [*TERRAref documentation website*](https://dlebauer.gitbooks.io/terraref-documentation/content/data_standards_committee.html)
10 |
11 | - Please feel free to make or suggest edits
12 |
13 | 2. [*Github website*](https://github.com/terraref/reference-data)
14 |
15 | 3. [*Danforth plantcv site*](http://plantcv.danforthcenter.org/)
16 |
17 | 4. [*Sample data available in Box*](https://uofi.app.box.com/terraref-sample-data)
18 |
19 | - Do you have any other ideas for storage?
20 |
21 | Q. Christer – molecular phenotyping, including transcriptomics/proteomics/metabolomics – should this be added to the ontology / reference data? Spatiotemporal, qualitative and quantitative. Very large dataset.
22 |
23 | A. This is not in the scope of the reference data that we are providing, but users are welcome to develop a proposal. Talk to Mike Gore, Christer, and David Lee about developing a proposal for molecular phenotyping. This data type is quite specialized. This could be built in later, but we don’t want to overlap with KBase and iPlant. TERRAref is currently handling genetic and phenotypic data and trying to link these – there is a possibility for molecular data to be linked using these resources.
24 |
25 | Q. Justin – Can’t sync content from Box
26 | 
27 | A. Look into changing permissions
28 |
29 | Cornell can provide a template for genomics data.
30 |
31 | 1. Member information
32 |
33 | 1. Identify each participant's expertise and interests with respect to the committee (see list of expertise and interests, below)
34 |
35 | - We can provide a file for edits
36 |
37 | - We need to identify one person from each funded project to be the point person and attend annual meetings. Anyone can join the call, be on the email list, and provide feedback.
38 |
39 | - The TERRA program will be supporting robotics, but TERRAref is not necessarily supporting it
40 |
41 | 2. Any additional external member suggestions?
42 |
43 | - We have USDA, NASA, NEON
44 |
45 | - External person to represent genomics data
46 |
47 | - JGI: John Vogel (via Christer Jansson), though he is part of a TERRA team
48 |
49 | - Others? (via Dan Northrup and David Lee)
50 |
51 | - External person to represent robotics
52 |
53 | 2. Review specifications for Lemnatec Field system
54 |
55 | 1. identify additional data / meta-data that will be required [*https://github.com/terraref/reference-data/issues/2*](https://github.com/terraref/reference-data/issues/2)
56 |
57 | - Showed location of data in box and how the sensor data is organized. Each sensor has raw and meta data files.
58 |
59 | - What is important to keep in each of the meta data files besides what is already embedded in the database? This includes information about the sensors.
60 |
61 | - Q. Melba is unsure whether the data in the folders is the actual output of the sensors. Are these Headwall outputs with correcting sensors, or straight sensor output? We need to know whether the data has already been corrected to that output format.
62 |
63 | - David LeBauer will talk with Melba about this and determine how best to follow up.
64 |
65 | - TERRAref will not be using Lemnatec’s proprietary software because it is not created for all of the sensors that we are using and because we want to be in control of the algorithms
66 |
67 | - TERRAref will process sensor data into datacubes
68 |
69 | 3. Discuss proposed semantics and formats
70 |
71 | 1. Meteorological variables [*https://github.com/terraref/reference-data/issues/3*](https://github.com/terraref/reference-data/issues/3)
72 |
73 | - Ed Delp will look at this in more detail
74 |
75 | 2. Imaging and hyperspectral data [*https://github.com/terraref/reference-data/issues/14*](https://github.com/terraref/reference-data/issues/14)
76 |
77 | - Feel free to provide feedback on github
78 |
79 | - Matt Colgan to talk to David LeBauer offline about this
80 |
81 | 3. Plant traits [*https://github.com/terraref/reference-data/issues/18*](https://github.com/terraref/reference-data/issues/18)
82 |
83 | - ICASA provides many traits, but not all that we will need. David LeBauer suggests creating a table cross-referencing trait names across the different databases.
84 |
85 | - What resource should molecular phenotype trait standard names be derived from?
86 |
87 | 4. Other business?
88 |
89 | - There will be an in person meeting in Pittsburgh at the Kickoff for data standards at the end of the first day (5 pm).
90 |
91 | - David will give an overview of the data management plan as part of the Cat 5 session earlier in the day, so there may be general interest in this topic.
92 |
93 | - Should the meeting be open to everyone or just the leads? Space for 25-30
94 |
95 | - David Lee and David LeBauer to work with Rachel Shekar to develop an agenda
96 |
97 | - TERRAref needs sample analyses to better create data products and develop pipelines.
98 |
99 | - Would like to better understand how the data will be used. Please provide feedback on the specific applications in which the different data types will be used and on which formats will be most useful. Also discuss the scope of data that will be used: for example, will people just need to look in depth at a few plots, or regularly analyze the whole field?
100 |
101 | > Committee expertise and interests
102 |
103 | | | **genomics/ genetics** | **sensor data** | **image analysis** | **robotics** | **physiology** | **modelling** |
104 | |--------------------|--------------------------|-------------------|----------------------|----------------|------------------|-----------------|
105 | | Paul Bartlett | | x | | x | | |
106 | | Matt Colgan | | x | | | x | |
107 | | Melba Crawford | | x | x | | | |
108 | | Michael Gore | x | | | | x | x |
109 | | Christer Jansson | x | | | | x | |
110 | | Christine Laney | | x | | | | |
111 | | Shelly Petroy | | x | | | | |
112 | | Barnabas Poczos | | x | x | | | |
113 | | Cheryl Porter | | | | | | x |
114 | | Shawn Serbin | | x | x | | x | x |
115 | | Alex Thomasson | | x | x | | x | |
116 | | Jeff White | x | | | | x | |
117 | | David Lee | x | | | | x | |
118 | | Ed Delp | | x | x | | | |
119 |
--------------------------------------------------------------------------------