├── h5_data └── .gitignore ├── raw_data └── download_igra.sh ├── CHANGELOG.md ├── LICENSE-DATA ├── +GCSAL ├── +H5 │ ├── recursive_load.m │ ├── create_and_write.m │ ├── fullpath.m │ └── load.m ├── +Map │ ├── world_map.m │ ├── find_nearest.m │ ├── multipatch.m │ ├── inpolygon2.m │ ├── map_stations_by_country.m │ └── find_in_lat_long_range.m ├── +IGRA │ ├── mat2h5.m │ ├── datafile2mat_dir.m │ ├── mat2h5_dir.m │ ├── datafile2mat.m │ ├── format_definitions.m │ └── Param.m └── GCSAL.m ├── LICENSE ├── CONTRIBUTING ├── IGRA_to_h5_example.m ├── CODE_OF_CONDUCT.md ├── GCSAL_ex2.m ├── README.md └── GCSAL_ex1.m /h5_data/.gitignore: -------------------------------------------------------------------------------- 1 | gcsal.h5 2 | gcsal.h5.info.mat 3 | -------------------------------------------------------------------------------- /raw_data/download_igra.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | # 5 | # This source code is licensed under the MIT license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | # Use wget to download all data 9 | wget -m ftp://ftp.ncdc.noaa.gov/pub/data/igra/data/data-por/ 10 | 11 | # cd to download directory 12 | cd ftp.ncdc.noaa.gov/pub/data/igra/data/data-por || exit 13 | 14 | # unzip all 15 | find ./ -name \*.zip -exec unzip -n {} \; 16 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | May 25, 2019 2 | - Updated license from BSD to MIT 3 | 4 | Oct 19, 2018 5 | - Added CODE_OF_CONDUCT.md and CHANGELOG.md files 6 | - Updated copyright information to all source code files 7 | 8 | Dec 5, 2017 9 | - Added functionality to search by latitude range and for nearest N stations. 10 | - Made searches faster with optional new modes that suppress plotting, waitbars, and text output. 11 | - Revised example file and added new one. 12 | - All changes are backward compatible. 13 | 14 | Oct 27, 2017 15 | - Initial release 16 | -------------------------------------------------------------------------------- /LICENSE-DATA: -------------------------------------------------------------------------------- 1 | For NOAA Integrated Global Radiosonde Archive (IGRA) data 2 | 3 | World Meteorological Organization (WMO) Resolution 40 NOAA Policy 4 | NCEI data and products that contain international data may have conditions 5 | placed on their international commercial use. They can be used within the United 6 | States or for noncommercial international activities without restriction. 7 | Redistribution of these data by others must provide this same notification. 8 | The non-U.S. data cannot be redistributed for commercial purposes. For details, 9 | please consult the WMO policy. 10 | -------------------------------------------------------------------------------- /+GCSAL/+H5/recursive_load.m: -------------------------------------------------------------------------------- 1 | function [ ] = recursive_load( h5_file, info ) 2 | % Copyright (c) Facebook, Inc. and its affiliates. 3 | % 4 | % This source code is licensed under the MIT license found in the 5 | % LICENSE file in the root directory of this source tree. 6 | % 7 | % Recursively load all data in h5_file that is a child of the Groups in 8 | % info. This is for test purpose only 9 | 10 | 11 | if isempty(info.Datasets) 12 | for i = 1:length(info.Groups) 13 | GCSAL.H5.recursive_load( h5_file, info.Groups(i)); 14 | end 15 | else 16 | GCSAL.H5.load(h5_file, info); 17 | end 18 | 19 | 20 | end 21 | -------------------------------------------------------------------------------- /+GCSAL/+H5/create_and_write.m: -------------------------------------------------------------------------------- 1 | function create_and_write(filename, datasetname, data) 2 | % Copyright (c) Facebook, Inc. and its affiliates. 3 | % 4 | % This source code is licensed under the MIT license found in the 5 | % LICENSE file in the root directory of this source tree. 6 | % 7 | % [] = create_and_write(filename, datasetname, data) 8 | % Uses h5create and h5write to write data to datasetname in filename 9 | % If data is empty, does nothing 10 | 11 | 12 | if ~isempty(data) 13 | h5create(filename, datasetname, size(data), 'Datatype', class(data)) 14 | h5write( filename, datasetname, data) 15 | end 16 | 17 | end 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Facebook, Inc. and its affiliates. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /+GCSAL/+Map/world_map.m: -------------------------------------------------------------------------------- 1 | function world_map(countries) 2 | % Copyright (c) Facebook, Inc. and its affiliates. 3 | % 4 | % This source code is licensed under the MIT license found in the 5 | % LICENSE file in the root directory of this source tree. 6 | % 7 | % world_map(countries) 8 | % Creates a cartesion map of the world by creating patches for each 9 | % country in countries. countries should be a struct array with each 10 | % element containing the fields Lat and Lon for the latitude/longitude of 11 | % the country borders in degrees. 12 | 13 | 14 | % Background patch for ocean in light blue 15 | patch([-180 -180 180 180], [-90 90 90 -90], [0 1 1]) 16 | 17 | % Loop through all countries and make a yellow patch with a grey border 18 | % multipatch handles the fact that the country borders may have NaN for 19 | % separating non-continuous borders 20 | for i = 1:length(countries) 21 | GCSAL.Map.multipatch(countries(i).Lon, countries(i).Lat, [1 1 0], ... 22 | 'EdgeColor', [.7 .7 .7]); 23 | end 24 | 25 | % axis equal so map does not distort 26 | axis equal 27 | 28 | % Limit axes by longitude and latitude min/max values 29 | axis([-180 180 -90 90]) 30 | 31 | end 32 | -------------------------------------------------------------------------------- /+GCSAL/+IGRA/mat2h5.m: -------------------------------------------------------------------------------- 1 | function [ ] = mat2h5( mat_filename, h5_filename, station_id ) 2 | % Copyright (c) Facebook, Inc. and its affiliates. 3 | % 4 | % This source code is licensed under the MIT license found in the 5 | % LICENSE file in the root directory of this source tree. 6 | % 7 | % Load mat_filename and write the contents in h5 format to h5_filename with 8 | % station_id as h5 path root 9 | 10 | 11 | % load mat file 12 | mat = load(mat_filename); 13 | 14 | % Loop through the fields 15 | flds = fieldnames(mat); 16 | for i = 1:length(flds) 17 | 18 | % Construct h5 path 19 | h5path = GCSAL.H5.fullpath(station_id, flds{i}); 20 | 21 | % h5write all fields 22 | h5write_all_params(mat.(flds{i}), h5_filename, h5path) 23 | end 24 | 25 | end 26 | 27 | function h5write_all_params(data, h5filename, datasetname) 28 | % Loop through all parameters in data and call h5write with h5filename and 29 | % datasetname. data should be a struct of GCSAL.IGRA.Param objects 30 | 31 | % Loop through fields 32 | flds = fieldnames(data); 33 | for i = 1:length(flds) 34 | % Extract Param object from data struct 35 | curr_param = data.(flds{i}); 36 | 37 | % Call h5write method of Param object 38 | curr_param.h5write(h5filename, datasetname); 39 | end 40 | end 41 | -------------------------------------------------------------------------------- /+GCSAL/+Map/find_nearest.m: -------------------------------------------------------------------------------- 1 | function [stations, arclen] = find_nearest(all_stations, lat, lon, n) 2 | % Copyright (c) Facebook, Inc. and its affiliates. 3 | % 4 | % This source code is licensed under the MIT license found in the 5 | % LICENSE file in the root directory of this source tree. 6 | % 7 | % [stations, arclen] = find_nearest(all_stations, lat, lon, n) 8 | % 9 | % Returns an array of station structs of the nearest n stations 10 | % relative to the specified lat/lon 11 | % 12 | % lat and lon must be single values each in degrees. 13 | % 14 | % INPUTS 15 | % all_stations - struct array, each elemetn contains lat, long, id 16 | % lat - scalar defining reference latitude in degrees. 17 | % lon - scalar defining reference longitude in degrees. 18 | % n - number of nearest stations 19 | % 20 | % OUTPUTS 21 | % stations - struct array, subset of all_stations located within the 22 | % lat/long search box 23 | % arclen - vector of distances in meters 24 | 25 | 26 | lats = [all_stations(:).lat]; 27 | lons = [all_stations(:).lon]; 28 | E = referenceEllipsoid('wgs84'); 29 | [arclen, ~] = distance(lats, lons, lat, lon, E); 30 | [~,idx] = sort(arclen); 31 | stations = all_stations(idx(1:n)); 32 | arclen = arclen(idx(1:n)); 33 | 34 | end 35 | -------------------------------------------------------------------------------- /+GCSAL/+Map/multipatch.m: -------------------------------------------------------------------------------- 1 | function patch_handles = multipatch( x, y, varargin ) 2 | % Copyright (c) Facebook, Inc. and its affiliates. 3 | % 4 | % This source code is licensed under the MIT license found in the 5 | % LICENSE file in the root directory of this source tree. 6 | % 7 | % patch_handles = multipatch( x, y, varargin ) 8 | % Like built in matlab function patch but allows for NaN values in x and 9 | % y to separate multiple patches. varargin allows for any additional 10 | % inputs to the patch function. 11 | % 12 | % returns patch_handles, a vector of handles returned by each call to 13 | % patch() 14 | 15 | 16 | if ~isequal(size(x), size(y)) 17 | error('x and y must be same size') 18 | end 19 | 20 | % Find nan indices 21 | xnan = isnan(x); 22 | ynan = isnan(y); 23 | 24 | % Find where either x or y is nan 25 | anynan = find(xnan | ynan); 26 | 27 | % anynan will be used for start/stop indices with any index in anynan being 28 | % skipped over. To make the for loop smooth, add indices 0 and length+1 to 29 | % anynan 30 | anynan = [0 anynan length(x)+1]; 31 | 32 | % initialize patch_handles 33 | patch_handles = []; 34 | 35 | % Loop through anynan 36 | for i = 2:length(anynan) 37 | 38 | % Choose idx between previous and next nan value 39 | idx = anynan(i-1)+1:anynan(i)-1; 40 | 41 | % Check that idx is not empty 42 | if ~isempty(idx) 43 | % Create a new patch with values at idx 44 | patch_handles(end+1) = patch(x(idx), y(idx), varargin{:}); %#ok 45 | end 46 | end 47 | 48 | end 49 | -------------------------------------------------------------------------------- /CONTRIBUTING: -------------------------------------------------------------------------------- 1 | # Contributing to GCSAL 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Our Development Process 6 | How changes are synced with internal changes to the project will be defined in 7 | the future. 8 | 9 | ## Pull Requests 10 | We actively welcome your pull requests. 11 | 12 | 1. Fork the repo and create your branch from `master`. 13 | 2. If you've added code that should be tested, add tests. 14 | 3. If you've changed APIs, update the documentation. 15 | 4. Ensure the test suite passes. 16 | 5. Make sure your code lints. 17 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 18 | 19 | ## Contributor License Agreement ("CLA") 20 | In order to accept your pull request, we need you to submit a CLA. You only need 21 | to do this once to work on any of Facebook's open source projects. 22 | 23 | Complete your CLA here: 24 | 25 | ## Issues 26 | We use GitHub issues to track public bugs. Please ensure your description is 27 | clear and has sufficient instructions to be able to reproduce the issue. 28 | 29 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 30 | disclosure of security bugs. In those cases, please go through the process 31 | outlined on that page and do not file a public issue. 32 | 33 | ## Coding Style 34 | * 2 spaces for indentation rather than tabs 35 | * 80 character line length 36 | 37 | ## License 38 | By contributing to GCSAL, you agree that your contributions will be licensed 39 | under the LICENSE file in the root directory of this source tree. 40 | -------------------------------------------------------------------------------- /+GCSAL/+H5/fullpath.m: -------------------------------------------------------------------------------- 1 | function out = fullpath(filepart1, varargin) 2 | % Copyright (c) Facebook, Inc. and its affiliates. 3 | % 4 | % This source code is licensed under the MIT license found in the 5 | % LICENSE file in the root directory of this source tree. 6 | % 7 | % F = fullpath(filepart1, filepart2, ..., filepartN) builds a full 8 | % path specification F from the folders specified using / for the 9 | % fileseparator regardless of operating system. Input 10 | % arguments FOLDERNAME1, FOLDERNAME2, must be strings. The output 11 | % of fullfile is equivalent to 12 | % 13 | % F = [filepart1 / filepart2 / ... / filepartN] 14 | % 15 | % except that care is taken to handle the cases when the folders begin 16 | % or end with a file separator. 17 | 18 | 19 | % Error check for number of inputs 20 | if length(varargin) < 1 21 | error('fullpath expects at least 2 inputs') 22 | end 23 | 24 | % Extract first input in varargin 25 | filepart2 = varargin{1}; 26 | 27 | % Check that inputs are strings 28 | if ~isa(filepart1, 'char') || ~isa(filepart2, 'char') 29 | error('fullpath expects string inputs') 30 | end 31 | 32 | % Remove / from end of filepart1 if it exists 33 | if strcmp(filepart1(end), '/') 34 | filepart1(end) = []; 35 | end 36 | 37 | % Remove / from start of filepart2 if it exists 38 | if strcmp(filepart2(1), '/') 39 | filepart2(1) = []; 40 | end 41 | 42 | % Concatenate fileparts with / 43 | out = [filepart1 '/' filepart2]; 44 | 45 | % Recurse on remaining inputs 46 | if length(varargin) > 1 47 | out = GCSAL.H5.fullpath(out, varargin{2:end}); 48 | end 49 | 50 | end 51 | -------------------------------------------------------------------------------- /+GCSAL/+Map/inpolygon2.m: -------------------------------------------------------------------------------- 1 | function [in] = inpolygon2(x,y,xv,yv) 2 | % Copyright (c) Facebook, Inc. and its affiliates. 3 | % 4 | % This source code is licensed under the MIT license found in the 5 | % LICENSE file in the root directory of this source tree. 6 | % 7 | % Function to determine if points (x,y) is inside or outside a polygon. 8 | % 9 | % INPUTS 10 | % (xv,yv): polygon is specified by (xv,yv) points 11 | % (x,y): point coordinates are specified as (x,y) pairs. x and y should be vectors of same size. 12 | % OUTPUTS: 13 | % in : is a logical array (0 means point is outside and 1 means point is inside) 14 | % 15 | % Implementation is based on winding algorithm explained in http://geomalgorithms.com/a03-_inclusion.html 16 | % Example 17 | % xv = rand(6,1); yv = rand(6,1); 18 | % xv = [xv ; xv(1)]; yv = [yv ; yv(1)]; 19 | % x = rand(1000,1); y = rand(1000,1); 20 | % in = inpolygon(x,y,xv,yv); 21 | % plot(xv,yv,x(in),y(in),'.r',x(~in),y(~in),'.b') 22 | 23 | 24 | if ((xv(1) ~= xv(end)) || (yv(1) ~= yv(end))) 25 | xv = [xv ; xv(1)]; 26 | yv = [yv ; yv(1)]; 27 | end 28 | 29 | n=length(xv)-1; % number of polygon corners 30 | np=length(x); % number of points to be evaluated 31 | wn=zeros(np,1); % starts with all points outside 32 | 33 | for j=1:np 34 | for i=1:n 35 | if (yv(i) <=y(j)) 36 | if (yv(i+1) > y(j)) 37 | if (is_point_on_left(x(j),y(j),xv(i),yv(i),xv(i+1),yv(i+1))>0) 38 | wn(j)=wn(j)+1; 39 | end 40 | end 41 | else 42 | if (yv(i+1) <= y(j)) 43 | if (is_point_on_left(x(j),y(j),xv(i),yv(i),xv(i+1),yv(i+1))<0) 44 | wn(j)=wn(j)-1; 45 | end 46 | end 47 | end 48 | end 49 | end 50 | 51 | in=logical(wn); % convert to logical 0-1 52 | 53 | 54 | function isleft=is_point_on_left(px,py,p1Lx,p1Ly,p2Lx,p2Ly) 55 | % Determine if point (px,py) is on the left | right | On the line. 56 | % points on the line is specified as (p1Lx,p1Ly) & (p2Lx,p2Ly) 57 | % isleft=1 if p is on the left , isleft= 0 if p is along the line 58 | % isleft=-1 if p is on the right. 59 | 60 | p1_to_p2=[p2Lx-p1Lx;p2Ly-p1Ly]; 61 | p1_to_p=[px-p1Lx;py-p1Ly]; 62 | isleft = det([p1_to_p2 p1_to_p]); 63 | end 64 | 65 | end 66 | -------------------------------------------------------------------------------- /+GCSAL/+IGRA/datafile2mat_dir.m: -------------------------------------------------------------------------------- 1 | function datafile2mat_dir(in_dir, out_dir, overwrite_mat, filespec) 2 | % Copyright (c) Facebook, Inc. and its affiliates. 3 | % 4 | % This source code is licensed under the MIT license found in the 5 | % LICENSE file in the root directory of this source tree. 6 | % 7 | % [] = datafile2mat_dir(in_dir, out_dir, use_mat_if_found, filespec) 8 | % Parses IGRA data files that match filespec and saves a .mat file 9 | % containing the data in out_dir. The flag overwrite_mat controls whether 10 | % IGRA files that already have a matching .mat file will be skipped or 11 | % overwritten. 12 | % 13 | % All inputs are optional and will revert to a default value if not provided. 14 | % 15 | % INPUTS 16 | % in_dir - Directory to look for IGRA data files. 17 | % Default: current working directory 18 | % out_dir - Directory to save output .mat files 19 | % Default: current working directory 20 | % overwrite_mat - Flag whether to skip existing mat files or overwrite 21 | % Default: true 22 | % filespec - filespec used to identify IGRA data files 23 | % Default: '*-data.txt' 24 | 25 | 26 | % Set default values 27 | if ~exist('in_dir', 'var') 28 | in_dir = pwd; 29 | end 30 | 31 | if ~exist('out_dir', 'var') 32 | out_dir = pwd; 33 | end 34 | 35 | if ~exist('overwrite_mat', 'var') 36 | overwrite_mat = true; 37 | end 38 | 39 | if ~exist('filespec', 'var') 40 | filespec = '*-data.txt'; 41 | end 42 | 43 | % Find all files ending in "-data.txt" 44 | filespec = fullfile(in_dir, filespec); 45 | fileObj = dir(filespec); 46 | 47 | % % Sort files by size 48 | % [~, indices] = sort([fileObj.bytes], 'ascend'); 49 | % indices = indices([1:300]); 50 | % fileObj = fileObj(indices); 51 | 52 | % Calculate total size of all files in MB 53 | total_MB = sum([fileObj.bytes])/1e6; 54 | N_files = length(fileObj); 55 | 56 | fprintf('Reading %d files totalling %.1f MB in %s\n', N_files, total_MB, in_dir); 57 | 58 | % Initialize counters 59 | read_MB = 0; 60 | time_so_far = 0; 61 | t1 = tic; 62 | 63 | % Iterate through files and read data 64 | for i = 1:N_files 65 | t2 = tic; 66 | curr = fileObj(i); 67 | curr_name = curr.name; 68 | curr_MB = curr.bytes/1e6; 69 | fprintf('%04d: Reading %s, %5.1f MB', i, curr_name, curr_MB); 70 | 71 | GCSAL.IGRA.datafile2mat( fullfile(in_dir, curr_name), overwrite_mat, out_dir ); 72 | 73 | read_MB = read_MB + curr_MB; 74 | curr_time = toc(t2); 75 | time_so_far = time_so_far + curr_time; 76 | prct_complete = read_MB/total_MB; 77 | total_time = time_so_far/prct_complete; 78 | curr_rate = curr_MB/curr_time; 79 | avg_rate = read_MB/time_so_far; 80 | fprintf(', %.0f/%.0f MB, %5.2f%% %.1f curr MB/s, %.1f avg MB/s, %.1f/%.1f seconds\n', ... 81 | read_MB, total_MB, prct_complete*100, curr_rate, avg_rate, time_so_far, total_time); 82 | end 83 | toc(t1) 84 | -------------------------------------------------------------------------------- /+GCSAL/+IGRA/mat2h5_dir.m: -------------------------------------------------------------------------------- 1 | function mat2h5_dir(in_dir, h5_filename, append, filespec) 2 | % Copyright (c) Facebook, Inc. and its affiliates. 3 | % 4 | % This source code is licensed under the MIT license found in the 5 | % LICENSE file in the root directory of this source tree. 6 | % 7 | % [] = mat2h5_dir(in_dir, h5_filename, append, filespec) 8 | % Loads data in .mat files that match filespec and writes the data to 9 | % h5_filename. The flag append controls whether data will be appended to 10 | % h5_filename or if a new h5_filename will be created from scratch. 11 | % 12 | % All inputs are optional and will revert to a default value if not provided. 13 | % 14 | % INPUTS 15 | % in_dir - Directory to look for .mat data files. 16 | % Default: current working directory 17 | % h5_filename - File to write data to 18 | % Default: './gcsal.h5' 19 | % append - Flag whether to append to existing h5 file or start new 20 | % Default: true 21 | % filespec - filespec used to identify .mat files 22 | % Default: '*-data.txt.mat' 23 | 24 | 25 | % Set default values 26 | if ~exist('in_dir', 'var') 27 | in_dir = pwd; 28 | end 29 | 30 | if ~exist('h5_filename', 'var') 31 | h5_filename = fullfile(in_dir, 'gcsal.h5'); 32 | end 33 | 34 | if ~exist('append', 'var') 35 | append = true; 36 | end 37 | 38 | if ~exist('filespec', 'var') 39 | filespec = '*-data.txt.mat'; 40 | end 41 | 42 | % Find all files matching filespec 43 | filespec = fullfile(in_dir, filespec); 44 | fileObj = dir(filespec); 45 | 46 | % Sort files by size 47 | [~, indices] = sort([fileObj.bytes], 'ascend'); 48 | fileObj = fileObj(indices); 49 | 50 | % Calculate total size of all files in MB 51 | N_files = length(fileObj); 52 | 53 | % Get h5 file info 54 | names = {}; 55 | if exist(h5_filename, 'file') 56 | if append 57 | info = h5info(h5_filename); 58 | names = {info.Groups.Name}; 59 | else 60 | delete(h5_filename) 61 | end 62 | end 63 | 64 | fprintf('Processing %d files in %s\n', N_files, in_dir); 65 | 66 | % Initialize counters 67 | time_so_far = 0; 68 | t1 = tic; 69 | 70 | % Iterate through files and read data 71 | for i = 1:N_files 72 | t2 = tic; 73 | curr = fileObj(i); 74 | curr_name = curr.name; 75 | fprintf('Reading %s %04d/%04d ', curr_name, i, N_files); 76 | 77 | % Construct station id from .mat filename 78 | station_id = ['/' curr_name(1:end-13)]; 79 | 80 | % Check that station_id is not already in H5 file 81 | if ~ismember(station_id, names) 82 | % Load .mat file and write all params in header and entries to h5 83 | mat_filename = fullfile(in_dir, curr_name); 84 | GCSAL.IGRA.mat2h5( mat_filename, h5_filename, station_id ) 85 | end 86 | 87 | curr_time = toc(t2); 88 | time_so_far = time_so_far + curr_time; 89 | prct_complete = i/N_files; 90 | total_time = time_so_far/prct_complete; 91 | avg_rate = time_so_far/i; 92 | fprintf('%3.0f%% Curr: %.2f sec, Avg: %.2f sec, %4.0f/%.0f sec\n', ... 93 | prct_complete*100, curr_time, avg_rate, time_so_far, total_time); 94 | end 95 | toc(t1) 96 | 97 | end 98 | -------------------------------------------------------------------------------- /IGRA_to_h5_example.m: -------------------------------------------------------------------------------- 1 | % Copyright (c) Facebook, Inc. and its affiliates. 2 | % 3 | % This source code is licensed under the MIT license found in the 4 | % LICENSE file in the root directory of this source tree. 5 | % 6 | % This example script takes the GCSAL process from downloading the source 7 | % data from NOAA website through to creating a GCSAL Matlab object. 8 | % 9 | % The steps of this example script are only necessary if you want to 10 | % download and re-build the GCSAL library from the original source data. 11 | % Typically this is not necessary as you can use the provided 12 | % .h5 file available on the website. Reasons you might want to do this: 13 | % 1. Want to update the data with the latest measurements 14 | % 2. Want to make a change to the way data is stored in the .h5 file 15 | 16 | 17 | %% Set up paths and constants - Change the paths below as necessary 18 | clear all; close all; clc; 19 | 20 | %%%%%%%%%%%%%% CHANGE THESE %%%%%%%%%%%%%%%%%%%%%%%% 21 | % Base directory. Text files should be in a directory called txt in this 22 | % folder 23 | base_dir = './raw_data/'; 24 | 25 | % Directory to save .h5 file 26 | h5_dir = './h5_data/'; 27 | 28 | % Directory to code. The folder +GCSAL which contains this file should be 29 | % in this directory 30 | codebase_dir = './'; 31 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 32 | 33 | % Directory containing IGRA .txt files 34 | txt_dir = fullfile(base_dir, 'txt'); 35 | 36 | % filespec is used to find the source .txt files. 37 | filespec = '*-data.txt'; % Use this to find process all .txt files 38 | % filespec = 'BC*-data.txt'; % Use this for testing on a small subset files 39 | 40 | % Directory to store .mat files 41 | mat_dir = fullfile(base_dir, 'mat'); 42 | 43 | % Full path to .h5 file 44 | h5_file = fullfile(h5_dir, 'gcsal.h5'); 45 | 46 | % Set up Matlab path 47 | addpath(genpath(codebase_dir)) 48 | 49 | %% Step 1: Download IGRA data from NOAA 50 | 51 | % Run "bash ./raw_data/download_igra.sh" on the command line from this directory 52 | % This will take a while as you are downloading > 70 gb of data 53 | 54 | 55 | %% Step 2: Convert IGRA txt file to .mat 56 | 57 | % Set overwrite_mat true if you want to start from scratch and overwrite any 58 | % existing .mat files that have already been made and exists on your path 59 | overwrite_mat = true; %false; 60 | GCSAL.IGRA.datafile2mat_dir(txt_dir, mat_dir, overwrite_mat, filespec); 61 | 62 | %% Step 3: Convert .mat to .h5 63 | 64 | % Set append_flag false if you want to start from scratch and clear the .h5 65 | % file if it previously been made and exists on your path 66 | append_flag = false; %true; 67 | GCSAL.IGRA.mat2h5_dir(mat_dir, h5_file, append_flag, [filespec '.mat']); 68 | 69 | %% Step 4: Load GCSAL object from h5 file 70 | 71 | % The first time you create the object by pointing to the h5_file. This 72 | % will create a .mat file which can be used after the first time 73 | g = GCSAL.GCSAL(h5_file); 74 | 75 | 76 | %% 77 | % Now you can look in GCSAL.GCSAL_examples.m to learn about how to use the 78 | % GCSAL object 79 | 80 | % Get stations in Brazil 81 | stations = g.station_search(); 82 | 83 | % Histogram for all windspeeds 84 | t = tic; 85 | [N, entries] = g.counts(stations, 'wspd'); 86 | toc(t) 87 | -------------------------------------------------------------------------------- /+GCSAL/+Map/map_stations_by_country.m: -------------------------------------------------------------------------------- 1 | function countries = map_stations_by_country(all_stations, countries_data) 2 | % Copyright (c) Facebook, Inc. and its affiliates. 3 | % 4 | % This source code is licensed under the MIT license found in the 5 | % LICENSE file in the root directory of this source tree. 6 | % 7 | % countries_with_station = map_stations_by_country(all_stations, countries_data) 8 | % For each country in countries_data, finds stations in all_stations that 9 | % are within the borders of the country based on the latitude and 10 | % longitude of the stations and countries. 11 | % 12 | % Returns a struct array countries where each element contains the 13 | % country name and a list of stations ids that were found to be contained 14 | % by that country. 15 | % 16 | % If countries_data is not input, then the function will try to load 17 | % countries_data from ne_110m_admin_0_countries.mat if the file exists on 18 | % the path 19 | % 20 | % INPUTS 21 | % all_stations - struct array, each element should contain lat and long 22 | % of the station in degrees and id for the identifier of 23 | % the station 24 | % countries_data - struct array, each element should contain Lat and Lon of 25 | % the borders of the country in degrees as well as name to 26 | % identify the country 27 | % 28 | % OUTPUTS 29 | % countries - struct array, each element contains name, Lat, Lon, and 30 | % stations. name is a string. Lat and Lon are the borders 31 | % of the country in degrees. stations is a string matrix 32 | % with each row an id for a station contained in that 33 | % country. 34 | 35 | 36 | % If countries_data not given, try to load it form default .mat file 37 | if ~exist('countries_data', 'var') 38 | fname = 'ne_110m_admin_0_countries.mat'; 39 | if exist(fname, 'file') 40 | fprintf('Loading countries data from %s\n', which(fname)) 41 | countries_data = load(fname); 42 | countries_data = countries_data.worldData; 43 | else 44 | fprintf('Could not load country data. %s file not found\n', fname) 45 | countries = struct([]); 46 | return 47 | end 48 | end 49 | 50 | 51 | % Some data sets use name and others use NAME 52 | if isfield(countries_data, 'name') 53 | names = {countries_data.name}; 54 | elseif isfield(countries_data, 'NAME') 55 | names = {countries_data.NAME}; 56 | else 57 | error('countries_data is missing the name field') 58 | end 59 | 60 | % Initialize output with Lat, Lon, and name fields from countries_data 61 | countries = struct('Lat', {countries_data.Lat}, ... 62 | 'Lon', {countries_data.Lon}, ... 63 | 'name', names, ... 64 | 'stations', ''); 65 | 66 | 67 | % Extract vector from all_stations struct array 68 | stations_lat = [all_stations.lat]; 69 | stations_long = [all_stations.lon]; 70 | stations_id = vertcat(all_stations.id); 71 | 72 | % Loop through each country in countries_data 73 | for i = 1:length(countries) 74 | 75 | % Get logical array corresponding to stations found in country 76 | in_idx = GCSAL.Map.inpolygon2(stations_long, stations_lat, ... 77 | countries(i).Lon, countries(i).Lat); 78 | 79 | % Record station ids found in the country 80 | countries(i).stations = stations_id(in_idx,:); 81 | 82 | end 83 | 84 | 85 | end 86 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at . All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | 78 | -------------------------------------------------------------------------------- /+GCSAL/+H5/load.m: -------------------------------------------------------------------------------- 1 | function [ out ] = load( filename, info ) 2 | % Copyright (c) Facebook, Inc. and its affiliates. 3 | % 4 | % This source code is licensed under the MIT license found in the 5 | % LICENSE file in the root directory of this source tree. 6 | % 7 | % [ out ] = load( filename, info ) 8 | % Load and uncompress data in h5 file filename at dataset find in info. 9 | % info is a specific Group found in the struct return by h5info() 10 | 11 | 12 | % List of expected flds 13 | flds = {'data', 'len', 'idx', 'i_unique'}; 14 | 15 | % Extract data set names, sort for stable comparison below 16 | dsets = {info.Datasets.Name}; 17 | 18 | % Error check for unexpected fields 19 | unexpected_flds = intersect(setxor(dsets, flds), dsets); 20 | if ~isempty(unexpected_flds) 21 | msg = sprintf(' %s\n', unexpected_flds{:}); 22 | error(['Unexpected datasets encountered:\n%s' ... 23 | 'Expected datasets are data, len, and idx.'], msg) %#ok 24 | end 25 | 26 | % Loop through expected flds and read data if available 27 | for i = 1:length(flds) 28 | if ismember(flds{i}, dsets) 29 | compressed_data.(flds{i}) = h5read(filename, GCSAL.H5.fullpath(info.Name, flds{i})); 30 | end 31 | end 32 | 33 | % Uncompress the data based on the datasets that were found 34 | out = uncompress_data(compressed_data); 35 | 36 | end 37 | 38 | 39 | function out = uncompress_data(in) 40 | % Returns uncompressed vector represented in uncompressed data 41 | % struct in. 42 | 43 | % First use i_unique to expand data if it was included 44 | if isfield(in, 'i_unique') 45 | 46 | % Use i_unique to expand data to size of idx 47 | in.data = in.data(in.i_unique,:); 48 | 49 | % Remove i_unique field 50 | in = rmfield(in, 'i_unique'); 51 | end 52 | 53 | % Extract fields from compressed data struct 54 | flds = sort(fieldnames(in)); 55 | 56 | % Select decompression method based on fields found in uncompressed data 57 | % struct 58 | if isequal(flds, {'data'; 'idx'; 'len'}) 59 | 60 | out = uncompress_data_idx_len(in.data, in.idx, in.len); 61 | 62 | elseif isequal(flds, {'data'; 'len'}) 63 | 64 | out = uncompress_data_len(in.data, in.len); 65 | 66 | elseif isequal(flds, {'data'}) 67 | % If only data is given, there is nothing to do 68 | out = in.data; 69 | elseif isequal(flds, {'len'}) 70 | % If only length is given return properly sized NaN vector 71 | out = NaN(in.len, 1); 72 | else 73 | msg = sprintf(' %s\n', flds{:}); 74 | error('Unexpected combination of datasets found:\n%s', msg) 75 | end 76 | 77 | end 78 | 79 | function out = uncompress_data_idx_len(data, idx, len) 80 | % Returns uncompressed vector from data, idx, and len compressed 81 | % representation 82 | 83 | % Extract size of data input 84 | [N_data_rows, N_data_cols] = size(data); 85 | 86 | % Initialize out to properly sized NaN 87 | out = NaN(len, N_data_cols); 88 | 89 | % Uncompress idx 90 | idx = GCSAL.IGRA.Param.uncompress_idx(idx, len); 91 | 92 | % data should be the same size as idx unless there was only a single 93 | % data value, in which case it should replicated to match the length of 94 | % idx 95 | if N_data_rows == 1 96 | data = repmat(data, length(idx),1); 97 | end 98 | 99 | if size(data, 1) ~= length(idx) 100 | error('Numer of rows in data does not match length of idx') 101 | end 102 | 103 | % index data into out 104 | out(idx,:) = data; 105 | end 106 | 107 | function out = uncompress_data_len(data, len) 108 | % Returns uncompressed vector from data, and len compressed 109 | % representation 110 | 111 | if size(data,1) == 1 112 | % Only a single value for data given so replicate to match len 113 | % out = repmat(data, len, 1); 114 | out = data; 115 | else 116 | % Verify that length of data matches len 117 | if size(data,1) ~= len 118 | error('Number of rows in data does not match len') 119 | end 120 | 121 | % Nothing else to do data is already uncompressed 122 | out = data; 123 | end 124 | end 125 | -------------------------------------------------------------------------------- /+GCSAL/+IGRA/datafile2mat.m: -------------------------------------------------------------------------------- 1 | function [ out ] = datafile2mat( filename, overwrite_mat, output_directory ) 2 | % Copyright (c) Facebook, Inc. and its affiliates. 3 | % 4 | % This source code is licensed under the MIT license found in the 5 | % LICENSE file in the root directory of this source tree. 6 | % 7 | % [ out ] = datafile2mat( filename, overwrite_mat [opt], output_directory [opt] ) 8 | % Parses IGRA data in filename and saves the data to a .mat file in 9 | % output_directory. Additionally returns the data in out. 10 | % 11 | % If a .mat file with a matching name is found on the path and 12 | % overwrite_mat is set to false, this function returns an empty vector and 13 | % does not modify the existing .mat file or create a new one. 14 | % 15 | % INPUTS 16 | % filename - filename for IGRA data text file to be parsed 17 | % overwrite_mat - flag for whether to overwrite or ignore if a matching 18 | % .mat file is found to already exist 19 | % Default: true 20 | % output_directory - directory to save .mat file 21 | % Default: current working directory 22 | 23 | 24 | % Set default values 25 | if ~exist('overwrite_mat', 'var') 26 | overwrite_mat = true; 27 | end 28 | 29 | if ~exist('output_directory', 'var') 30 | output_directory = pwd; 31 | end 32 | 33 | % Format name of .mat file 34 | [~, file_no_path, ext] = fileparts(filename); 35 | mat_filename = fullfile(output_directory, [file_no_path ext '.mat']); 36 | 37 | % If use_mat_if_found is true, look for mat file and load from there if 38 | % found 39 | if exist(mat_filename, 'file') && ~overwrite_mat 40 | % out = load(mat_filename); 41 | out = []; 42 | return 43 | end 44 | 45 | % Get formatting definitions 46 | defs = GCSAL.IGRA.format_definitions( ); 47 | 48 | % open the file 49 | [fid, msg] = fopen(filename); 50 | 51 | if fid == (-1) 52 | error(message('MATLAB:fileread:cannotOpenFile', filename, msg)); 53 | end 54 | 55 | % Throw error on bad file 56 | if fid == -1; error('Could not find file: %s', filename); end 57 | 58 | % Read text file as uint8. Working directly in uint8 is more efficient for 59 | % operations on large datasets. Also we are safe to assume that the IGRA 60 | % data files contain only UTF-8 characters so all characters can be 61 | % represented with 1 byte instead of the 2 bytes of a char 62 | % 63 | % Additionally the entire text file is read in one line for speed. This is 64 | % significantly faster than a while loop with fgetl(). 65 | try 66 | % read file 67 | orig_txt = fread(fid,'char=>uint8'); 68 | catch exception 69 | % close file 70 | fclose(fid); 71 | throw(exception); 72 | end 73 | 74 | % close file 75 | fclose(fid); 76 | 77 | % Lines of text associate with header information begin with # 78 | i_hash = find(orig_txt == uint8('#')); 79 | 80 | % Form indices into orig_txt for the location of header text characters. 81 | % The start of each row of header_txt is given by i_hash and the width of 82 | % each row of header text is fixed 83 | header_idx = bsxfun(@plus, 0:defs.header.row_width, i_hash); 84 | header_txt = orig_txt(header_idx); 85 | 86 | % Find non-header text by simply removing the header text from the original 87 | % text array 88 | no_header_txt = orig_txt; 89 | no_header_txt(header_idx) = []; 90 | 91 | % Now reshape the non-header text so that each row is a line of text with 92 | % fixed width. 93 | if mod(length(no_header_txt), defs.entries.row_width+2) == 0 94 | entries_txt = reshape(no_header_txt, defs.entries.row_width+2, [])'; 95 | else 96 | error('File length not expected. Check for interrupted data') 97 | end 98 | 99 | % Parse header and entries text 100 | out.header = txt2params(defs.header, header_txt); 101 | out.entries = txt2params(defs.entries, entries_txt); 102 | 103 | % Save to mat file. Use -v6 for faster loading (v6 option does not compress 104 | % data so you get larger files but faster loading) 105 | save(mat_filename, '-v6', '-struct', 'out') 106 | 107 | end 108 | 109 | 110 | function out = txt2params(def, text_mat) 111 | % For each Param in def create a Param object with the character array 112 | % text_mat. 113 | 114 | flds = fieldnames(def.params); 115 | for i = 1:length(flds) 116 | out.(flds{i}) = GCSAL.IGRA.Param(def.params.(flds{i}), text_mat); 117 | end 118 | 119 | end 120 | -------------------------------------------------------------------------------- /GCSAL_ex2.m: -------------------------------------------------------------------------------- 1 | % Copyright (c) Facebook, Inc. and its affiliates. 2 | % 3 | % This source code is licensed under the MIT license found in the 4 | % LICENSE file in the root directory of this source tree. 5 | % 6 | % This example file goes through the most commonly used function of the 7 | % Global Climate Statistical Analysis Library class. 8 | 9 | 10 | %% Step 1 is to download the source .h5 file and set up your paths correctly 11 | 12 | %clear; 13 | close all; clc; 14 | 15 | if (~exist('g','var') || ~isa(g,'GCSAL.GCSAL')) 16 | % The gcsal.h5 and gcsal.h5.info.mat files are available for download from the 17 | % website and should be placed in the h5_data directory. 18 | 19 | %%%%%%%%%%%%%% CHANGE THESE %%%%%%%%%%%%%%%%%% 20 | % Set this to wherever you put the gcsal.h5 file and gcsal.h5.info.mat 21 | % files downloaded from dropbox 22 | h5_dir = './h5_data/'; 23 | 24 | % Directory to code. The folder +GCSAL which contains this file should be 25 | % in this directory 26 | codebase_dir = './'; 27 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 28 | 29 | % Full path to .mat file with h5 info 30 | h5_file = fullfile(h5_dir, 'gcsal.h5'); 31 | h5_mat_file = [h5_file '.info.mat']; 32 | 33 | % Set up Matlab path 34 | addpath(genpath(codebase_dir)) 35 | 36 | %% Load GCSAL object from .mat file 37 | 38 | % This requires about 6 gb of RAM just to hold all of the header 39 | % information in memory 40 | 41 | % Normally you should load the GCSAL object from the .mat file but if it 42 | % doesn't exist on your path you can use the .h5 file. After using the .h5 43 | % file a .mat file will be created automatically for subsequent use 44 | if ~exist(h5_mat_file, 'file') 45 | g = GCSAL.GCSAL(h5_file); 46 | else 47 | g = GCSAL.GCSAL(h5_mat_file); 48 | g.h5_fname = h5_file; 49 | end 50 | end 51 | 52 | % Find 20 nearest stations to Capetown, South Africa 53 | 54 | loc_lat = -33.938655; 55 | loc_lon = 18.63863; 56 | nsamples = 20; 57 | stations = g.station_search('Nearest', [loc_lat loc_lon], 'Number', nsamples); 58 | 59 | % Of the 20 nearest stations, grab the least number such that 60 | % at each altitude between 18 and 25 km, there are at least 61 | % 300 wind samples per month 62 | 63 | tooFew = true; 64 | i = 0; 65 | 66 | while tooFew 67 | i = i + 1; 68 | sttns = stations(1:i); 69 | lows = zeros(12,1); 70 | for k = 1:12 71 | [N, entries, stats] = g.counts2(sttns, 'gph', 'wspd', ... 72 | 'FilterFields', {'month'}, 'FilterRanges', {k}, ... 73 | 'Verbose', false, 'Plot', false); 74 | lows(k) = min(interp1(stats.x, sum(N,2), [18:25])); 75 | end 76 | if ((min(lows) >= 300) || (i == nsamples)) 77 | tooFew = false; 78 | end 79 | end 80 | 81 | stations = stations(1:i); 82 | fprintf('Need %d stations\n', length(stations)); 83 | 84 | figure; 85 | plot([stations(:).arclen]/1e3,'-x');grid; 86 | xlabel('Station Index'); 87 | ylabel('Station Distance [km]'); 88 | title('Station Distance from Capetown'); 89 | 90 | % Get wind speed/dir stats 91 | [N1, entries1, stats1] = g.counts2(stations, 'gph', 'wspd', 'Verbose', false); 92 | [N2, entries2, stats2] = g.counts2(stations, 'gph', 'wdir', 'Verbose', false); 93 | 94 | % stats2.x is gph vector 95 | % stats2.y is wdir vector 96 | 97 | % At constant altitude 98 | figure;plot(stats1.y,stats1.pdf(:,23));grid; 99 | title('Yearly Wind Speed PDF At 22.5 km'); 100 | figure;plot(stats2.y,stats2.pdf(:,23));grid; 101 | title('Yearly Wind Direction PDF At 22.5 km'); 102 | 103 | s = stats1.y; 104 | p = stats1.pdf(:,23)'; 105 | wspds = datasample(s, 1e4, 'Weights', p, 'Replace', true); 106 | figure;h1 = histogram(wspds,'BinMethod','sturges');grid; 107 | title('Sampled Yearly Wind Speed PDF At 22.5 km'); 108 | 109 | s = stats2.y; 110 | p = stats2.pdf(:,23)'; 111 | wdirs = datasample(s, 1e4, 'Weights', p, 'Replace', true); 112 | figure;h2 = histogram(wdirs,'BinMethod','sturges');grid; 113 | title('Sampled Yearly Wind Direction PDF At 22.5 km'); 114 | 115 | %% Reproduce distribution 116 | 117 | % y1max = max(stats1.y); 118 | % y1min = min(stats1.y); 119 | % 120 | % y2max = max(stats2.y); 121 | % y2min = min(stats2.y); 122 | % 123 | % n = 1000; 124 | % wind = zeros(n,1); 125 | % dir = zeros(n,1); 126 | % nsamples = 5; 127 | % for j = 1:n 128 | % p = rand(nsamples,1); % sample from uniform dist 129 | % y1 = p * (y1max - y1min) + y1min; % Get wind values from sample 130 | % probs = interp1(stats1.y,stats1.pdf(:,23),y1); % Probability of each wind 131 | % [~,idx] = sort(probs); % Find max probability 132 | % wind(j) = y1(idx(end)); 133 | % 134 | % p = rand(nsamples,1); % sample from uniform dist 135 | % y2 = p * (y2max - y2min) + y2min; % Get wind values from sample 136 | % probs = interp1(stats2.y,stats2.pdf(:,23),y2); % Probability of each wind 137 | % [~,idx] = sort(probs); % Find max probability 138 | % dir(j) = y2(idx(end)); 139 | % end 140 | % figure;histogram(wind);figure;histogram(dir); 141 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Global Climate Statistical Analysis Library (GCSAL) 2 | GCSAL is a software package in MATLAB that allows the user to 3 | - automatically download the Integrated Global Radiosonde Archive (IGRA) raw 4 | text data from the NOAA website 5 | - efficiently process and save the data in a h5 hierarchical file 6 | - quickly access the data, aggregate statistics, and generate plots 7 | 8 | ## Examples 9 | The following example files are provided in the root directory. 10 | * **GCSAL_ex1.m**, **GCSAL_ex2.m**, etc. go through the most commonly used functions 11 | of the Global Climate Statistical Analysis Library class. These examples include 12 | analysis of the probability distributions of different atmospheric data based on 13 | location and time of day or time of year. 14 | * **IGRA_to_h5_example.m** takes the GCSAL process from downloading the source 15 | data from NOAA website through to creating a GCSAL Matlab object. These steps 16 | are only necessary if you want to download and re-build the GCSAL library from 17 | the original source data. Typically this is not necessary as you can use the 18 | provided .h5 file on the website. Reasons you might want to do this are 1) want 19 | to update the data with the latest measurements or 2) want to make a change to the 20 | way the data is stored in the .h5 file. 21 | 22 | ## Requirements 23 | GCSAL requires MATLAB 2017 and runs on Mac, Linux, or Windows OS 24 | 25 | ## Building GCSAL 26 | No building required. 27 | 28 | ## Installing GCSAL 29 | No installation is required. The user needs to download two data files, 30 | *gcsal.h5* and *gcsal.h5.info.mat*, and put them in the *h5_data* directory. 31 | - gcsal.h5: https://www.dropbox.com/s/2m3glr0drhds33l/gcsal.h5?dl=0 32 | - gcsal.h5.info.mat: https://www.dropbox.com/s/ks9fs3xombb9xqs/gcsal.h5.info.mat?dl=0 33 | 34 | ## How GCSAL works 35 | GCSAL consists of 2 main groups of functions. The first group is for processing 36 | the text files efficiently and store the data in the h5 hierarchical file 37 | format. The second group of functions is for query the H5 data file and create 38 | maps and statistical plots. 39 | 40 | ## Full documentation 41 | The Global Climate Statistical Analysis Library (GCSAL) allows one to view 42 | climate statistics formulated from over 60 years of radiosonde data from weather 43 | balloons launched at more than 3000 locations around the world! The GCSAL efficiently 44 | processes and compresses the 80 GB of raw text data into 17 GB of data in the h5 45 | hierarchical file format. It provides a simple MATLAB interface to access the 46 | vast quantities of climate data. One can view statistical distributions and 47 | perform statistical operations on the following quantities from sea level to 48 | 30 km altitude: wind speed, wind direction, temperature, pressure, dewpoint 49 | depression, and relative humidity. 50 | 51 | ### List of functions 52 | - Text to Mat to H5 53 | - IGRA 54 | - format_definitions 55 | - datafile2mat 56 | - datafile2mat_dir 57 | - mat2h5 58 | - mat2h5_dir 59 | - Param 60 | - Methods 61 | - read_columns 62 | - h5write 63 | - h5write_param 64 | - Static 65 | - pad_left 66 | - convert_to_min_int 67 | - txt2data 68 | - compare_bytes_unique 69 | - str2int 70 | - char2numerals 71 | - str2float 72 | - bits2ints 73 | - ints2bits 74 | - compress_idx 75 | - uncompress_idx 76 | - Static (Private) 77 | - unique_inverse 78 | - compress_txt 79 | - remove_bad_vals 80 | - H5 81 | - create_and_write 82 | - fullpath 83 | - load 84 | - recursive_load 85 | - Query H5 and histograms 86 | - GCSAL 87 | - Methods 88 | - counts 89 | - counts2 90 | - countsN 91 | - query 92 | - stations_search 93 | - plot_world_map 94 | - find_countries 95 | - find_stations 96 | - find_headers 97 | - find_def 98 | - clear_entries_cache 99 | - Methods (Private) 100 | - add_header_params_to_entries 101 | - stations_from_latlong 102 | - stations_from_countries 103 | - stations_from_regex 104 | - load_all_headers 105 | - load_from_stations 106 | - load_group 107 | - load_param 108 | - read_from_cached_entries 109 | - cache_param 110 | - add_entry_idx_to_headers 111 | - Static 112 | - filter_data_by_range 113 | - histcounts 114 | - histcounts2 115 | - histcountsN 116 | - counts2pdf 117 | - get_bin_centers 118 | - find_keys 119 | - h5info_find_children 120 | - plot_stations 121 | - count_and_plot_entries 122 | - default_bin_edges 123 | - get_label 124 | - description_from_filters 125 | - stations_intersect 126 | - stations_union 127 | - stations_setxor 128 | - Static (Private) 129 | - struct_set_operation 130 | - initialize_stations 131 | - get_entry_idx_in_range 132 | - header_to_entry_idx 133 | - station_id_str 134 | - Map 135 | - find_in_lat_long_range 136 | - find_nearest 137 | - map_stations_by_country 138 | - multipatch 139 | - world_map 140 | - inpolygon 141 | 142 | 143 | ## Join the GCSAL community 144 | * POC: Greg Katz () and David Liu () 145 | 146 | ## License 147 | GCSAL is MIT-licensed. 148 | 149 | NOAA Integrated Global Radiosonde Archive (IGRA) data is licensed under the 150 | World Meteorological Organization (WMO) Resolution 40 NOAA Policy NCEI data and 151 | products. 152 | -------------------------------------------------------------------------------- /+GCSAL/+Map/find_in_lat_long_range.m: -------------------------------------------------------------------------------- 1 | function [stations, latbox, longbox] = ... 2 | find_in_lat_long_range(all_stations, latrange, longrange) 3 | % Copyright (c) Facebook, Inc. and its affiliates. 4 | % 5 | % This source code is licensed under the MIT license found in the 6 | % LICENSE file in the root directory of this source tree. 7 | % 8 | % [stations, longbox, latbox] = ... 9 | % find_in_lat_long_range(all_stations, latrange, longrange) 10 | % 11 | % Returns an array of station structs that fall within the 12 | % box defined by latrange and longrange from the list of stations in 13 | % all_stations. all_stations is a struct array with each element 14 | % containing lat, long, and id fields. 15 | % 16 | % Additionally returns longbox and latbox which can be used to plot the 17 | % the searchbox that was used. 18 | % 19 | % latrange and longrange must be two element vectors and are 20 | % in degrees. This function accounts for angle wraparound. So latrange 21 | % could be [-45 45] to find stations in latitudes between -45 and 45 22 | % degrees or it could be [45 -45] to find stations with latitude above 45 23 | % deg or below -45. The same goes for longrange 24 | % 25 | % INPUTS 26 | % all_stations - struct array, each elemetn contains lat, long, id 27 | % latrange - two element vector defining latitude limits in degrees. 28 | % Angle wraparound is OK. 29 | % longrange - two element vector defining longitude limits in degrees. 30 | % Angle wraparound is OK. 31 | % 32 | % OUTPUTS 33 | % stations - struct array, subset of all_stations located within the 34 | % lat/long search box 35 | % latbox - vector of latitude values in degrees that can be used to 36 | % make a plot representing the search box used. This 37 | % handles wraparound nicely for a map plot by inserting 38 | % NaNs for a discontinuous plot line. 39 | % longbox - see latbox 40 | 41 | 42 | % Handle angle wraparound 43 | % After this function lat/long ranges may have multiple rows for multiple 44 | % boxes to search. Multiple boxes occur when a lat/long range spans across 45 | % an edge of the map where the angle wraps around. 46 | [latranges, longranges] = latlongwrap(latrange, longrange); 47 | 48 | % Extract lat/long values from all_stations struct array 49 | all_stations_lat = [all_stations.lat]; 50 | all_stations_long = [all_stations.lon]; 51 | 52 | % Initialize logical array to false 53 | i_stations = false(size(all_stations)); 54 | 55 | % Initialize latbox and longbox output vectors 56 | latbox = []; 57 | longbox = []; 58 | 59 | % Loop through the wrapped lat/long ranges 60 | for i = 1:size(latranges, 1) 61 | 62 | % Get idx and lat/long box for the current range 63 | [idx_curr, latbox_curr, longbox_curr] = evaluate_range(... 64 | all_stations_lat, all_stations_long, latranges(i,:), longranges(i,:)); 65 | 66 | % Append idx_curr to i_stations 67 | i_stations = i_stations | idx_curr; 68 | 69 | % Append lat/long box 70 | latbox = [latbox latbox_curr]; %#ok 71 | longbox = [longbox longbox_curr]; %#ok 72 | end 73 | 74 | % Index into all_stations 75 | stations = all_stations(i_stations); 76 | 77 | end 78 | 79 | function [i_stations, latbox, longbox] = evaluate_range(... 80 | all_stations_lat, all_stations_long, latrange, longrange) 81 | 82 | % Find stations in latitude range 83 | ilat = find_in_range(all_stations_lat, latrange); 84 | 85 | % Find stations in longitude range 86 | ilong = find_in_range(all_stations_long, longrange); 87 | 88 | % Find stations in both lat and long ranges 89 | i_stations = (ilat & ilong); 90 | 91 | % Process lat/long range for pretty plotting 92 | [latbox, longbox] = latlongbox(latrange, longrange); 93 | 94 | end 95 | 96 | function in_range = find_in_range(val, range) 97 | % return logical index for values betweeen range(1) and range(2) inclusive 98 | 99 | in_range = val >= range(1) & val <= range(2); 100 | 101 | end 102 | 103 | function [lat, long] = latlongwrap(lat, long) 104 | 105 | % Error check that lat/long are vectors 106 | if ~isvector(lat) || ~isvector(long) 107 | error('lat and long must be vectors') 108 | end 109 | 110 | % Enforce lat/long are row vectors 111 | lat = lat(:)'; 112 | long = long(:)'; 113 | 114 | % Erroc check that lat/long are length 2 115 | if length(lat) ~= 2 || length(long) ~= 2 116 | error('lat and long must be length 2') 117 | end 118 | 119 | % Ensure lat/long are between -180 and 180 120 | lat = dmodpi(lat); 121 | long = dmodpi(long); 122 | 123 | % Error check that latitude is between -90 and 90 124 | if any(lat < -90 | lat > 90) 125 | error('lat must be between -90 and 90') 126 | end 127 | 128 | % Get booleans for whether lat/long ranges wrap around edges of map 129 | latwrap = lat(2) <= lat(1); 130 | longwrap = long(2) <= long(1); 131 | 132 | % If both lat and long wrap, then we need four search boxes going to the 133 | % edge of the map at all four courners 134 | if latwrap && longwrap 135 | lat = [lat(1) 90; 136 | lat(1) 90; 137 | -90 lat(2); 138 | -90 lat(2)]; 139 | 140 | long = [long(1) 180; 141 | -180 long(2); 142 | long(1) 180; 143 | -180 long(2)]; 144 | 145 | % If only lat is wrapped then we need two search boxes going to the top and 146 | % bottom of the map 147 | elseif latwrap 148 | lat = [lat(1) 90; 149 | -90 lat(2)]; 150 | 151 | long = [long; long]; 152 | 153 | % If only lat is wrapped then we need two search boxes going to the left 154 | % and right edges of the map 155 | elseif longwrap 156 | long = [long(1) 180; 157 | -180 long(2)]; 158 | 159 | lat = [lat; lat]; 160 | 161 | % Neither is wrapped 162 | else 163 | % In this case lat and long are good as is, just a single search box 164 | end 165 | 166 | 167 | end 168 | 169 | function ang_wrapped = dmodpi(ang_deg) 170 | % Ensure ang_deg is between -180 and 180 171 | 172 | ang_wrapped = mod(ang_deg + 180, 360) - 180; 173 | 174 | end 175 | 176 | function [lat_box, long_box] = latlongbox(latrange, longrange) 177 | % Return vectors that can be used to plot the search box made by latrange 178 | % and longrange but with lines on the edge of the map hidden to show how 179 | % the box is wrapped around to the other side of the map. 180 | % 181 | % To achieve this each of the lines of the box are constructed one at a 182 | % time and if the line is found to be on the edge of the map it is replaced 183 | % with NaN 184 | 185 | % These are the points of the search box with the first point repeated at 186 | % the end to complete the circuit 187 | long_pts = [longrange(1) longrange(2) longrange(2) longrange(1) longrange(1)]; 188 | lat_pts = [latrange(1) latrange(1) latrange(2) latrange(2) latrange(1)]; 189 | 190 | % initalize outputs 191 | long_box = []; 192 | lat_box = []; 193 | 194 | % Loop throught the 4 edges of the square 195 | for i = 1:4 196 | 197 | 198 | % For this edge pull the next two points form long_pts and then check 199 | % those points to see if they lie at the extreme and NaN if so. 200 | curr_long = NaN_if_all_same_extreme(long_pts(i:i+1), [-180 180]); 201 | curr_lat = NaN_if_all_same_extreme( lat_pts(i:i+1), [-90 90]); 202 | 203 | % Append 204 | long_box = [long_box curr_long NaN]; %#ok 205 | lat_box = [lat_box curr_lat NaN]; %#ok 206 | end 207 | 208 | end 209 | 210 | function vals = NaN_if_all_same_extreme(vals, extremes) 211 | % If vals are equal to each other and equal to some value in extremes then 212 | % return NaN 213 | 214 | L = length(vals); 215 | if vals(1) == vals(2:L) 216 | if any(vals(1) == extremes) 217 | vals(1:L) = NaN; 218 | end 219 | end 220 | 221 | end 222 | -------------------------------------------------------------------------------- /+GCSAL/+IGRA/format_definitions.m: -------------------------------------------------------------------------------- 1 | function defs = format_definitions( ) 2 | % Copyright (c) Facebook, Inc. and its affiliates. 3 | % 4 | % This source code is licensed under the MIT license found in the 5 | % LICENSE file in the root directory of this source tree. 6 | % 7 | % [ defs ] = igra_format_definitions( ) 8 | % Returns a struct containing the format definitions for igra data and 9 | % stations list text files 10 | % 11 | % See the following references for more information on the IGRA text file 12 | % format. 13 | % Ref: https://www1.ncdc.noaa.gov/pub/data/igra/data/igra2-data-format.txt 14 | % Ref: https://www1.ncdc.noaa.gov/pub/data/igra/igra2-list-format.txt 15 | 16 | 17 | % Load up definitions in cell matrix format 18 | headers_cell = headers_format_definition_as_cell_matrix; 19 | entries_cell = entries_format_definition_as_cell_matrix; 20 | stations_cell = stations_format_definition_as_cell_matrix; 21 | 22 | % Convert definitions to structs 23 | defs.header = cell2struct(headers_cell); 24 | defs.entries = cell2struct(entries_cell); 25 | defs.stations = cell2struct(stations_cell); 26 | 27 | 28 | end 29 | 30 | function def = cell2struct(cell_matrix) 31 | 32 | %%%% Convert cell arrays to structs 33 | % field names for each column in the above cell matrices 34 | columns = {'varname', 'type', 'col_idx', 'bad_vals', ... 35 | 'function_handle', 'units', 'description'}; 36 | 37 | % Loop through the rows in the cell array 38 | def.row_width = 0; % initialize row_width 39 | for i_var = 1:size(cell_matrix, 1) 40 | curr_varname = cell_matrix{i_var, 1}; 41 | 42 | % Loop through the columns 43 | for i_col = 1:length(columns) 44 | curr_col_name = columns{i_col}; 45 | 46 | % Assign struct field to cell array element 47 | def.params.(curr_varname).(curr_col_name) = cell_matrix{i_var, i_col}; 48 | end 49 | 50 | % Get row_widht by finding the maximum col_idx value 51 | def.row_width = max(def.row_width, def.params.(curr_varname).col_idx(end)); 52 | end 53 | 54 | end 55 | 56 | function out = scale_func(x, scale_factor) 57 | % Helper function for applying a scale_factor and converting to single 58 | % This is defined her for use in the function_handle column of the 59 | % definitions below 60 | 61 | out = single(x)*scale_factor; 62 | 63 | end 64 | 65 | function def = headers_format_definition_as_cell_matrix() 66 | % Source: https://www1.ncdc.noaa.gov/pub/data/igra/data/igra2-data-format.txt 67 | % --------------------------------- 68 | % Variable Columns Type 69 | % --------------------------------- 70 | % HEADREC 1- 1 Character 71 | % ID 2- 12 Character 72 | % YEAR 14- 17 Integer 73 | % MONTH 19- 20 Integer 74 | % DAY 22- 23 Integer 75 | % HOUR 25- 26 Integer 76 | % RELTIME 28- 31 Integer 77 | % NUMLEV 33- 36 Integer 78 | % P_SRC 38- 45 Character 79 | % NP_SRC 47- 54 Character 80 | % LAT 56- 62 Integer 81 | % LON 64- 71 Integer 82 | % --------------------------------- 83 | def = { 84 | 'id', 'char', 2:12, {}, [], '', 'Station ID'; 85 | 'year', 'uint16', 14:17, {}, [], '', 'Year'; 86 | 'month', 'uint8', 19:20, {}, [], '', 'Month'; 87 | 'day', 'uint8', 22:23, {}, [], '', 'Day'; 88 | 'hour', 'uint8', 25:26, {}, [], '', 'Hour'; 89 | 'reltime_hr', 'uint8', 28:29, {'99'}, [], '', 'Release Time Hour'; 90 | 'reltime_min','uint8', 30:31, {'99'}, [], '', 'Release Time Minute'; 91 | 'numlevs', 'uint32', 33:36, {}, [], '', '# of levels in the sounding'; 92 | 'p_src', 'char', 38:45, {''}, [], '', 'Data Source Code for Pressure Levels'; 93 | 'np_src', 'char' 47:54, {''}, [], '', 'Data Source Code for Non-pressure Levels'; 94 | 'lat', 'int32', 56:62, {}, @(x)scale_func(x,1e-4), 'deg', 'Latittude'; 95 | 'lon', 'int32', 64:71, {}, @(x)scale_func(x,1e-4), 'deg', 'Longitude'; 96 | }; 97 | end 98 | 99 | function def = entries_format_definition_as_cell_matrix() 100 | % Source: https://www1.ncdc.noaa.gov/pub/data/igra/data/igra2-data-format.txt 101 | % ------------------------------- 102 | % Variable Columns Type 103 | % ------------------------------- 104 | % LVLTYP1 1- 1 Integer 105 | % LVLTYP2 2- 2 Integer 106 | % ETIME 4- 8 Integer 107 | % PRESS 10- 15 Integer 108 | % PFLAG 16- 16 Character 109 | % GPH 17- 21 Integer 110 | % ZFLAG 22- 22 Character 111 | % TEMP 23- 27 Integer 112 | % TFLAG 28- 28 Character 113 | % RH 29- 33 Integer 114 | % DPDP 35- 39 Integer 115 | % WDIR 41- 45 Integer 116 | % WSPD 47- 51 Integer 117 | % ------------------------------- 118 | % defs.entries = { 119 | % 'lvltyp1', 'uint8', 1, {}, [], ''; 120 | % 'lvltyp2', 'uint8', 2, {}, [], ''; 121 | % 'etime', 'int32', 4:8, {'-8888', '-9999'}, [], ''; 122 | % 'press', 'uint32', 10:15, {'-8888', '-9999'}, [], 'Pa'; 123 | % 'pflag', 'char', 16, {''}, [], ''; 124 | % 'gph', 'uint16', 17:21, {'-8888', '-9999'}, @(x)scale_func(x, 1e-3), 'km'; 125 | % 'zflag', 'char', 22, {''}, [], ''; 126 | % 'temp', 'uint16', 23:27, {'-8888', '-9999'}, @(x)scale_func(x, 0.1), 'deg C'; 127 | % 'tflag', 'char', 28, {''}, [], ''; 128 | % 'rh', 'uint16', 29:33, {'-8888', '-9999'}, @(x)scale_func(x, 0.1), '%'; 129 | % 'dpdp', 'uint16', 35:39, {'-8888', '-9999'}, @(x)scale_func(x, 0.1), 'deg C' 130 | % 'wdir', 'uint16', 41:45, {'-8888', '-9999'}, [], 'deg from North'; 131 | % 'wspd', 'uint16', 47:51, {'-8888', '-9999'}, @(x)scale_func(x,0.1), 'm/s'; 132 | % }; 133 | 134 | def = { 135 | 'lvltyp1', 'uint8', 1, {}, [], '', ''; 136 | 'lvltyp2', 'uint8', 2, {}, [], '', ''; 137 | 'etime_min', 'int32', 4:6, {'-88', '-99'}, [], '', ''; 138 | 'etime_sec', 'int32', 7:8, { '88', '99'}, [], '', ''; 139 | 'press', 'int32', 10:15, {'-8888', '-9999'}, [], 'PA', 'Pressure'; 140 | 'pflag', 'char', 16, {''}, [], '', 'Pressure Flag'; 141 | 'gph', 'int32', 17:21, {'-8888', '-9999'}, @(x)scale_func(x, 1e-3), 'km', 'Altitude'; 142 | 'zflag', 'char', 22, {''}, [], '', 'Altitude Flag'; 143 | 'temp', 'int16', 23:27, {'-8888', '-9999'}, @(x)scale_func(x, 0.1), 'deg C', 'Temperature'; 144 | 'tflag', 'char', 28, {''}, [], '', 'Temperature Flag'; 145 | 'rh', 'int16', 29:33, {'-8888', '-9999'}, @(x)scale_func(x, 0.1), '%', 'Relative Humidity'; 146 | 'dpdp', 'int16', 35:39, {'-8888', '-9999'}, @(x)scale_func(x, 0.1), 'deg C', 'Dewpoint Depresesion'; 147 | 'wdir', 'int16', 41:45, {'-8888', '-9999'}, [], 'deg from North', 'Wind direction (90 = East)'; 148 | 'wspd', 'int16', 47:51, {'-8888', '-9999'}, @(x)scale_func(x,0.1), 'm/s', 'Wind Speed'; 149 | }; 150 | end 151 | 152 | function def = stations_format_definition_as_cell_matrix() 153 | 154 | % Source: https://www1.ncdc.noaa.gov/pub/data/igra/igra2-list-format.txt 155 | % ------------------------------ 156 | % Variable Columns Type 157 | % ------------------------------ 158 | % ID 1-11 Character 159 | % LATITUDE 13-20 Real 160 | % LONGITUDE 22-30 Real 161 | % ELEVATION 32-37 Real 162 | % STATE 39-40 Character 163 | % NAME 42-71 Character 164 | % FSTYEAR 73-76 Integer 165 | % LSTYEAR 78-81 Integer 166 | % NOBS 83-88 Integer 167 | % ------------------------------ 168 | def = { 169 | 'id', 'char', 1:11, {}, [], '', 'Station ID'; 170 | 'lat', 'single', 13:20, {'-98.8888'}, [], '', 'Latitude'; 171 | 'long', 'single', 22:30, {'-998.8888'}, [], '', 'Longitude'; 172 | 'elev', 'single', 32:37, {'-998.8'}, [], '', 'Elevation'; 173 | 'state', 'char', 39:40, {}, [], '', 'U.S. State'; 174 | 'name', 'char', 42:71, {}, [], '', 'Name'; 175 | 'fstyear', 'int16', 73:76, {}, [], '', 'First Year'; 176 | 'lstyear', 'int16', 78:81, {}, [], '', 'Last Year'; 177 | 'nobs', 'int32', 83:88, {}, [], '', '# of Observations'; 178 | }; 179 | end 180 | -------------------------------------------------------------------------------- /GCSAL_ex1.m: -------------------------------------------------------------------------------- 1 | % Copyright (c) Facebook, Inc. and its affiliates. 2 | % 3 | % This source code is licensed under the MIT license found in the 4 | % LICENSE file in the root directory of this source tree. 5 | % 6 | % This example file goes through the most commonly used function of the 7 | % Global Climate Statistical Analysis Library class. The examples show you 8 | % can do analysis of the probability distributions of different atmospheric 9 | % data based on location and time of day or time of year. 10 | 11 | 12 | %% Step 1 is to download the source .h5 file and set up your paths correctly 13 | 14 | clear; 15 | close all; 16 | clc; 17 | 18 | if (~exist('g','var') || ~isa(g,'GCSAL.GCSAL')) 19 | % The gcsal.h5 and gcsal.h5.info.mat files are available for download from the 20 | % website and should be placed in the h5_data directory. 21 | 22 | %%%%%%%%%%%%%% CHANGE THESE %%%%%%%%%%%%%%%%%% 23 | % Set this to wherever you put the gcsal.h5 file and gcsal.h5.info.mat 24 | % files downloaded from dropbox 25 | h5_dir = './h5_data/'; 26 | 27 | % Directory to code. The folder +GCSAL which contains this file should be 28 | % in this directory 29 | codebase_dir = './'; 30 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 31 | 32 | % Full path to .mat file with h5 info 33 | h5_file = fullfile(h5_dir, 'gcsal.h5'); 34 | h5_mat_file = [h5_file '.info.mat']; 35 | 36 | % Set up Matlab path 37 | addpath(genpath(codebase_dir)) 38 | 39 | %% Load GCSAL object from .mat file 40 | 41 | % This requires about 6 gb of RAM just to hold all of the header 42 | % information in memory 43 | 44 | % Normally you should load the GCSAL object from the .mat file but if it 45 | % doesn't exist on your path you can use the .h5 file. After using the .h5 46 | % file a .mat file will be created automatically for subsequent use 47 | if ~exist(h5_mat_file, 'file') 48 | g = GCSAL.GCSAL(h5_file); 49 | else 50 | g = GCSAL.GCSAL(h5_mat_file); 51 | g.h5_fname = h5_file; 52 | end 53 | end 54 | 55 | % Introduction: Printout the list of variables that are in the header data 56 | % and the entries data. 57 | % header data is for a single balloon launch - things like time, date, and location 58 | % entries data is the measurements of the baloon - wind speed, pressure, etc. 59 | g.defs.header.params 60 | g.defs.entries.params 61 | 62 | % For more details look at a single parameter 63 | g.defs.entries.params.wspd 64 | g.defs.entries.params.gph 65 | 66 | 67 | %% Stations_search 68 | 69 | % Find stations within 25 degrees of the equator 70 | stations1 = g.station_search('LatLong', [-25 25 -180 180]); 71 | 72 | % Find stations from a bunch of countries 73 | country_names = {'Mexico', 'Brazil','Algeria', 'Burkina Faso', 'Ghana', 'Niger',... 74 | 'Nigeria', 'Egypt', 'Sudan', 'Ethiopia', 'Uganda',... 75 | 'Kenya', 'Tanzania', 'Madagascar', 'India', 'Sri Lanka',... 76 | 'Nepal', 'Bangladesh','Myanmar', 'Thailand', 'Vietnam', 'Cambodia',... 77 | 'Ukraine', 'Uzbekistan', 'Turkey',... 78 | 'Indonesia', 'Philippines'}; 79 | 80 | stations2 = g.station_search('Countries', country_names); 81 | 82 | % Find stations in countries AND within 25 degrees of equator 83 | stations3 = g.station_search('Countries', country_names, ... 84 | 'LatLong', [-25 25 -180 180]); 85 | 86 | % Find stations in countries OR within 25 degrees of equator 87 | stations4 = GCSAL.GCSAL.stations_union(stations1, stations2); 88 | 89 | % Plot stations 4 on wolrd map 90 | figure; hold all; g.plot_world_map(); 91 | GCSAL.GCSAL.plot_stations(stations4, 'r+'); 92 | 93 | % Find stations with IDs beginnign with the letter A. 94 | % Note that in regex ^ means beginning of the line 95 | stations5 = g.station_search('IDRegex', '^A'); 96 | 97 | % Find stations in Brazil or India AND within 25 degrees of 98 | % the equator AND with station IDs ending in 5 99 | % Note that in regex $ means end of the line 100 | stations6 = g.station_search('Countries', {'Brazil', 'India'}, ... 101 | 'LatLong', [-25 25 -180 180], ... 102 | 'IDRegex', '5$'); 103 | 104 | % Find stations within +/-2.5 deg latitude about Guatemala 105 | stations7 = g.station_search('Lat', [14.583323, -90.527309], 'Range', 2.5); 106 | 107 | %% Data query 108 | 109 | % Get stations located in Botswana 110 | stations = g.station_search('Countries', {'Botswana'}); 111 | 112 | % Get all geopotential height and windspeed data along with hour, month, 113 | % and year data 114 | entries1 = g.query(stations, {'gph', 'wspd', 'hour', 'month', 'year'}); 115 | 116 | % Plot distribution of hours and years for the data in entries1 117 | figure; histogram(vertcat(entries1.hour)); xlabel('Hour'); ylabel('# of occurences'); 118 | figure; histogram(vertcat(entries1.year)); xlabel('Year'); ylabel('# of occurences'); 119 | 120 | % Get gph and wspd data measured between 6 and 4 pm 121 | entries2 = g.query(stations, {'gph', 'wspd'}, 'hour', [6 16]); 122 | 123 | % Plot distribution of hours for the data in entries2 124 | figure; histogram(vertcat(entries2.hour)); xlabel('Hour'); ylabel('# of occurences'); 125 | 126 | % Get data corresponding only to measuresments taken in August between 127 | % 4am and Noon and in the years 1990 to 1999 128 | entries3 = g.query(stations, {'gph', 'wspd'}, ... 129 | {'month', 'hour', 'year'}, ... 130 | {8, [4 12], [1990 1999]}); 131 | 132 | % Plot distribution of years for the data in entries3 133 | figure; histogram(vertcat(entries3.year)); xlabel('Year'); ylabel('# of occurences'); 134 | 135 | % To save RAM clear out the entries cache. The entries cache holds data for 136 | % any data that has been loaded so far which makes it faster to the access 137 | % the data subsequently but also uses up RAM; 138 | g.clear_entries_cache(); 139 | 140 | %% One dimensional histograms 141 | 142 | % Get stations in Brazil 143 | stations = g.station_search('Countries', 'Brazil'); 144 | 145 | % Histogram for all windspeeds 146 | [N, entries] = g.counts(stations, 'wspd'); 147 | 148 | % Define custom bin_edges 149 | bin_edges = 0:1:80; 150 | 151 | % Do counts for windspeed filtered on geopotential 152 | % altitude between 20 and 30 km and with custom bin edges 153 | [N, entries] = g.counts(stations, 'wspd', 'FilterFields', {'gph'}, ... 154 | 'FilterRanges', {[20 30]}, 'Edges', bin_edges); 155 | 156 | % Now additionally filter on measurements taken in August 157 | % between 4 and 10 am 158 | [N, entries] = g.counts(stations, 'wspd', 'FilterFields', {'gph', 'month', 'hour'}, ... 159 | 'FilterRanges', {[20 30], 8, [4 10]}, 'Edges', bin_edges); 160 | 161 | %% Two dimensional histograms 162 | 163 | % Get some stations 164 | stations = g.station_search('Countries', 'Brazil'); 165 | 166 | % Do counts between gph and various other parameters 167 | [N, entries] = g.counts2(stations, 'gph', 'wspd'); 168 | [N, entries] = g.counts2(stations, 'gph', 'press'); 169 | [N, entries] = g.counts2(stations, 'gph', 'temp'); 170 | [N, entries] = g.counts2(stations, 'gph', 'rh'); 171 | [N, entries] = g.counts2(stations, 'gph', 'dpdp'); 172 | [N, entries] = g.counts2(stations, 'gph', 'wdir'); 173 | [N, entries] = g.counts2(stations, 'gph', 'wspd'); 174 | 175 | % Do counts between gph and pressure with custom bin edges 176 | [N, entries] = g.counts2(stations, 'gph', 'wspd', ... 177 | 'XEdges', 0:0.5:40, 'YEdges', 0:1:80); 178 | 179 | % Do counts for data measured between 6am and 4pm and in August 180 | [N, entries] = g.counts2(stations, 'gph', 'wspd', ... 181 | 'FilterFields', {'hour', 'month'}, 'FilterRanges', {[6 16], 8}); 182 | 183 | 184 | %% N dimensional counts 185 | 186 | % Get some stations 187 | stations = g.station_search('Countries', 'Brazil'); 188 | 189 | % Make 5-dimensional count matrix with default bin edges and no filtering 190 | resolutions = struct('fld', {'lat', 'lon', 'gph', 'month', 'wspd'}); 191 | N = g.countsN(stations, resolutions); 192 | 193 | % Add custom bin edges to gph field and limit data to only data between 6 194 | % and 10 am 195 | resolutions(3).edges = 0:1:80; 196 | N = g.countsN(stations, resolutions, 'FilterFields', {'hour'}, 'FilterRanges', [6 10]); 197 | 198 | % Clear the cache to save RAM 199 | g.clear_entries_cache(); 200 | 201 | %% N dimensional counts on full library 202 | % This section has been commented out because it takes ~35 minutes and 30 203 | % gb to complete 204 | 205 | % % Get all stations 206 | % stations = g.stations; 207 | % 208 | % % Cache data for lat, lon, gph, and month 209 | % % This may take a few minutes and requires another 18 gb of RAM for a total 210 | % % of 24 gb of RAM including the header data that is loaded when the GCSAL 211 | % % object is initialized. 212 | % g.query(stations, {'lat', 'lon', 'gph', 'month'}); 213 | % 214 | % % Turn off cache so we don't use any more RAM going forward 215 | % g.do_cache_entries = false; 216 | % 217 | % % Make 5-dimensional count matrix with default bin edges and no filtering 218 | % % This may take about 5 minutes for each countsN call. 219 | % % Also temporarily uses another 6 gb of RAM for each countsN call for a 220 | % % grand total of ~30gb of RAM. As long as do_cache_entries is false this 221 | % % last 6 gb RAM is cleared between each call to countsN 222 | % resolutions = struct('fld', {'lat', 'lon', 'gph', 'month', 'wdir'}); 223 | % N1 = g.countsN(stations, resolutions); % This may take about 5 minutes 224 | % 225 | % resolutions = struct('fld', {'lat', 'lon', 'gph', 'month', 'wspd'}); 226 | % N2 = g.countsN(stations, resolutions); % This may take about 5 minutes 227 | % 228 | % resolutions = struct('fld', {'lat', 'lon', 'gph', 'month', 'temp'}); 229 | % N3 = g.countsN(stations, resolutions); % This may take about 5 minutes 230 | % 231 | % resolutions = struct('fld', {'lat', 'lon', 'gph', 'month', 'press'}); 232 | % N4 = g.countsN(stations, resolutions); % This may take about 5 minutes 233 | % 234 | % resolutions = struct('fld', {'lat', 'lon', 'gph', 'month', 'dpdp'}); 235 | % N5 = g.countsN(stations, resolutions); % This may take about 5 minutes 236 | % 237 | % resolutions = struct('fld', {'lat', 'lon', 'gph', 'month', 'rh'}); 238 | % N6 = g.countsN(stations, resolutions); % This may take about 5 minutes 239 | -------------------------------------------------------------------------------- /+GCSAL/+IGRA/Param.m: -------------------------------------------------------------------------------- 1 | classdef Param 2 | % Copyright (c) Facebook, Inc. and its affiliates. 3 | % 4 | % This source code is licensed under the MIT license found in the 5 | % LICENSE file in the root directory of this source tree. 6 | % 7 | % Param(param_def, text_mat) 8 | % Reads the IGRA formatted text in text_mat according to the 9 | % format definition in param_def. IGRA text data uses fixed column 10 | % widths so a single Param reads only data from the fixed 11 | % columns specified in param_def corresponding with a single data 12 | % parameter. 13 | % 14 | % The param_def must supply the following struct fields: 15 | % varname - name used to store the data in an H5 file. Must 16 | % resolve to a valid Matlab struct field. 17 | % type - 'uint8', 'uint16', 'uint32', 'uint64', 'int8', 18 | % 'int16', 'int32', 'int64', 'single', 'double', or 19 | % 'char' 20 | % col_idx - index vector corresponding to the columns of the 21 | % source file relevant to the current entry 22 | % bad_vals - Cell array of strings listing the code used to 23 | % indicate missing or erroneous data in the IGRA file 24 | % 25 | % INPUTS 26 | % param_def - a struct containing varname, type, col_idx, 27 | % bad_vals 28 | % text_mat - a matrix of characters converted to uint8. Each 29 | % row corresponding to a row of text from an IGRA 30 | % formatted text file 31 | % 32 | % PROPERTIES 33 | % data - stored data representation of text. 34 | % idx - indexing vector for data 35 | % len - length of uncompressed data 36 | % def - format definition struct 37 | 38 | 39 | properties 40 | data % stored data representation of text 41 | idx % indexing vector for data 42 | len % length of uncompressed data 43 | def % format definition struct 44 | i_unique % indexing vector if data was compressed based on unique parameters 45 | end 46 | 47 | methods 48 | function obj = Param(param_def, text_mat) 49 | % Param Constructor 50 | % 51 | % INPUTS 52 | % param_def - a struct containing varname, type, col_idx, 53 | % bad_vals 54 | % text_mat - a matrix of characters converted to uint8. Each 55 | % row corresponding to a row of text from an IGRA 56 | % formatted text file 57 | 58 | % Error check fields in format definition struct 59 | found_flds = fieldnames(param_def); 60 | expected_flds = {'varname', 'type', 'col_idx', 'bad_vals'}; 61 | if ~all(ismember(expected_flds, found_flds)) 62 | error('def struct must contain varname, type, col_idx, and bad_vals fields') 63 | end 64 | 65 | % Store format definition for the data entry 66 | obj.def = param_def; 67 | 68 | % Error check the text_mat 69 | if ~isa(text_mat, 'uint8') 70 | error('text_mat must be a matrix of characters converted to uint8') 71 | end 72 | 73 | % Read the string matrix text_mat 74 | obj = obj.read_columns(text_mat); 75 | 76 | end 77 | 78 | function obj = read_columns(obj, text_mat) 79 | 80 | % Extract the relative columns of text 81 | txt = text_mat(:, obj.def.col_idx); 82 | 83 | % Compress txt by removing rows that match bad_vals and 84 | % creating an idx vector for mapping remaining rows (unless the 85 | % compression actually increases the data size in which case 86 | % leave it as is) 87 | [txt, obj.idx, obj.len] = GCSAL.IGRA.Param.compress_txt(txt, obj.def.bad_vals); 88 | 89 | % Convert txt to data based on the type in the format 90 | % definition 91 | [obj.data, obj.i_unique] = GCSAL.IGRA.Param.txt2data(txt, obj.def.type); 92 | 93 | end 94 | 95 | function h5write(obj, filename, dataset_prefix) 96 | % Write the data to h5 file filename with h5 path 97 | % dataset_prefix 98 | 99 | % Make h5 path for this varname 100 | h5_path = GCSAL.H5.fullpath(dataset_prefix, obj.def.varname); 101 | 102 | % Write data, idx, and len 103 | obj.h5_write_param(filename, h5_path, 'data') 104 | obj.h5_write_param(filename, h5_path, 'idx') 105 | obj.h5_write_param(filename, h5_path, 'i_unique') 106 | 107 | % Don't need len if data is already full length 108 | if length(obj.data) ~= obj.len 109 | obj.h5_write_param(filename, h5_path, 'len') 110 | end 111 | 112 | end 113 | 114 | function h5_write_param(obj, filename, dataset_prefix, var_name) 115 | % Helper for calling H5.create_and_write with proper h5 path 116 | h5_path = GCSAL.H5.fullpath(dataset_prefix, var_name); 117 | GCSAL.H5.create_and_write(filename, h5_path, obj.(var_name)) 118 | end 119 | 120 | end 121 | 122 | methods (Static) 123 | 124 | 125 | function str = pad_left(str, desired_length) 126 | % Prepends str with enough spaces to make a string of 127 | % desired_length 128 | 129 | str_length = length(str); 130 | if str_length > desired_length 131 | error('str: %s is already longer than str_length: %f', str, desired_length) 132 | end 133 | 134 | % Pad beginning of str with blanks 135 | prefix = blanks(desired_length - str_length); 136 | str = [prefix str]; 137 | 138 | end 139 | 140 | function val = convert_to_min_int(val) 141 | % Create an idx variable with values 1:max_value and of the 142 | % smallest type that can store max_value 143 | 144 | max_val = max(val(:)); 145 | if isempty(max_val) 146 | max_val = 1; 147 | end 148 | if max_val < 1 || rem(max_val, 1) ~= 0 149 | error('max_value: %f must be a whole number greater than 0') 150 | end 151 | 152 | if max_val < intmax('uint8') 153 | val = uint8(val); 154 | elseif max_val < intmax('uint16') 155 | val = uint16(val); 156 | elseif max_val < intmax('uint32') 157 | val = uint32(val); 158 | elseif max_val < intmax('uint64') 159 | val = uint64(val); 160 | else 161 | error(['max_val: %f exceeds the maximum value than ' ... 162 | 'can be stored as an integer'], max_val) 163 | end 164 | end 165 | 166 | function [data, i_unique] = txt2data(txt, type) 167 | % Convert txt to type 168 | 169 | % Return on empty txt 170 | if isempty(txt) 171 | data = []; i_unique = []; 172 | return 173 | end 174 | 175 | % Since txt may be very repetitive it is more efficient to find 176 | % unique rows before trying to convert 177 | [unique_txt, ~, i_unique] = unique(txt, 'rows'); 178 | i_unique = GCSAL.IGRA.Param.convert_to_min_int(i_unique); 179 | 180 | % Switch on whether data is int, real, or char 181 | switch type 182 | 183 | % Integer types 184 | case {'int8', 'int16', 'int32', 'uint8', 'uint16', 'uint32'} 185 | unique_val = GCSAL.IGRA.Param.str2int(unique_txt, type); 186 | 187 | 188 | % Float types 189 | case {'single', 'double'} 190 | unique_val = GCSAL.IGRA.Param.str2float(unique_txt, type); 191 | 192 | % String types 193 | case 'char' 194 | unique_val = unique_txt; 195 | 196 | % Anything else 197 | otherwise 198 | error('Unrecognized type: %s', type) 199 | end 200 | 201 | 202 | if size(unique_val, 1) == 1 203 | % If there is only one unique value, return that value with 204 | % i_unique empty 205 | i_unique = []; 206 | data = unique_val; 207 | else 208 | % Otherwise determine if we can save space by representing 209 | % data in unique form 210 | 211 | if GCSAL.IGRA.Param.compare_bytes_unique(i_unique, unique_val) 212 | % keep data in unique form 213 | data = unique_val; 214 | else 215 | % Inverse the unique call and make data the full 216 | % uncompressed data 217 | data = unique_val(i_unique,:); 218 | 219 | % Revert i_unique to empty vector 220 | i_unique = []; 221 | end 222 | end 223 | end 224 | 225 | function do_compression = compare_bytes_unique(i_unique, unique_val) 226 | % do_compression = compare_bytes_unique(i_unique, unique_val) 227 | % Returns true if bytes needed to store i_unique and 228 | % unique_val is less than bytes needed to store 229 | % unique_val(i_unique,:). 230 | % 231 | % This is generally true if i_unique is a smaller data type 232 | % than unique_val and unique_val is significantly shorter 233 | % than unique_val(i_unique,:) 234 | 235 | % Figure out bytes requred to store data with i_unique and 236 | % unique_val 237 | bytes_i_unique= whos('i_unique'); 238 | bytes_i_unique = bytes_i_unique.bytes; 239 | 240 | bytes_unique = whos('unique_val'); 241 | bytes_unique = bytes_unique.bytes; 242 | 243 | bytes_compressed = bytes_i_unique + bytes_unique; 244 | 245 | % Figure out bytes to store uncompressed data 246 | % Do this by multiplying bytes_unique by the 247 | % ratio of the size of data for compressed and uncompressed 248 | length_uncompressed = length(i_unique); 249 | length_compressed = size(unique_val, 1); 250 | bytes_uncompressed = bytes_unique*length_uncompressed/length_compressed; 251 | 252 | % Return comparison of bytes_compressed and uncompressed 253 | do_compression = bytes_compressed < bytes_uncompressed; 254 | 255 | end 256 | 257 | function int = str2int(str_mat, type) 258 | % Custom vectorized str2int function. Processes the character 259 | % matrix str_mat by columns, converting the characters in each 260 | % column to a number and then adding up the numbers from each 261 | % column. This is faster than str2double or str2num because it 262 | % relies on the characters in str_mat being well behaved. 263 | % 264 | % Additional the character matrinx str_mat in this case is 265 | % represented as uint8 for efficiency 266 | % 267 | % Assumptions: 268 | % - The only characters in str_mat are ' -0123456789' 269 | % - Every row of str_mat is a single number 270 | % - Every row of str_mat is equal width with left padding 271 | % - Every row is ordered blanks, negative sign, numerals from 272 | % left to right 273 | 274 | % Convert char to uint8 if necessary 275 | if isa(str_mat, 'char') 276 | str_mat = uint8(str_mat); 277 | end 278 | 279 | % Get the size of str_mat for preallocation 280 | [rows, cols] = size(str_mat); 281 | 282 | % Pre-allocate int with zeros of the correct type 283 | int = zeros(rows,1, type); 284 | 285 | % Initialize place_factor with ones. This rep100resents the value 286 | % of the current column (like 1, 10, 100, 1000, etc.) 287 | place_factor = ones(rows,1, type); 288 | 289 | % Convert each character of str_mat to it's integer numeral 290 | % (blanks become 0 and negative sign becomes -1) 291 | numerals = GCSAL.IGRA.Param.char2numerals(str_mat); 292 | 293 | % Convert numerals to proper type 294 | numerals = cast(numerals, type); 295 | 296 | % Go column by column to sum each row vector of numerals into a 297 | % single value per row in a vectorized manner. Start with right 298 | % most column for the ones place 299 | for i = cols:-1:1 300 | 301 | % Extract the current column, going right to left 302 | curr_col = numerals(:,i); 303 | 304 | % Find any numerals in the current column that are 305 | % negative 306 | is_neg = curr_col == -1; 307 | 308 | % negate where isneg 309 | int(is_neg) = -1*int(is_neg); 310 | 311 | % Apply the place_factor multiplier to the curr_col where 312 | % not is_neg 313 | int(~is_neg) = int(~is_neg) + place_factor(~is_neg) .* curr_col(~is_neg); 314 | 315 | % Increment place_factor by x10 since we are in base 10 316 | place_factor = place_factor*10; 317 | end 318 | 319 | % error check on data type 320 | if any(int(:) >= intmax(type)) 321 | error('Data overflow for type: %s', type) 322 | end 323 | end 324 | 325 | function int = char2numerals(char_mat) 326 | % converts matrix of characters char_mat to a matrix of 327 | % numerals. The characters of char_mat are represented as uint8 328 | % for efficiency 329 | % 330 | % The only acceptable characters in char_mat are ' -0123456789' 331 | 332 | % Convert char to uint8 if necessary 333 | if isa(char_mat, 'char') 334 | char_mat = uint8(char_mat); 335 | end 336 | 337 | % Create key/value map for converting uint8 characters to 338 | % numerals. blanks become 0 and negative signs become -1 339 | key = uint8(' -0123456789'); 340 | val = [0 -1 0 1 2 3 4 5 6 7 8 9]; 341 | 342 | % Match all characters in char_mat with key 343 | [test, key_idx] = ismember(char_mat, key); 344 | if ~all(test(:)) 345 | error('Encountered an unrecognized character. The only recognized characters are -0123456789 and blank') 346 | end 347 | 348 | % Use the matching indices from ismember to index to the proper 349 | % values in val 350 | int = val(key_idx); 351 | 352 | % int defaults to a row vector if the char_mat is a column 353 | % vector, in this case transpose to size of in matches size of 354 | % char_mat 355 | if size(char_mat,2) == 1 356 | int = int'; 357 | end 358 | 359 | % Error check on size 360 | if ~all(size(int) == size(char_mat)) 361 | error('size error') 362 | end 363 | 364 | end 365 | 366 | function float = str2float(str_mat, type) 367 | % Uses str2num to convert the character matrix str_mat to a 368 | % number then applies the necessary type conversion 369 | 370 | float = str2num(char(str_mat)); %#ok 371 | float = cast(float, type); 372 | end 373 | 374 | function int_array = bits2ints(bits) 375 | % Convert a logical array bits to an array of integers. A 376 | % logical array actually uses 1 byte (8 bits) to represent each 377 | % boolean. By converting the logical array to integers you can 378 | % reduce the size in memory by 8 times. 379 | 380 | % convert to uint32 and column vector 381 | bits = uint32(logical(bits(:))); 382 | 383 | % Want to convert to array of uint32 whereeach integer has 32 384 | % bits. We will reshape bits be an Nx32 matrix, but bits may 385 | % not be divisible by 32 so we will pad with a prefix of zeros 386 | % as necessary. 387 | L = 32; 388 | 389 | % First, keep track of how many bits we started with so the 390 | % first bit can be disambiguated upon decoding 391 | N_bits = uint32(length(bits)); 392 | 393 | % Calculate how much padding is needed 394 | padding = L -rem(length(bits), L); 395 | if padding == L; padding = 0; end 396 | 397 | % Prepend padding 398 | bits = [zeros(padding, 1); bits]; 399 | 400 | % Reshape 401 | bits = reshape(bits, L, [])'; 402 | 403 | % convert each group of 32 bits to a uint32 by adding up each 404 | % column multiplied 2^i_col 405 | int_array = zeros(size(bits,1), 1, 'uint32'); 406 | for i_col = 1:L 407 | int_array = int_array + bits(:,L+1-i_col).*2.^(i_col-1); 408 | end 409 | 410 | % Finally prefix N_bits at the beginning so that this array can 411 | % be decoded without ambiguity about the bits that were padded 412 | int_array = [N_bits; int_array]; 413 | 414 | end 415 | 416 | function bits = ints2bits(int_array) 417 | % Convert an array of integers (assumed to be uint32 in this 418 | % implementation) to bits represented as a logical array. 419 | 420 | % First integer in int_array contains the # of bits stored 421 | N_bits = int_array(1); 422 | int_array(1) = []; 423 | 424 | % To convert an integer to bits we need to keep dividing by 2 425 | % and checking the remainder 426 | L = 32; 427 | int_array = double(int_array); % this ensures division by 2 works properly 428 | bits = zeros(32, length(int_array)); 429 | for i = 1:L 430 | % Check the remainder when dividing by 2 431 | bits(L+1-i, :) = mod(int_array, 2); 432 | 433 | % Reduce by half 434 | int_array = floor(int_array/2); 435 | end 436 | 437 | % Ensure column vector 438 | bits = bits(:); 439 | 440 | % We may have some extra bits that were added by the padding 441 | % process caused by N_bits not being exactly divisible by 32. 442 | % This step removes any extra bits that were prefixed 443 | bits_to_remove = length(bits) - N_bits; 444 | bits(1:bits_to_remove) = []; 445 | 446 | end 447 | 448 | function idx = compress_idx(idx_good, original_length) 449 | % idx can be represented as either a list of the good indices, 450 | % a list of the bad indices, or a logical array of bits. This 451 | % function calculates which one will be most efficient 452 | 453 | % If idx_good is empty, then we do not need to continue 454 | if isempty(idx_good) 455 | idx = []; 456 | return 457 | end 458 | 459 | % If idx_good is the same length as original_length then no 460 | % compression occured and idx can be returned empty 461 | if length(idx_good) == original_length 462 | idx = []; 463 | return 464 | end 465 | 466 | % Crete a logical index version of idx 467 | idx_logical = zeros(original_length,1, 'uint8'); 468 | idx_logical(idx_good) = 1; 469 | 470 | % Create the inverse idx 471 | idx_bad = find(~idx_logical); 472 | 473 | % Convert to the min int type to save space 474 | idx_bad = GCSAL.IGRA.Param.convert_to_min_int(idx_bad); %#ok 475 | 476 | % Convert logical from bits to int to save space 477 | idx_logical = GCSAL.IGRA.Param.bits2ints(idx_logical); 478 | 479 | % Find which idx corresponds to the fewest bytes 480 | var_info(1) = whos('idx_good'); 481 | var_info(2) = whos('idx_bad'); 482 | var_info(3) = whos('idx_logical'); 483 | [~, idx_type] = min([var_info.bytes]); 484 | 485 | % Set idx based on the idx_type found to be most efficient 486 | switch idx_type 487 | case 1 488 | idx = idx_good; 489 | case 2 490 | idx = idx_bad; 491 | case 3 492 | idx = idx_logical; 493 | end 494 | 495 | % Prepend the idx_type to the idx array 496 | idx = [idx_type; idx]; 497 | 498 | end 499 | 500 | function idx_out = uncompress_idx(idx_in, original_length) 501 | 502 | % First value in idx_in should be encoding of index type 503 | idx_type = idx_in(1); 504 | idx_in(1) = []; 505 | 506 | % Switch on index type 507 | switch idx_type 508 | 509 | case 1 510 | % idx_in is already good_vals 511 | idx_out = idx_in; 512 | 513 | case 2 514 | % idx_in is bad_vals and needs to be inversed 515 | logical_array = ones(original_length, 1); 516 | logical_array(idx_in) = 0; 517 | idx_out = find(logical_array); 518 | 519 | case 3 520 | % idx_in is logical bits represented as integer aray 521 | logical_array = GCSAL.IGRA.Param.ints2bits(idx_in); 522 | idx_out = find(logical_array); 523 | 524 | otherwise 525 | error('Unrecognized idx type') 526 | end 527 | 528 | end 529 | 530 | end 531 | 532 | methods (Static, Access = 'private') 533 | function data = unique_inverse(unique_val, i_unique) 534 | % Use i_unique to index unique_val and return the original 535 | % ordering of the data before unique was called. If, however, 536 | % there is only one unique element, then return just the unique 537 | % value 538 | 539 | if size(unique_val, 1) == 1 540 | data = unique_val; 541 | else 542 | data = unique_val(i_unique,:); 543 | end 544 | end 545 | 546 | function [txt, idx, original_length] = compress_txt(orig_txt, bad_vals) 547 | 548 | % Cache # of rows in orig_txt 549 | original_length = size(orig_txt, 1); 550 | original_length = GCSAL.IGRA.Param.convert_to_min_int(original_length); 551 | 552 | % Create compressed version of txt by removing rows that match 553 | % bad_vals and keeping track of the idx for the remaining rows 554 | [txt, idx] = GCSAL.IGRA.Param.remove_bad_vals(orig_txt, bad_vals); 555 | 556 | % Compress idx as efficiently as possible 557 | idx = GCSAL.IGRA.Param.compress_idx(idx, original_length); 558 | 559 | 560 | end 561 | 562 | function [txt, idx] = remove_bad_vals(txt, bad_vals) 563 | 564 | % Get size 565 | [N_rows, N_cols] = size(txt); 566 | 567 | % Initialize indexing to incldue all rows 568 | idx = (1:N_rows)'; 569 | idx = GCSAL.IGRA.Param.convert_to_min_int(idx); 570 | 571 | % Removes rows of txt that match any of the strings in 572 | % bad_vals 573 | 574 | % Loop through the list of bad_vals in the format definition 575 | % Each bad_val should be a string 576 | for i = 1:length(bad_vals) 577 | 578 | % Ensure bad_val is the correct width by padding 579 | curr_bad_val = GCSAL.IGRA.Param.pad_left(bad_vals{i}, N_cols); 580 | 581 | % Convert char to uint8 to match format of txt matrix 582 | curr_bad_val = uint8(curr_bad_val); 583 | 584 | % Find compare curr_bad_val to each row of txt 585 | matching_characters = bsxfun(@eq, curr_bad_val, txt); 586 | 587 | % Find rows where all characters match 588 | matching_rows = all(matching_characters, 2); 589 | 590 | % Remove matching_rows from txt and idx 591 | txt(matching_rows, :) = []; 592 | idx(matching_rows) = []; 593 | end 594 | end 595 | end 596 | end 597 | -------------------------------------------------------------------------------- /+GCSAL/GCSAL.m: -------------------------------------------------------------------------------- 1 | classdef GCSAL < handle 2 | % Copyright (c) Facebook, Inc. and its affiliates. 3 | % 4 | % This source code is licensed under the MIT license found in the 5 | % LICENSE file in the root directory of this source tree. 6 | % 7 | % GCSAL - Global Climate Statical Analysis Library 8 | % Inherits handle class making objects of this class pointers 9 | 10 | 11 | properties 12 | h5_fname % path to .h5 source file for loading data 13 | h5_info % struct returned by h5info() 14 | headers % struct array, each element containing header data from an IGRA station 15 | stations % struct array, each element containing id, latitude, and longitude information for an IGRA station 16 | defs % parameter format definitions 17 | countries % struct array, each element countaining infomration for a country including its name, lat/long of its borders, and which staitons are contained by that country 18 | entries % struct of entries data that has been cached 19 | do_cache_entries % boolean whether to cache entries are not. Normally true but if you are running out of RAM you can turn this off 20 | quiet_mode % Suppress status text messages and waitbars 21 | plot_mode % Suppress plots 22 | end 23 | 24 | methods 25 | function obj = GCSAL(in_file) 26 | % obj = GCSAL(in_file) 27 | % Create a GCSAL object from in_file. in_file should be the 28 | % path to either a .h5 or .mat file. The .h5 file could have 29 | % been created with GCSAL.IGRA.mat2h5_dir for example. A .mat 30 | % file would have been created by an earlier call to this 31 | % constructor function 32 | 33 | % Default to output all comments 34 | obj.quiet_mode = false; 35 | 36 | % Default to make all plots 37 | obj.plot_mode = true; 38 | 39 | % Set do_cache_entries true 40 | obj.do_cache_entries = true; 41 | 42 | % Load format definitions 43 | obj.defs = GCSAL.IGRA.format_definitions(); 44 | 45 | % Expected fields for loading from/saving to .mat file 46 | flds = {'h5_info', 'h5_fname', 'headers', 'countries', 'stations'}; 47 | 48 | % Initialize obj.entries to empty struct 49 | obj.clear_entries_cache(); 50 | 51 | % Extract extension from in_file 52 | [~,~, ext] = fileparts(in_file); 53 | 54 | % Switch on file extension of in_file 55 | switch ext 56 | 57 | % For .h5 file, load basic info from .h5 and save to .mat 58 | case '.h5' 59 | 60 | % in_file was .h5 so assign h5_fname 61 | obj.h5_fname = in_file; 62 | 63 | % Get h5info 64 | tic; fprintf('Parsing info from h5 file...'); 65 | obj.h5_info = h5info(obj.h5_fname); 66 | fprintf(' Complete in %.1f seconds\n', toc); 67 | 68 | % Load all headers to RAM for quicker data access 69 | tic; fprintf('Loading headers...'); 70 | obj.headers = obj.load_all_headers(); 71 | fprintf(' Complete in %.1f seconds\n', toc); 72 | 73 | % Create stations struct array from headers and create 74 | % countries from stations struct. These data 75 | % structures offer quick access to finding structures 76 | % based on their lat/long location or which country 77 | % they are in 78 | tic; fprintf('Initiazling stations struct and country map ...'); 79 | obj.stations = GCSAL.GCSAL.initialize_stations(obj.headers); 80 | obj.countries = GCSAL.Map.map_stations_by_country(obj.stations); 81 | fprintf(' Complete in %.1f seconds\n', toc); 82 | 83 | % Save the workspace to a .mat file for quicker loading 84 | % in the future 85 | tic; fprintf('Saving h5 info to .mat file for faster loading...'); 86 | for i = 1:length(flds) 87 | to_mat.(flds{i}) = obj.(flds{i}); %#ok 88 | end 89 | save([in_file '.info.mat'], '-struct', 'to_mat'); 90 | fprintf(' Complete in %.1f seconds\n', toc); 91 | 92 | 93 | % For .mat file, just load and check expected variables are 94 | % present 95 | case '.mat' 96 | 97 | % Load .mat file 98 | tic; fprintf('Loading h5 info from .mat file...\n'); 99 | mat_data = load(in_file); 100 | fprintf(' Complete in %.1f seconds\n', toc); 101 | 102 | % Error check .mat file had all necessary flds 103 | found_flds = fieldnames(mat_data); 104 | flds_not_found_idx = ~ismember(flds, found_flds); 105 | if ~all(ismember(flds, found_flds)) 106 | msg = sprintf(' %s\n', flds{flds_not_found_idx}); 107 | error('mat file is missing the following fields: %s', msg) 108 | end 109 | 110 | % Assign data in mat_data to obj 111 | for i = 1:length(found_flds) 112 | obj.(found_flds{i}) = mat_data.(found_flds{i}); 113 | end 114 | 115 | otherwise 116 | error('Unexpected file extension') 117 | end 118 | end 119 | 120 | function [N, entries] = counts(obj, stations, fld, varargin) 121 | % [N, entries] = counts(obj, stations, fld, varargin) 122 | % Returns counts from histcounts and makes 123 | % plots showing histogram, probability and cumulative 124 | % density functions for data in fld at stations 125 | % 126 | % Additional optional parameters can be given as Name/Value 127 | % pairs. See examples below. 128 | % 129 | % 'Edges' defines the edges of the bins. 130 | % 'FilterFields' and 'FilterRanges' together can be used to 131 | % filter the data to a subset where the parameter in 132 | % FilterFields matches the range in FilterRanges. 133 | % 134 | % 135 | % Example: 136 | % % Create GCSAL object 137 | % g = GCSAL.GCSAL('gcsal.h5.info.mat'); 138 | % 139 | % % Choose some stations. In this case stations within 2 140 | % % degrees of the equator 141 | % stations = g.station_search('LatLong', [-2 2 -180 180]); 142 | % 143 | % % Histogram for all windspeeds 144 | % [N, entries] = g.counts(stations, 'wspd'); 145 | % 146 | % % Define custom bin_edges 147 | % bin_edges = 0:1:80; 148 | % 149 | % % Do counts for windspeed filtered on geopotential 150 | % % altitude between 20 and 30 km and with custom bin edges 151 | % [N, entries] = g.counts(stations, 'wspd', 'FilterFields', {'gph'}, ... 152 | % 'FilterRanges', {[20 30]}, 'Edges', bin_edges); 153 | % 154 | % % Now additionally filter on measurements taken in August 155 | % % between 4 and 10 am 156 | % [N, entries] = g.counts(stations, 'wspd', 'FilterFields', {'gph', 'month', 'hour'}, ... 157 | % 'FilterRanges', {[0 8], 8, [4 10]}, 'Edges', bin_edges); 158 | 159 | % Return on empty stations 160 | if isempty(stations) 161 | warning('Stations is empty, cannot count') 162 | N = []; entries = []; 163 | return 164 | end 165 | 166 | % Parse Name/Value pairs from input 167 | p = inputParser; 168 | addOptional(p, 'Edges', GCSAL.GCSAL.default_bin_edges(fld)); 169 | addOptional(p, 'FilterFields', {}); 170 | addOptional(p, 'FilterRanges', {}); 171 | addOptional(p, 'Plot', []); 172 | addOptional(p, 'Verbose', []); 173 | parse(p, varargin{:}); 174 | 175 | % Rename for convenience 176 | edges = p.Results.Edges; 177 | fltr_flds = p.Results.FilterFields; 178 | fltr_ranges = p.Results.FilterRanges; 179 | obj.plot_mode = (~ismember('Plot', p.UsingDefaults) && ... 180 | (p.Results.Plot == true)) || ... 181 | ismember('Plot', p.UsingDefaults); 182 | obj.quiet_mode = (~ismember('Verbose', p.UsingDefaults) && ... 183 | (p.Results.Verbose == false)); 184 | 185 | % Load data from stations and all fields 186 | entries = obj.query(stations, fld, fltr_flds, fltr_ranges); 187 | 188 | % Get counts 189 | [N] = GCSAL.GCSAL.histcounts(entries, fld, edges, obj.quiet_mode); 190 | [pdf, cdf] = GCSAL.GCSAL.counts2pdf(N, 2); 191 | 192 | % calculate bin centers from edges 193 | centers = GCSAL.GCSAL.get_bin_centers(edges); 194 | 195 | % Extract parameter definitions 196 | def = obj.find_def(fld); 197 | 198 | % Construct labels from definitions 199 | label = GCSAL.GCSAL.get_label(def); 200 | 201 | % Get string describing current filters in place for use in 202 | % title 203 | title_str = GCSAL.GCSAL.description_from_filters(fltr_flds, fltr_ranges); 204 | 205 | if (obj.plot_mode) 206 | % Make figure 207 | figure; 208 | 209 | % Plot histogram 210 | subplot(3,1,1) 211 | histogram(vertcat(entries.(fld)), edges) 212 | title(sprintf('Histogram\n%s', title_str)) 213 | xlabel(label) 214 | ylabel('# of occurences') 215 | 216 | % Plot probability density function 217 | subplot(3,1,2) 218 | plot(centers, pdf, '-x') 219 | title(sprintf('Probability Density Function\n%s', title_str)) 220 | 221 | xlabel(label) 222 | ylabel('Probability of occuring') 223 | 224 | % Plot cumulative density funciton 225 | subplot(3,1,3) 226 | plot(centers, cdf, '-x') 227 | title(sprintf('Cumulative Density Function\n%s', title_str)) 228 | 229 | xlabel(label) 230 | ylabel('Probability of exceeding') 231 | end 232 | 233 | obj.plot_mode = true; 234 | obj.quiet_mode = false; 235 | end 236 | 237 | function [N, entries, stats] = counts2(obj, stations, x_fld, y_fld, varargin) 238 | % [N, entries] = counts2(obj, stations, x_fld, y_fld, varargin) 239 | % Returns two dimensional counts from histcounts2 and makes 240 | % plots showing two dimensional probability and cumulative 241 | % density functions comparing data in x_fld and y_fld of 242 | % stations. 243 | % 244 | % Additional optional parameters can be given as Name/Value 245 | % pairs. See examples below. 246 | % 247 | % 'XEdges', 'YEdges' defines the edges of the bins. 248 | % 'FilterFields' and 'FilterRanges' together can be used to 249 | % filter the data to a subset where the parameter in 250 | % FilterFields matches the range in FilterRanges. 251 | % 252 | % Example: 253 | % % Create GCSAL object 254 | % g = GCSAL.GCSAL('gcsal.h5.info.mat'); 255 | % 256 | % % Choose some stations. In this case stations within 2 257 | % % degrees of the equator 258 | % stations = g.station_search('LatLong', [-2 2 -180 180]); 259 | % 260 | % % Do counts between gph and wspd 261 | % [N, entries] = g.counts2(stations, 'gph', 'wspd'); 262 | % 263 | % % Do counts between gph and pressure with custom bin 264 | % % edges 265 | % [N, entries] = g.counts2(stations, 'gph', 'press', ... 266 | % 'XEdges', 0:0.5:40, 'YEdges', 0:2000:100000); 267 | % 268 | % % Do counts for data measured between 6 and 10 am in August 269 | % [N, entries] = g.counts2(stations, 'gph', 'wspd', ... 270 | % 'FilterFields', {'hour', 'month'}, ... 271 | % 'FilterRanges', {[6 10], [8 8]}); 272 | 273 | % Return on empty stations 274 | if isempty(stations) 275 | warning('Stations is empty, cannot count') 276 | N = []; entries = []; 277 | return 278 | end 279 | 280 | % Parse Name/Value pairs from input 281 | p = inputParser; 282 | addOptional(p, 'XEdges', GCSAL.GCSAL.default_bin_edges(x_fld)); 283 | addOptional(p, 'YEdges', GCSAL.GCSAL.default_bin_edges(y_fld)); 284 | addOptional(p, 'FilterFields', {}); 285 | addOptional(p, 'FilterRanges', {}); 286 | addOptional(p, 'Plot', []); 287 | addOptional(p, 'Verbose', []); 288 | parse(p, varargin{:}); 289 | 290 | % Rename for convenience 291 | x_edges = p.Results.XEdges; 292 | y_edges = p.Results.YEdges; 293 | fltr_flds = p.Results.FilterFields; 294 | fltr_ranges = p.Results.FilterRanges; 295 | obj.plot_mode = (~ismember('Plot', p.UsingDefaults) && ... 296 | (p.Results.Plot == true)) || ... 297 | ismember('Plot', p.UsingDefaults); 298 | obj.quiet_mode = (~ismember('Verbose', p.UsingDefaults) && ... 299 | (p.Results.Verbose == false)); 300 | 301 | % Load data from stations and all fields 302 | entries = obj.query(stations, {x_fld, y_fld}, fltr_flds, fltr_ranges); 303 | 304 | % Get counts 305 | [N] = GCSAL.GCSAL.histcounts2(entries, x_fld, y_fld, ... 306 | x_edges, y_edges, obj.quiet_mode); 307 | [pdf, cdf] = GCSAL.GCSAL.counts2pdf(N, 2); 308 | pdf = pdf'; 309 | cdf = cdf'; 310 | 311 | % calculate bin centers from edges 312 | x_centers = GCSAL.GCSAL.get_bin_centers(x_edges); 313 | y_centers = GCSAL.GCSAL.get_bin_centers(y_edges); 314 | 315 | % Extract parameter definitions 316 | x_def = obj.find_def(x_fld); 317 | y_def = obj.find_def(y_fld); 318 | 319 | % Construct labels from definitions 320 | x_label = GCSAL.GCSAL.get_label(x_def); 321 | y_label = GCSAL.GCSAL.get_label(y_def); 322 | 323 | stats.x = x_centers; 324 | stats.y = y_centers; 325 | stats.cdf = cdf; 326 | stats.pdf = pdf; 327 | 328 | if (obj.plot_mode) 329 | % Make figure 330 | figure; 331 | 332 | % subplot for contour of cumulative density function 333 | subplot(3,1,1) 334 | [C, h] = contourf(x_centers, y_centers, cdf, ... 335 | [0.05:0.05:0.95 0.99]); 336 | clabel(C,h, 0.1:0.2:0.9, 'LabelSpacing', 600, 'FontSize', 18); 337 | xlabel(x_label) 338 | ylabel(y_label) 339 | title('Percentile by Altitude') 340 | 341 | % subplot for surf of probability density function 342 | subplot(3,1,2) 343 | surf(x_centers, y_centers, pdf, 'LineStyle', 'None'); 344 | view([0 0 1]) 345 | xlabel(x_label) 346 | ylabel(y_label) 347 | title('Probability Density Function by Altitude') 348 | h_colorbar = colorbar; 349 | ylabel(h_colorbar, 'probability') 350 | 351 | % subplot for # of samples 352 | subplot(3,1,3) 353 | plot(x_centers, sum(N,2)/1000, '-x'); 354 | xlabel(x_label) 355 | ylabel('Thousands of Counts') 356 | title('Sample size') 357 | end 358 | 359 | obj.plot_mode = true; 360 | obj.quiet_mode = false; 361 | end 362 | 363 | function [N, entries] = countsN(obj, stations, resolutions, varargin) 364 | % [N, entries] = countsN(obj, stations, resolutions, varargin) 365 | % Returns N dimensional counts similar to histcounts but in 366 | % N dimensions. Counts are performed based on the fields in 367 | % the struct array resolutions. Each struct in resolutions 368 | % must contain a variable name in the field 'fld' and 369 | % optionally may have bin edges specified in the field 370 | % 'edges'. 371 | % 372 | % Additional optional parameters can be given as Name/Value 373 | % pairs. See examples below. 374 | % 375 | % 'FilterFields' and 'FilterRanges' together can be used to 376 | % filter the data to a subset where the parameter in 377 | % FilterFields matches the range in FilterRanges. 378 | % 379 | % Examples: 380 | % % Get some stations 381 | % stations = g.station_search('Countries', 'Brazil'); 382 | % 383 | % % Make 5-dimensional count matrix with default bin edges and no filtering 384 | % resolutions = struct('fld', {'lat', 'lon', 'gph', 'month', 'wspd'}); 385 | % N = g.countsN(stations, resolutions); 386 | % 387 | % % Add custom bin edges to gph field and limit data to only data between 6 388 | % % and 10 am 389 | % resolutions(3).edges = 0:1:80; 390 | % N = g.countsN(stations, resolutions, 'FilterFields', {'hour'}, ... 391 | % 'FilterRanges', [6 10]); 392 | 393 | 394 | % Return on empty stations 395 | if isempty(stations) 396 | warning('Stations is empty, cannot count') 397 | N = []; entries = []; 398 | return 399 | end 400 | 401 | % Parse Name/Value pairs from input 402 | p = inputParser; 403 | addOptional(p, 'FilterFields', {}); 404 | addOptional(p, 'FilterRanges', {}); 405 | addOptional(p, 'Plot', []); 406 | addOptional(p, 'Verbose', []); 407 | parse(p, varargin{:}); 408 | 409 | % Rename for convenience 410 | fltr_flds = p.Results.FilterFields; 411 | fltr_ranges = p.Results.FilterRanges; 412 | obj.plot_mode = (~ismember('Plot', p.UsingDefaults) && ... 413 | (p.Results.Plot == true)) || ... 414 | ismember('Plot', p.UsingDefaults); 415 | obj.quiet_mode = (~ismember('Verbose', p.UsingDefaults) && ... 416 | (p.Results.Verbose == false)); 417 | 418 | % fill in edges 419 | for i = 1:length(resolutions) 420 | if ~isfield(resolutions(i), 'edges') || isempty(resolutions(i).edges) 421 | resolutions(i).edges = GCSAL.GCSAL.default_bin_edges(resolutions(i).fld); 422 | end 423 | end 424 | 425 | flds = {resolutions.fld}; 426 | % edges = {resolutions.edges}; 427 | 428 | % Load data from stations and all fields 429 | entries = obj.query(stations, flds, fltr_flds, fltr_ranges); 430 | 431 | % Get counts 432 | [N] = GCSAL.GCSAL.histcountsN(entries, resolutions, obj.quiet_mode); 433 | 434 | obj.plot_mode = true; 435 | obj.quiet_mode = false; 436 | end 437 | 438 | function entries = query(obj, stations, params, fltr_flds, fltr_rngs) 439 | % entries = query(obj, station_ids, params, fltr_flds, fltr_rngs) 440 | % Returns a struct array with each element containing the 441 | % data for params for a station in stations. 442 | % 443 | % Finds the data either by reading from the H5 file located 444 | % at obj.h5_fname or by findind the data cached in 445 | % obj.entries or obj.headers. 446 | % 447 | % INPUTS 448 | % stations - A string, cell array of strings of struct array 449 | % containing the station ids from which to load 450 | % data 451 | % params - params can be a string or cell array of strings. 452 | % params can be either from the entry data or 453 | % header data but at least one string in params 454 | % must be from entry data 455 | % fltr_flds - (optional) cell array of filtering parameter names 456 | % fltr_rngs - (optional) cell array of filtering ranges 457 | % 458 | % For a list of available by params see g.defs.header.params 459 | % and g.defs.entries.params 460 | % 461 | % If params or staiton_ids is empty, returns an empty struct 462 | % 463 | % Examples: 464 | % % Create GCSAL object 465 | % g = GCSAL.GCSAL('gcsal.h5.info.mat'); 466 | % 467 | % % Get stations located in Botswana 468 | % stations = g.station_search('Countries', {'Botswana'}); 469 | % 470 | % % Get all geopotential height and windspeed data as well 471 | % % as hour, month, and year data 472 | % entries1 = g.query(stations, {'gph', 'wspd', 'hour', 'month', 'year'}); 473 | % 474 | % % Plot distribution of hours and years for the data in entries1 475 | % figure; histogram(vertcat(entries1.hour)) 476 | % figure; histogram(vertcat(entries1.year)) 477 | % 478 | % % Get gph and wspd data measured between 6 and 4 pm 479 | % entries2 = g.query(stations, {'gph', 'wspd'}, 'hour', [6 16]); 480 | % 481 | % % Plot distribution of hours for the data in entries2 482 | % figure; histogram(vertcat(entries2.hour)) 483 | % 484 | % % Get data corresponding only to measuresments taken 485 | % % in August between 4am and Noon and in the years 1990 486 | % % to 1999 487 | % entries3 = g.query(stations, {'gph', 'wspd'}, ... 488 | % {'month', 'hour', 'year'}, ... 489 | % {8, [4 12], [1990 1999]}); 490 | % 491 | % % Plot distribution of years for the data in entries3 492 | % figure; histogram(vertcat(entries3.year)) 493 | 494 | 495 | % Return empty struct array if no stations ids or params given 496 | if isempty(stations) || isempty(params) 497 | entries = struct([]); 498 | return 499 | end 500 | 501 | % Set filter parameters to empty cell arrays if not given 502 | if ~exist('fltr_flds', 'var') 503 | fltr_flds = {}; 504 | end 505 | 506 | if ~exist('fltr_rngs', 'var') 507 | fltr_rngs = {}; 508 | end 509 | 510 | % Ensure filter parameters are cell arrays 511 | fltr_flds = cellstr(fltr_flds); 512 | if ~iscell(fltr_rngs) 513 | fltr_rngs = {fltr_rngs}; 514 | end 515 | 516 | % Error check varargin was input in pairs 517 | if length(fltr_flds) ~= length(fltr_rngs) 518 | error('Expected length of filter_flds and filter_ranges to match') 519 | end 520 | 521 | % Add all restriction range fields to params since they need to 522 | % be loaded as well 523 | params = unique([params fltr_flds]); 524 | 525 | % Get all header and entries parameters 526 | all_header_params = fieldnames(obj.defs.header.params); 527 | all_entries_params = fieldnames(obj.defs.entries.params); 528 | 529 | % Verify all params can be found in either header or entries 530 | found = ismember(params, [all_header_params; all_entries_params]); 531 | if any(~found) 532 | msg = sprintf(' %s\n', params{~found}); 533 | error('The following params were invalid: %s\n', msg) 534 | end 535 | 536 | % Determine whether each param is part of the header data or 537 | % entry data 538 | curr_header_params = intersect(params, all_header_params); 539 | curr_entries_params = intersect(params, all_entries_params); 540 | 541 | % Make sure at least one param is in entries group 542 | if isempty(curr_entries_params) 543 | error('At least one param must be an entry') 544 | end 545 | 546 | %Ensure stations is a character array 547 | station_ids = GCSAL.GCSAL.station_id_str(stations); 548 | 549 | % Load data from entries 550 | entries = obj.load_from_stations('entries', station_ids, curr_entries_params); 551 | 552 | % Add data from headers 553 | entries = obj.add_header_params_to_entries(entries, curr_header_params); 554 | 555 | % Filter data according to range limits 556 | if (~obj.quiet_mode) 557 | tic; 558 | fprintf('Applying filters... '); 559 | end 560 | for i = 1:length(fltr_rngs) 561 | entries = GCSAL.GCSAL.filter_data_by_range(entries, fltr_flds{i}, fltr_rngs{i}); 562 | end 563 | if (~obj.quiet_mode) 564 | fprintf('Complete in %.1f seconds\n', toc); 565 | end 566 | 567 | % Clear entries if nargout is 0 so we don't use any RAM in the 568 | % base workspace for "ans" 569 | if nargout == 0 570 | entries = []; 571 | end 572 | end 573 | 574 | function stations_match = station_search(obj, varargin) 575 | % stations_match = station_search(obj, varargin) 576 | % Search for stations by Latitude and Longitude, or by 577 | % Country, or by name. Search criteria are given in 578 | % Name/Value pairs with the key words 'LatLong, 'Countries', 579 | % and "IDRegex". If multiple criteria are given they are 580 | % combined with AND. 581 | % 582 | % Examples: 583 | % % Create GCSAL object 584 | % g = GCSAL.GCSAL('gcsal.h5.info.mat'); 585 | % 586 | % % Find stations within 25 degrees of the equator 587 | % stations1 = g.station_search('LatLong', [-25 25 -180 180]); 588 | % 589 | % % Find stations in Brazil or India and within 25 degrees of 590 | % % the equator 591 | % stations2 = g.station_search('Countries', {'Brazil', 'India'}, ... 592 | % 'LatLong', [-25 25 -180 180]); 593 | % 594 | % % Find stations with IDs beginnign with the letter A. 595 | % % Note that in regex ^ means beginning of the line 596 | % stations3 = g.station_search('IDRegex', '^/A'); 597 | % 598 | % % Find stations in Brazil or India AND within 25 degrees of 599 | % % the equator AND with station IDs ending in 5 600 | % % Note that in regex $ means end of the line 601 | % stations4 = g.station_search('Countries', {'Brazil', 'India'}, ... 602 | % 'LatLong', [-25 25 -180 180], ... 603 | % 'IDRegex', '5$'); 604 | 605 | % Parse varargin 606 | p = inputParser; 607 | addOptional(p, 'Countries', []); 608 | addOptional(p, 'IDRegex', []); 609 | addOptional(p, 'Lat', []); 610 | addOptional(p, 'LatLong', []); 611 | addOptional(p, 'Nearest', []); 612 | addOptional(p, 'Number', []); 613 | addOptional(p, 'Range', []); 614 | addOptional(p, 'Plot', []); 615 | addOptional(p, 'Verbose', []); 616 | parse(p, varargin{:}); 617 | 618 | obj.plot_mode = (~ismember('Plot', p.UsingDefaults) && ... 619 | (p.Results.Plot == true)) || ... 620 | ismember('Plot', p.UsingDefaults); 621 | obj.quiet_mode = (~ismember('Verbose', p.UsingDefaults) && ... 622 | (p.Results.Verbose == false)); 623 | 624 | % Plot the map of world with all stations marked 625 | figure; hold all; 626 | obj.plot_world_map(); 627 | 628 | % Initialize station_ids_match to all station ids 629 | ids_match = GCSAL.GCSAL.station_id_str(obj.stations); 630 | Lmax = length(ids_match); 631 | 632 | % If Nearest specified: 633 | if ~ismember('Nearest', p.UsingDefaults) 634 | 635 | % Find stations in lat/long range 636 | num = uint16(p.Results.Number); 637 | if (isempty(num) || (num < 1)) 638 | num = 1; 639 | elseif (num > Lmax) 640 | num = Lmax; 641 | end 642 | [stations_nearest, arclen] = ... 643 | obj.stations_near_latlong(p.Results.Nearest, num); 644 | 645 | % Report # stations found 646 | L = length(stations_nearest); 647 | if (~obj.quiet_mode) 648 | fprintf('%d stations found near lat/long\n', L) 649 | end 650 | 651 | % Find intersect of stations found so far and currently 652 | % found stations 653 | curr_ids = GCSAL.GCSAL.station_id_str(stations_nearest); 654 | ids_match = intersect(ids_match, curr_ids, 'rows'); 655 | end 656 | 657 | % If LatLong specified: 658 | if (~ismember('LatLong', p.UsingDefaults) || ... 659 | ~ismember('Lat', p.UsingDefaults)) 660 | 661 | if ~ismember('Lat', p.UsingDefaults) 662 | % Find stations in lat/long range 663 | range = single(p.Results.Range); 664 | if (isempty(range) || (range < 1.2)) 665 | range = 1.2; 666 | end 667 | % Find stations in lat/long range 668 | lat = p.Results.Lat(1); 669 | lon = p.Results.Lat(2); 670 | box = [(lat - range) (lat + range) lon lon]; 671 | else 672 | box = p.Results.LatLong; 673 | end 674 | 675 | % Find stations in lat/long range 676 | [stations_in_range, latbox, longbox] = ... 677 | obj.stations_from_latlong(box); 678 | 679 | % Report # stations found 680 | L = length(stations_in_range); 681 | if (~obj.quiet_mode) 682 | fprintf('%d stations found in lat/long range\n', L) 683 | end 684 | 685 | % Find intersect of stations found so far and currently 686 | % found stations 687 | curr_ids = GCSAL.GCSAL.station_id_str(stations_in_range); 688 | ids_match = intersect(ids_match, curr_ids, 'rows'); 689 | 690 | % Highlight lat/long search box 691 | plot(longbox, latbox, 'b-', 'LineWidth', 2) 692 | end 693 | 694 | % If Countries specified: 695 | if ~ismember('Countries', p.UsingDefaults) 696 | 697 | % Find stations in countries 698 | [stations_in_countries, countries_match] = ... 699 | obj.stations_from_countries(p.Results.Countries); 700 | 701 | % Report # stations found 702 | L = length(stations_in_countries); 703 | if (~obj.quiet_mode) 704 | fprintf('%d stations found in countries\n', L) 705 | end 706 | 707 | % Find intersect of stations found so far and currently 708 | % found stations 709 | curr_ids = GCSAL.GCSAL.station_id_str(stations_in_countries); 710 | ids_match = intersect(ids_match, curr_ids, 'rows'); 711 | 712 | % Highlight border of countries searched 713 | for i = 1:length(countries_match) 714 | plot(countries_match(i).Lon, countries_match(i).Lat, ... 715 | 'b-', 'linewidth', 2) 716 | end 717 | end 718 | 719 | % IfIDRegex specified: 720 | if ~ismember('IDRegex', p.UsingDefaults) 721 | 722 | % Found stations matching IDRegex 723 | stations_from_regex = obj.stations_from_regex(p.Results.IDRegex); 724 | 725 | % Report # stations found 726 | L = length(stations_from_regex); 727 | if (~obj.quiet_mode) 728 | fprintf('%d stations found matching search_str\n', L) 729 | end 730 | 731 | % Find intersect of stations found so far and currently 732 | % found stations 733 | curr_ids = GCSAL.GCSAL.station_id_str(stations_from_regex); 734 | ids_match = intersect(ids_match, curr_ids, 'rows'); 735 | 736 | % Highlight stations matching IDRegex 737 | GCSAL.GCSAL.plot_stations(stations_from_regex, ... 738 | 'bo', 'MarkerSize', 6); 739 | 740 | end 741 | 742 | % Convert station ids to stations struct array 743 | stations_match = obj.find_stations(ids_match); 744 | 745 | % Report # stations found 746 | if (~obj.quiet_mode) 747 | fprintf('%d stations found combined\n', length(stations_match)) 748 | end 749 | 750 | % Highlight stations found 751 | GCSAL.GCSAL.plot_stations(stations_match, 'r+'); 752 | 753 | if ~ismember('Nearest', p.UsingDefaults) 754 | for i = 1:num 755 | stations_match(i).arclen = arclen(i); 756 | end 757 | end 758 | 759 | obj.plot_mode = true; 760 | obj.quiet_mode = false; 761 | end 762 | 763 | function plot_world_map(obj, include_stations) 764 | % plot_world_map(obj, include_stations) 765 | % Plots a map of the world based on the country borders in 766 | % obj.countries. If include_stations is true then will also 767 | % put a mark for each station in obj.stations. If 768 | % include_stations is not given then it defaults to true. 769 | 770 | 771 | % Set default value 772 | if ~exist('include_stations', 'var') 773 | include_stations = true; 774 | end 775 | 776 | % Plot world map 777 | GCSAL.Map.world_map(obj.countries); 778 | 779 | % Plot stations 780 | if include_stations 781 | obj.plot_stations(obj.stations, 'k.'); 782 | end 783 | end 784 | 785 | function country_matches = find_countries(obj, country_names) 786 | % country_matches = find_countries(obj, country_names) 787 | % Returns a struct array corresponding to elements of 788 | % obj.countries with a name matching any string in 789 | % country_names. country_names can be a string or cell array 790 | % of strings. Ignores case. 791 | 792 | % Ensure country_names is lower case 793 | country_names = lower(country_names); 794 | 795 | % Get all names in countries and ensure lower case 796 | all_countries = lower({obj.countries.name}); 797 | 798 | % Find matches 799 | i_country_match = GCSAL.GCSAL.find_keys(all_countries, country_names); 800 | 801 | % index into countries 802 | country_matches = obj.countries(i_country_match); 803 | end 804 | 805 | function station_matches = find_stations(obj, station_ids) 806 | % station_matches = find_stations(obj, station_ids) 807 | % Returns a struct array corresponding to elements of 808 | % obj.stations with an id matching any string in 809 | % station_ids. station_ids can be a string or cell array 810 | % of strings. 811 | 812 | % Get all station ids in obj.stations 813 | all_station_ids = GCSAL.GCSAL.station_id_str(obj.stations); 814 | 815 | % Find matches 816 | i_station_match = GCSAL.GCSAL.find_keys(all_station_ids, station_ids); 817 | 818 | % Index into stations 819 | station_matches = obj.stations(i_station_match); 820 | 821 | end 822 | 823 | function [header_matches, i_header_match] = find_headers(obj, stations) 824 | % header_matches = find_headers(obj, ids) 825 | % Returns a struct array corresponding to elements of 826 | % obj.headers with an id matching any string in 827 | % ids. ids can be a string or cell array 828 | % of strings. 829 | 830 | % Get all header ids in obj.headers 831 | all_ids = GCSAL.GCSAL.station_id_str(obj.headers); 832 | 833 | station_ids = GCSAL.GCSAL.station_id_str(stations); 834 | % Find matches 835 | i_header_match = GCSAL.GCSAL.find_keys(all_ids, station_ids); 836 | 837 | % Index into headers 838 | header_matches = obj.headers(i_header_match); 839 | end 840 | 841 | function def = find_def(obj, varname) 842 | % def = find_def(obj, varname) 843 | % Searches through all parameters in obj.defs and returns the 844 | % struct whose name matches varname 845 | 846 | % Loop through all groups in obj.defs 847 | groups = fieldnames(obj.defs); 848 | for i = 1:length(groups) 849 | 850 | % Pull out the parameter names in param 851 | param_names = fieldnames(obj.defs.(groups{i}).params); 852 | 853 | % Use ismember to search for a match 854 | [~, idx ] = ismember(varname, param_names); 855 | 856 | % If a match is found return 857 | if idx 858 | def = obj.defs.(groups{i}).params.(varname); 859 | return 860 | end 861 | end 862 | 863 | % If we got here without hitting a return, no match was ever found 864 | error('Could not find defition for %s', varname) 865 | end 866 | 867 | function clear_entries_cache(obj) 868 | % clears the cached data in obj.entries. Do this if you are 869 | % running out of RAM 870 | 871 | obj.entries = struct(); 872 | end 873 | 874 | end 875 | 876 | methods (Access = 'private') 877 | 878 | function entries = add_header_params_to_entries(obj, entries, params) 879 | % entries = add_header_params_to_entries(obj, entries, flds) 880 | % For each entry in entries and each param in params, adds 881 | % the data in header.(param) to entry.(param). The header is 882 | % found based on matching station id. The data in 883 | % header.(param) is expanded to match the length of data in 884 | % entry based on a correspondence index. 885 | % 886 | % If params is not given, then all params in header will be 887 | % added. 888 | 889 | 890 | % If params is empty, then there is nothing to add, just return 891 | if isempty(params) 892 | return 893 | end 894 | 895 | 896 | % Check if params was an input 897 | if ~exist('params', 'var') 898 | % Params not given so add all params 899 | add_all_params = true; 900 | else 901 | % Params specific so do not add all params 902 | add_all_params = false; 903 | 904 | % Ensure params is a cell array 905 | params = cellstr(params); 906 | end 907 | 908 | % Find indices of headers that match station id with entries 909 | [~, i_header_match] = obj.find_headers(GCSAL.GCSAL.station_id_str(entries)); 910 | 911 | % Convert logical array to indices 912 | i_header_match = find(i_header_match); 913 | 914 | % Add entry_idx to all headers that match entries 915 | obj.add_entry_idx_to_headers(i_header_match); %#ok 916 | 917 | % Determine whether to do waitbar 918 | L = length(entries); 919 | do_waitbar = (L > 1) && ~obj.quiet_mode; 920 | if do_waitbar 921 | h = waitbar(0, 'Adding header fields to entries'); 922 | end 923 | 924 | if (~obj.quiet_mode) 925 | tic; 926 | fprintf('Adding header fields to entries... '); 927 | end 928 | 929 | % Loop through stations 930 | for i = 1:L 931 | 932 | % Get header for current station 933 | header = obj.find_headers(entries(i).id); 934 | 935 | % If add_all_params set params to all fields in header 936 | if add_all_params 937 | params = fieldnames(header); 938 | end 939 | 940 | % Loop through params 941 | for j = 1:length(params) 942 | 943 | % Get the current field name from params 944 | fld = params{j}; 945 | 946 | % Try reading from cached 947 | val = obj.read_from_cached_entries(header.id, fld); 948 | 949 | % If not found in cache, read from header 950 | if isempty(val) 951 | 952 | % Get the data in header at fld 953 | val = header.(fld); 954 | 955 | 956 | if size(val, 1) == 1 957 | % If val is a single row we need to duplicate it to the 958 | % size of the entry data 959 | val = repmat(val, length(header.entry_idx), 1); 960 | else 961 | % Apply entry_idx to val to expand val data to 962 | % match length of entry data with correct 963 | % correspondence 964 | val = val(header.entry_idx); 965 | end 966 | 967 | % Set entry data to val 968 | obj.cache_param(header.id, fld, val); 969 | 970 | end 971 | 972 | % add data to entries for return struct array 973 | entries(i).(fld) = val; 974 | 975 | end 976 | 977 | % Update waitbar 978 | if do_waitbar && mod(i, ceil(L/50)) == 0 979 | msg = sprintf('%d/%d: Adding header fields to entries for %s', i, L, header.id); 980 | waitbar(i/L, h, msg); 981 | end 982 | end 983 | 984 | if (~obj.quiet_mode) 985 | fprintf('Complete in %.1f seconds\n', toc); 986 | end 987 | 988 | % Close waitbar 989 | if do_waitbar 990 | close(h); 991 | end 992 | 993 | end 994 | 995 | function [stations_nearest, arclen] = ... 996 | stations_near_latlong(obj, latlonposn, n) 997 | % [stations_nearest, arclen] = ... 998 | % stations_near_latlong(obj, latlongrange) 999 | % Returns an array of n-station structs for stations that 1000 | % are nearest the queried position. 1001 | % Additionally returns an array of arclength distances in meters 1002 | % 1003 | % latlonposn must be a 2 element vector and is 1004 | % in degrees. Example: 1005 | % latlonposn = [9.999924 -84.205753] 1006 | % 1007 | 1008 | % Return on empty input 1009 | if isempty(latlonposn) 1010 | stations_nearest = struct(); 1011 | return 1012 | end 1013 | 1014 | % error check lat/long range 1015 | if length(latlonposn) ~= 2 1016 | error('Lat Lon Position must be a 2 element vector') 1017 | end 1018 | 1019 | % Find stations in range as well as getting lat/long vectors 1020 | % for plotting the search box 1021 | [stations_nearest, arclen] = GCSAL.Map.find_nearest(... 1022 | obj.stations, latlonposn(1), latlonposn(2), n); 1023 | 1024 | end 1025 | 1026 | function [stations_in_range, latbox, longbox] = ... 1027 | stations_from_latlong(obj, latlongrange) 1028 | % [stations_in_range, latbox, longbox] = ... 1029 | % stations_from_latlong(obj, latlongrange) 1030 | % Returns an array of station structs for stations that are 1031 | % located within the box defined by latlongrange. 1032 | % 1033 | % Additionally returns longbox and latbox which can be used 1034 | % to plot the the searchbox that was used. 1035 | % 1036 | % latlongrange must be a four element vector and is 1037 | % in degrees. Example: 1038 | % latlongrange = [-45 45 -180 180] 1039 | % would find all stations between -45 and 45 deg latitude 1040 | % 1041 | % latlongrange does account for angle wrap around. Example: 1042 | % latlongrange = [45 -45 -180 180] 1043 | % would finda all stations with latitude above 45 deg or 1044 | % below -45. 1045 | 1046 | % Return on empty input 1047 | if isempty(latlongrange) 1048 | stations_in_range = struct(); 1049 | return 1050 | end 1051 | 1052 | % error check lat/long range 1053 | if length(latlongrange) ~= 4 1054 | error('latlongrange must be a 4 element vector') 1055 | end 1056 | 1057 | % Find stations in range as well as getting lat/long vectors 1058 | % for plotting the search box 1059 | [stations_in_range, latbox, longbox] = GCSAL.Map.find_in_lat_long_range(... 1060 | obj.stations, latlongrange(1:2), latlongrange(3:4)); 1061 | 1062 | end 1063 | 1064 | function [stations_in_countries, countries_match] = ... 1065 | stations_from_countries(obj, country_names) 1066 | % [stations_in_countries, countries_match] = stations_from_countries(obj, country_names) 1067 | % Returns an array of station structs for stations that 1068 | % are located within the countries listed in country_names. 1069 | % Additionally returns a struct array for countries that 1070 | % match country_names 1071 | 1072 | % Get struct array of countries from countries matching 1073 | % country_names 1074 | countries_match = obj.find_countries(country_names); 1075 | 1076 | % Get list of all stations in matching countries 1077 | station_ids = vertcat(countries_match.stations); 1078 | 1079 | % In case where no stations or countries were found ensure 1080 | % station_ids is an empyt string 1081 | if isempty(station_ids); station_ids = ''; end 1082 | 1083 | % Convert station ids to stations struct array 1084 | stations_in_countries = obj.find_stations(station_ids); 1085 | 1086 | end 1087 | 1088 | function station_matches = stations_from_regex(obj, search_str) 1089 | % station_matches = find_stations_regex(obj, search_str) 1090 | % Returns an array of station structs for stations whose ids 1091 | % match the regex pattern in search_str 1092 | 1093 | % Get all station ids in obj.stations 1094 | all_station_ids = GCSAL.GCSAL.station_id_str(obj.stations); 1095 | 1096 | % Convert to cell array 1097 | all_station_ids = cellstr(all_station_ids); 1098 | 1099 | % Call regexp 1100 | regex_out = regexp(all_station_ids, search_str); 1101 | 1102 | % Use cellfun to find which elements in all_station_ids had a 1103 | % match 1104 | i_station_match = ~cellfun(@isempty, regex_out); 1105 | 1106 | % Index into stations 1107 | station_matches = obj.stations(i_station_match); 1108 | 1109 | end 1110 | 1111 | function headers = load_all_headers(obj) 1112 | % Find all headers in the h5_info struct and load the data from 1113 | % the h5 file for those headers 1114 | 1115 | % Find all station names in h5 info struct 1116 | all_station_ids = {obj.h5_info.Groups.Name}; 1117 | 1118 | % Remove / from beginning of station ids 1119 | all_station_ids(:,1) = []; 1120 | 1121 | % Use empty params to indicate we want to load all parameters 1122 | params = {}; 1123 | headers = obj.load_from_stations('header', all_station_ids, params); 1124 | end 1125 | 1126 | 1127 | 1128 | 1129 | function out = load_from_stations(obj, group, station_ids, params) 1130 | % Load the parameters listed in params from the data in group from 1131 | % the H5 file for all stations in station_ids. 1132 | % 1133 | % If params is empty then all parameters will be loaded 1134 | 1135 | % Set default params to empty cell which will revert to loading 1136 | % all parameters 1137 | if ~exist('params', 'var') 1138 | params = {}; 1139 | end 1140 | 1141 | % Handle case where station_ids are empty 1142 | if isempty(station_ids) 1143 | out = []; 1144 | return 1145 | end 1146 | 1147 | % Ensure station_ids is a cell array 1148 | station_ids = cellstr(station_ids); 1149 | 1150 | % Initialize counter 1151 | count = 1; 1152 | 1153 | % Decide whether to do wait bar 1154 | L = length(station_ids); 1155 | do_waitbar = (L > 1) && ~obj.quiet_mode; 1156 | 1157 | % Open waitbar 1158 | if do_waitbar 1159 | h = waitbar(0, 'Loading data from stations'); 1160 | end 1161 | 1162 | if (~obj.quiet_mode) 1163 | tic; 1164 | fprintf('Loading data from stations... '); 1165 | end 1166 | 1167 | % Loop through all station ids 1168 | for i = 1:L 1169 | 1170 | % Attempt to read group data 1171 | tmp = obj.load_group(group, station_ids{i}, params); 1172 | 1173 | % Assign data to out struct if tmp is not empty 1174 | if ~isempty(tmp) 1175 | % Assign data 1176 | out(count) = tmp; %#ok 1177 | 1178 | % Increment counter 1179 | count = count + 1; 1180 | end 1181 | 1182 | % Update waitbar 1183 | if do_waitbar && mod(i, ceil(L/50)) == 0 1184 | msg = sprintf('%d/%d: Loading data for %s/%s', i, L, station_ids{i}, group); 1185 | waitbar(i/L, h, msg); 1186 | end 1187 | end 1188 | if (~obj.quiet_mode) 1189 | fprintf('Complete in %.1f seconds\n', toc); 1190 | end 1191 | 1192 | % Close wait bar 1193 | if do_waitbar 1194 | close(h) 1195 | end 1196 | 1197 | % If counter never incremented, return empty struct array 1198 | if count == 1 1199 | out = struct([]); 1200 | end 1201 | end 1202 | 1203 | function out = load_group(obj, group, station_id, params) 1204 | % Load the parameters listed in params from the data in group 1205 | % and from the station in station_id from the H5 file 1206 | % 1207 | % If params is empty then all parameters are loaded 1208 | 1209 | 1210 | % Extract parameter definitions for the current group 1211 | param_defs = obj.defs.(group); 1212 | 1213 | % Set default params to all parameters 1214 | if ~exist('params', 'var') || isempty(params) 1215 | params = fieldnames(param_defs.params); 1216 | end 1217 | 1218 | % Initialize output 1219 | out = []; 1220 | 1221 | % Find info for the current station_id in the top level h5_info 1222 | station_info = GCSAL.GCSAL.h5info_find_children(obj.h5_info, station_id); 1223 | 1224 | % If station_info is empty return with warning that station_id 1225 | % was not found 1226 | if isempty(station_info) 1227 | fprintf('%s not found\n', station_id) 1228 | return 1229 | end 1230 | 1231 | % Find group in station_info 1232 | group_info = GCSAL.GCSAL.h5info_find_children(station_info, group); 1233 | 1234 | % Throw error if group not found 1235 | if isempty(group_info) 1236 | error('Group not found: %s', group) 1237 | end 1238 | 1239 | % For each parameter in params, load the param 1240 | for i = 1:length(params) 1241 | curr = param_defs.params.(params{i}); 1242 | out.(curr.varname) = obj.load_param(curr, group_info); 1243 | end 1244 | 1245 | % Add id to struct so that the header data for this struct can 1246 | % be easily found 1247 | if ~isfield(out, 'id') 1248 | out.id = station_id; 1249 | end 1250 | end 1251 | 1252 | 1253 | function data = load_param(obj, param_def, group_info) 1254 | % Load the data corresponding to param_def and group_info. 1255 | % Apply data conversion and function_handle as 1256 | % specified in param_def 1257 | % 1258 | % INPUTS 1259 | % param_def - struct containing varname, type, and 1260 | % function_handle for the parameter to be read 1261 | % group_info - Child struct from h5info call on h5_fname that 1262 | % points to the data of interest 1263 | 1264 | 1265 | % Get station id from group info 1266 | info_for_fileparts = group_info.Name; 1267 | info_for_fileparts = strrep(info_for_fileparts, '/', filesep); 1268 | [id, group] = fileparts(info_for_fileparts); 1269 | id(1) = []; 1270 | 1271 | % Try reading data from cached entries 1272 | data = obj.read_from_cached_entries(id, param_def.varname); 1273 | 1274 | % If data is not empty, it param was found in cached entries so 1275 | % we can return 1276 | if ~isempty(data); return; end 1277 | 1278 | % Find info for the param in group_info based on its varname 1279 | param_info = GCSAL.GCSAL.h5info_find_children(group_info, param_def.varname); 1280 | 1281 | % If param not found, return empty 1282 | if isempty(param_info) 1283 | data = []; 1284 | return 1285 | end 1286 | 1287 | % load the parameter from the H5 file using param_info 1288 | data = GCSAL.H5.load(obj.h5_fname, param_info); 1289 | 1290 | % Parameter is a char, convert uint8 to char 1291 | if strcmp(param_def.type, 'char') 1292 | data = char(data); 1293 | end 1294 | 1295 | % If parameter was returned as a double from H5.load, but is 1296 | % not defined as a double then convert to a single for 1297 | % efficiency 1298 | if isa(data, 'double') && ~strcmp(param_def.type, 'double') 1299 | data = single(data); 1300 | end 1301 | 1302 | % Apply function from parameter definition 1303 | if ~isempty(param_def.function_handle) 1304 | data = param_def.function_handle(data); 1305 | end 1306 | 1307 | % Cache the data in entries 1308 | % Since obj is a pointer (inherits handle class) we can cache 1309 | % the data without returning it 1310 | if ~strcmp(group, 'header') 1311 | obj.cache_param(id, param_def.varname, data); 1312 | end 1313 | 1314 | end 1315 | 1316 | 1317 | function out = read_from_cached_entries(obj, id, param) 1318 | % Read data from cached entries if it exists otherwise return 1319 | % empty vector 1320 | 1321 | out = []; 1322 | if isfield(obj.entries, id) 1323 | if isfield(obj.entries.(id), param) 1324 | out = obj.entries.(id).(param); 1325 | end 1326 | end 1327 | 1328 | end 1329 | 1330 | function cache_param(obj, id, param, value) 1331 | % Keep data in memory in entries struct for fast loading 1332 | % Since obj is a pointer (inherits handle class) we can set the 1333 | % obj.entries without returning it 1334 | 1335 | if obj.do_cache_entries 1336 | obj.entries.(id).(param) = value; 1337 | end 1338 | end 1339 | 1340 | function add_entry_idx_to_headers(obj, i_headers) 1341 | % add_entry_idx_to_headers(obj, i_headers) 1342 | % For the structs in obj.headers(i_headers), add the 1343 | % entry_idx field. This field is an indexing vector for 1344 | % the correspondence between header data and entry data. 1345 | 1346 | 1347 | % Decide whether to do wait bar 1348 | L = length(i_headers); 1349 | do_waitbar = (L > 1) && ~obj.quiet_mode; 1350 | 1351 | % Open waitbar 1352 | if do_waitbar 1353 | h = waitbar(0, 'Calculating header to entry idx'); 1354 | end 1355 | 1356 | % Loop through indices in i_headres 1357 | for i = 1:length(i_headers) 1358 | 1359 | % Extract the current header 1360 | header = obj.headers(i_headers(i)); 1361 | 1362 | % Check if entry_idx has already been added to this header 1363 | if ~isfield(obj.headers, 'entry_idx') || isempty(header.entry_idx) 1364 | 1365 | % Get the entry_idx that corresponds header to entry 1366 | % data 1367 | entry_idx = GCSAL.GCSAL.header_to_entry_idx(header); 1368 | 1369 | % Convert entry_idx to the smallest possible type 1370 | obj.headers(i_headers(i)).entry_idx = GCSAL.IGRA.Param.convert_to_min_int(entry_idx); 1371 | 1372 | % Update waitbar 1373 | if do_waitbar && mod(i, ceil(L/50)) == 0 1374 | msg = sprintf('%d/%d: Calculating header to entry idx for %s', i, L, header.id); 1375 | waitbar(i/L, h, msg); 1376 | end 1377 | end 1378 | end 1379 | 1380 | % Close wait bar 1381 | if do_waitbar 1382 | close(h) 1383 | end 1384 | end 1385 | end 1386 | 1387 | methods (Static) 1388 | 1389 | function entries = filter_data_by_range(entries, range_fld, range) 1390 | % entries = filter_data_by_range(entries, range_fld, range) 1391 | % Filter the data in entries to keep only instances where 1392 | % entries.(range_fld) is in range. 1393 | % 1394 | % range can be a two element vector in which case data can be 1395 | % anywhere between range(1) and range(2) inclusive. Or range 1396 | % can be a scalar in which case data must be exactly equal to 1397 | % range. 1398 | 1399 | 1400 | % Error check on length of range 1401 | if length(range) ~= 1 && length(range) ~=2 1402 | error('range should be length 1 or 2') 1403 | end 1404 | 1405 | % Loop through stations 1406 | L = length(entries); 1407 | for i = 1:L 1408 | 1409 | % Extract the data in range_fld 1410 | val = entries(i).(range_fld); 1411 | 1412 | % Get index vector for values that are in range 1413 | if isscalar(range) 1414 | % If range is a scalar match exactly 1415 | idx = val == range; 1416 | else 1417 | % If range is two element vector match between 1418 | % range(1) and range(2) inclusive 1419 | idx = val >= range(1) & val <= range(2); 1420 | end 1421 | 1422 | % Loop through each parameter in the current entry and apply index 1423 | params = fieldnames(entries(i)); 1424 | for j = 1:length(params) 1425 | 1426 | % Apply index 1427 | if size(entries(i).(params{j}), 1) ~= 1 1428 | entries(i).(params{j}) = entries(i).(params{j})(idx); 1429 | end 1430 | end 1431 | 1432 | end 1433 | 1434 | end 1435 | 1436 | function [counts] = histcounts(entries, fld, edges, quiet_mode) 1437 | % counts = histcounts(entries, edges, x_fld) 1438 | % Pulls the data located in fld for every element 1439 | % in entries and returns the counts in each bin constructed by 1440 | % bin edges 1441 | 1442 | if (quiet_mode) 1443 | tic; 1444 | fprintf('Counting %s...', fld ); 1445 | end 1446 | 1447 | % Force first and last bins to include -/+ inf 1448 | edges(1) = -inf; 1449 | edges(end) = inf; 1450 | 1451 | % Pre-allocate counts matrix with all zeros. There are one 1452 | % fewer bins than edges on each side of bin grid 1453 | Nrows = length(edges)-1; 1454 | counts = zeros(1, Nrows); 1455 | 1456 | % Loop through each entry 1457 | for i = 1:length(entries) 1458 | 1459 | % Extract data for x and y from entries 1460 | x = entries(i).(fld); 1461 | 1462 | % Count x with bins defined by edges and add the 1463 | % result to the existing counts 1464 | counts = counts + histcounts(x, edges); 1465 | 1466 | end 1467 | 1468 | if (quiet_mode) 1469 | fprintf('Complete in %.1f seconds\n', toc); 1470 | end 1471 | 1472 | % The following is a simpler and more vectorized way to do the 1473 | % same as above but surpisingly, testing proved that the above 1474 | % is faster 1475 | % counts = histcounts(vertcat(entries.(fld)), edges); 1476 | 1477 | end 1478 | 1479 | function [counts] = histcounts2(entries, x_fld, y_fld, x_edges, y_edges, quiet_mode) 1480 | % counts = histcounts(entries, x_edges, y_edges, x_fld, y_fld) 1481 | % Pulls the data located in x_fld and y_fld for every element 1482 | % in entries and returns the counts in each bin constructed by 1483 | % bin edges defined in x_edges and y_edges 1484 | 1485 | if (~quiet_mode) 1486 | tic; 1487 | fprintf('Counting %s vs %$s...', x_fld, y_fld ); 1488 | end 1489 | 1490 | % Force first and last bins to include -/+ inf 1491 | x_edges(1) = -inf; 1492 | x_edges(end) = inf; 1493 | y_edges(1) = -inf; 1494 | y_edges(end) = inf; 1495 | 1496 | % Pre-allocate counts matrix with all zeros. There are one 1497 | % fewer bins than edges on each side of bin grid 1498 | Nrows = length(x_edges)-1; 1499 | Ncols = length(y_edges)-1; 1500 | counts = zeros(Nrows, Ncols); 1501 | 1502 | % Loop through each entry 1503 | for i = 1:length(entries) 1504 | 1505 | % Extract data for x and y from entries 1506 | x = entries(i).(x_fld); 1507 | y = entries(i).(y_fld); 1508 | 1509 | % Count x and y in grid made of x/y edges and add the 1510 | % result to the existing counts 1511 | counts = counts + histcounts2(x, y, x_edges, y_edges); 1512 | end 1513 | 1514 | if (~quiet_mode) 1515 | fprintf('Complete in %.1f seconds\n', toc); 1516 | end 1517 | 1518 | % The following is a simpler and more vectorized way to do the 1519 | % same as above but surpisingly, testing proved that the above 1520 | % is faster 1521 | % x = vertcat(entries.(x_fld)); 1522 | % y = vertcat(entries.(y_fld)); 1523 | % counts = histcounts2(x, y, edges); 1524 | 1525 | end 1526 | 1527 | function [counts] = histcountsN(entries, resolutions, quiet_mode) 1528 | % counts = histcounts(entries, edges, x_fld) 1529 | % Pulls the data located in fld for every element 1530 | % in entries and returns the counts in each bin constructed by 1531 | % bin edges 1532 | 1533 | if (quiet_mode) 1534 | tic; 1535 | msg = sprintf(' %s\n', resolutions.fld); 1536 | fprintf('Counting... \n%s', msg) 1537 | end 1538 | 1539 | % Pre-allocate counts matrix with all zeros. There are one 1540 | % fewer bins than edges on each side of bin grid 1541 | N = cell(size(resolutions)); 1542 | for j = 1:length(resolutions) 1543 | N{j} = length(resolutions(j).edges) - 1; 1544 | end 1545 | counts = zeros(N{:}, 'uint32'); 1546 | 1547 | 1548 | % Determine whether to do waitbar 1549 | L = length(entries); 1550 | do_waitbar = (L > 1) && ~quiet_mode; 1551 | if do_waitbar 1552 | h = waitbar(0, 'Doing counts'); 1553 | end 1554 | 1555 | % Loop through each entry 1556 | for i = 1:length(entries) 1557 | 1558 | % Initialize bins to cell array of proper size 1559 | bins = cell(size(resolutions)); 1560 | 1561 | % Loop through parameter resolutions 1562 | for j = 1:length(resolutions) 1563 | fld = resolutions(j).fld; 1564 | edges = resolutions(j).edges; 1565 | 1566 | % Force first and last bins to include -/+ inf 1567 | edges(1) = -inf; 1568 | edges(end) = inf; 1569 | 1570 | % Extract data for x and y from entries 1571 | x = entries(i).(fld); 1572 | 1573 | % Use discretize to determine bin index for every data 1574 | % point in x 1575 | bins{j} = discretize(x, edges); 1576 | 1577 | end 1578 | 1579 | % Add to counts for each idx made by bins 1580 | try 1581 | idx = sub2ind(size(counts), bins{:}); 1582 | idx(isnan(idx)) = []; 1583 | for j = 1:length(idx) 1584 | counts(idx(j)) = counts(idx(j)) + 1; 1585 | end 1586 | catch e 1587 | fprintf(['Counts encountered an error with the following ' ... 1588 | 'station so it was skipped: %s\n'], entries(i).id); 1589 | disp(e.identifier) 1590 | disp(e.message) 1591 | % keyboard 1592 | end 1593 | 1594 | % Update waitbar 1595 | if do_waitbar && mod(i, ceil(L/50)) == 0 1596 | msg = sprintf('%d/%d: Doing counts for %s', i, L, entries(i).id); 1597 | waitbar(i/L, h, msg); 1598 | end 1599 | 1600 | end 1601 | 1602 | if (quiet_mode) 1603 | fprintf('complete in %.1f seconds\n', toc) 1604 | end 1605 | 1606 | % Close wait bar 1607 | if do_waitbar 1608 | close(h) 1609 | end 1610 | 1611 | % Convert to smallest possible integer data type to save space 1612 | counts = GCSAL.IGRA.Param.convert_to_min_int(counts); 1613 | 1614 | end 1615 | 1616 | function [pdf, cdf] = counts2pdf(counts, dim) 1617 | % [pdf, cdf] = counts2percentile_pdf(counts, dim) 1618 | % For a matrix of counts, calculates the probability density 1619 | % function along dimension dim. 1620 | % 1621 | % Conceptually if counts were a vector then 1622 | % pdf = counts / sum(counts) 1623 | % 1624 | % This function does the calculation but in a vectorized way 1625 | % along dimension dim. 1626 | % 1627 | % Additionally calculates the cumulative density function as 1628 | % cdf = cumsum(pdf, dim) 1629 | 1630 | % Get total counts in each row/column 1631 | total_counts = sum(counts, dim); 1632 | 1633 | % Normalize counts to get probability density function 1634 | pdf = bsxfun(@rdivide, counts, total_counts); 1635 | 1636 | % Accumulat pdf to get cdf 1637 | cdf = cumsum(pdf, dim); 1638 | 1639 | end 1640 | 1641 | function bin_centers = get_bin_centers(bin_edges) 1642 | % bin_centers = get_bin_centers(bin_edges) 1643 | % Returns bin_centers corresponding to midway value between 1644 | % each pair of edges in bin_edges 1645 | 1646 | left_edge = bin_edges(1:end-1); 1647 | right_edge = bin_edges(2:end); 1648 | 1649 | bin_centers = (right_edge + left_edge) / 2; 1650 | 1651 | end 1652 | 1653 | function idx = find_keys(key_index, keys_to_find) 1654 | % idx = find_keys(key_index, keys_to_find) 1655 | % Returns logical array idx indicating which elements in 1656 | % key_index match any string in keys_to_find. 1657 | % 1658 | % key_index and keys_to_find can be strings, string matrices, 1659 | % or cell arrays of strings. 1660 | % 1661 | % Warning is thrown if not all elements in keys_to_find are 1662 | % found in key_index 1663 | 1664 | % Handle case where keys_to_find is empty. This is 1665 | % required because cellstr turns emptys strings into {''} which 1666 | % is not an empty cell but rather has length 1 1667 | if isempty(keys_to_find) 1668 | keys_to_find = {}; 1669 | end 1670 | 1671 | % Ensure keys and key_array are cell arrays 1672 | keys_to_find = cellstr(keys_to_find); 1673 | key_index = cellstr(key_index); 1674 | 1675 | % Use ismember to get logical array for existence of each 1676 | % element of key_index present in keys 1677 | idx = ismember(key_index, keys_to_find); 1678 | 1679 | % Warning check that all keys_to_find were found 1680 | keys_not_found_idx = ~ismember(keys_to_find, key_index(idx)); 1681 | 1682 | if any(keys_not_found_idx) 1683 | keys_not_found = keys_to_find(keys_not_found_idx); 1684 | msg = sprintf(' %s\n', keys_not_found{:}); 1685 | warning('The following keys were not found: \n%s', msg) 1686 | end 1687 | end 1688 | 1689 | function info_matches = h5info_find_children(info, child_name) 1690 | % info_matches = h5info_find_children(info, child_name) 1691 | % Returns a struct array corresponding to elements of 1692 | % info.Groups with a Name matching any string in 1693 | % child_name. child_name can be a string or cell array 1694 | % of strings. info should be part of the data structure 1695 | % returned by h5info. 1696 | 1697 | % Form search key by combining info.Name with subfolder 1698 | search_key = GCSAL.H5.fullpath(info.Name, child_name); 1699 | 1700 | % Get all children names in info 1701 | all_children_names = {info.Groups.Name}; 1702 | 1703 | % Find matches 1704 | i_children_match = GCSAL.GCSAL.find_keys( all_children_names, search_key); 1705 | 1706 | % Index in info.Groups 1707 | info_matches = info.Groups(i_children_match); 1708 | end 1709 | 1710 | function p = plot_stations(stations_to_plot, varargin) 1711 | % p = plot_stations(stations_to_plot, varargin) 1712 | % Plots the lat/long coordinates of stations_to_plot. Any 1713 | % additional inputs to the plot function can be included in 1714 | % varargin. Returns a handle to the line object for the plot 1715 | % call. 1716 | % 1717 | % stations_to_plot can either by a struct array with lat/long 1718 | % as fields or a list of station id strings 1719 | 1720 | p = plot([stations_to_plot.lon], [stations_to_plot.lat], varargin{:}); 1721 | 1722 | end 1723 | 1724 | function edges = default_bin_edges(param_name) 1725 | % edges = default_bin_edges(param_name) 1726 | % Returns default values for bin edges given a parameter name 1727 | 1728 | switch param_name 1729 | case 'gph' 1730 | edges = 0:1:30; 1731 | case 'press' 1732 | edges = 0:1000:100000; 1733 | case 'temp' 1734 | edges = -100:1:40; 1735 | case 'rh' 1736 | edges = 0:1:100; 1737 | case 'dpdp' 1738 | edges = 0:1:100; 1739 | case 'wspd' 1740 | edges = 0:2:60; 1741 | case 'wdir' 1742 | edges = 0:15:360; 1743 | case 'month' 1744 | edges = 1:12; 1745 | case 'day' 1746 | edges = 1:31; 1747 | case 'hour' 1748 | edges = 0:24; 1749 | case 'lat' 1750 | edges = -90:2:90; 1751 | case 'lon' 1752 | edges = -180:4:180; 1753 | otherwise 1754 | error('unrecognized fld: %s', param_name) 1755 | end 1756 | 1757 | end 1758 | 1759 | function label = get_label(def) 1760 | % label = get_label(def) 1761 | % Returns the from a definition struct. This label 1762 | % can be used for and x or y labels on a plot. 1763 | 1764 | label = def.description; 1765 | if ~isempty(def.units) 1766 | label = [label ' (' def.units ')']; 1767 | end 1768 | end 1769 | 1770 | function title_str = description_from_filters(fltr_flds, fltr_rngs) 1771 | % Returns a string describing the filters in fltr_flds and 1772 | % fltr_ranges that can be used on a plot title 1773 | 1774 | % Ensure fltr_flds and fltr_ranges are cell arrays 1775 | fltr_flds = cellstr(fltr_flds); 1776 | if ~iscell(fltr_rngs) 1777 | fltr_rngs = {fltr_rngs}; 1778 | end 1779 | 1780 | % Initialize string as empty 1781 | title_str = ''; 1782 | 1783 | % Loop through each filter element 1784 | for i = 1:length(fltr_flds) 1785 | 1786 | % Create a string describing the fitler applied based on 1787 | % whether filter range was min/max or single value 1788 | if length(fltr_rngs{i}) == 1 1789 | msg = sprintf('%s = %g, ', fltr_flds{i}, fltr_rngs{i}); 1790 | elseif length(fltr_rngs{i}) == 2 1791 | msg = sprintf('%s = [%g to %g], ', fltr_flds{i}, ... 1792 | fltr_rngs{i}(1), fltr_rngs{i}(2)); 1793 | else 1794 | error('fltr_ranges length expected to be 1 or 2') 1795 | end 1796 | 1797 | % Append msg to the title_str 1798 | title_str = [title_str msg]; %#ok 1799 | end 1800 | 1801 | % If title_str is empty remove the new line character at the 1802 | % end of the string 1803 | if ~isempty(title_str) 1804 | title_str(end-1:end) = []; 1805 | end 1806 | end 1807 | 1808 | function stations_out = stations_intersect(stations1, stations2) 1809 | % out = stations_intersect(stations1, stations2) 1810 | % Returns the intersect of struct arrays stations1 and 1811 | % stations2 based on their id fields 1812 | 1813 | 1814 | [~, idx] = GCSAL.GCSAL.struct_set_operation(... 1815 | stations1, stations2, 'id', @intersect); 1816 | stations_out = stations1(idx); 1817 | 1818 | end 1819 | 1820 | function stations_out = stations_union(stations1, stations2) 1821 | % out = stations_union(stations1, stations2) 1822 | % Returns the union of struct arrays stations1 and 1823 | % stations2 based on their id fields 1824 | 1825 | [~, ia, ib] = GCSAL.GCSAL.struct_set_operation(... 1826 | stations1, stations2, 'id', @union); 1827 | stations_out = [stations1(ia) stations2(ib)]; 1828 | 1829 | end 1830 | 1831 | function stations_out = stations_setxor(stations1, stations2) 1832 | % out = stations_setxor(stations1, stations2) 1833 | % Returns the setxor of struct arrays stations1 and 1834 | % stations2 based on their id fields 1835 | 1836 | [~, ia, ib] = GCSAL.GCSAL.struct_set_operation(... 1837 | stations1, stations2, 'id', @setxor); 1838 | stations_out = [stations1(ia) stations2(ib)]; 1839 | 1840 | end 1841 | 1842 | end 1843 | 1844 | 1845 | methods (Static, Access = 'private') 1846 | 1847 | function [c, ia, ib] = struct_set_operation(struct1, struct2, fld, operation) 1848 | % Set the set operation on the data at struct1.(fld) and 1849 | % struct2.(fld). operation can be intersect, union, or setxor. 1850 | % Returns the outputs of the operation 1851 | 1852 | str1 = vertcat(struct1.(fld)); 1853 | str2 = vertcat(struct2.(fld)); 1854 | [c, ia, ib] = operation(str1, str2, 'rows'); 1855 | 1856 | end 1857 | 1858 | function stations = initialize_stations(headers) 1859 | % stations = initialize_stations(obj) 1860 | % Creates the stations struct array from 1861 | 1862 | % Initialize stations struct with headers.id and NaN for lat 1863 | % and lon 1864 | stations = struct('id', {headers.id}, 'lat', NaN, 'lon', NaN); 1865 | 1866 | % Loop through all headers 1867 | for i = 1:length(headers) 1868 | 1869 | % Get current header 1870 | header = headers(i); 1871 | 1872 | % If lat and lon are sclars then use them 1873 | if isscalar(header.lat) && isscalar(header.lon) 1874 | stations(i).lat = header.lat; 1875 | stations(i).lon = header.lon; 1876 | else 1877 | % If lat and lon are not scalars then check how big the 1878 | % the biggest difference is 1879 | x = abs(max(header.lat) - min(header.lat)); 1880 | y = abs(max(header.lon) - min(header.lon)); 1881 | d = sqrt(x^2 + y^2); 1882 | 1883 | % As long as the difference isn't too big, use the mode 1884 | if d < .2 1885 | stations(i).lat = mode(header.lat); 1886 | stations(i).lon = mode(header.lon); 1887 | else 1888 | % Otherwise we will the station lat/lon as NaN. 1889 | % Stations that begin with ZZ are expected to move 1890 | % around a lot, but any station besides ZZ, report 1891 | % tit. 1892 | if ~strcmp(header.id([1 2]), 'ZZ') 1893 | fprintf(['Location for station %s was not used ' ... 1894 | 'because it moved around by ~%g deg\n'], header.id, d); 1895 | end 1896 | end 1897 | end 1898 | end 1899 | end 1900 | 1901 | function idx = get_entry_idx_in_range(header, range, range_fld) 1902 | 1903 | % Get header values at range_fld 1904 | val = header.(range_fld); 1905 | 1906 | % Find index in header where value is in range 1907 | idx_header = val >= range(1) & val <= range(2); 1908 | 1909 | % Extract array of entry lengths for header values in range 1910 | numlevs_in_range = header.numlevs(idx_header); 1911 | 1912 | % Next we want to calculate the index offset for the start of 1913 | % each header in range. First we need the index to the start of 1914 | % every entry which we get by first getting the cumulative sum 1915 | % of all numlevs 1916 | cumulative_numlevs_all = cumsum(header.numlevs); 1917 | 1918 | % Then to get an idx offset we just start at 1 and remove the 1919 | % last element of the cumulative sum 1920 | idx_offset_all = [1; cumulative_numlevs_all(1:end-1)]; 1921 | 1922 | 1923 | % Now getting the index offset to the in range headers is just 1924 | % indexing into idx_offset_all 1925 | idx_offset = idx_offset_all(idx_header); 1926 | 1927 | % Initialize idx to be size of total from numlevs 1928 | idx = zeros(sum(numlevs_in_range), 1); 1929 | 1930 | % initialize start index to 1 1931 | start = 1; 1932 | for i = 1:length(numlevs_in_range) 1933 | 1934 | % finish index is start + current # of entries - 1 1935 | finish = start + numlevs_in_range(i) - 1; 1936 | 1937 | % Assign idx based on idx_offset and current number of 1938 | % entries 1939 | idx(start:finish) = (1:numlevs_in_range(i)) + idx_offset(i); 1940 | 1941 | % For next loop start where we left off 1942 | start = finish+1; 1943 | 1944 | end 1945 | 1946 | end 1947 | 1948 | function entry_idx = header_to_entry_idx(header) 1949 | % entry_idx = header_to_entry_idx(header) 1950 | % Returns an indexing vector for the correspondence between 1951 | % header data and entry data. 1952 | % 1953 | % Each element in header corresponds to many elements in the 1954 | % entry data for the same station. This function determines 1955 | % the indexing vector that allows you to expand the header 1956 | % data to the length of the entry data with the header data 1957 | % copied for each instance of entry data for which it 1958 | % corresponds. 1959 | 1960 | 1961 | % Initialize idx to be size of total from numlevs 1962 | entry_idx = zeros(sum(header.numlevs), 1); 1963 | 1964 | % initialize start index to 1 1965 | start = 1; 1966 | for i = 1:length(header.numlevs) 1967 | 1968 | % finish index is start + current # of entries - 1 1969 | finish = start + header.numlevs(i) - 1; 1970 | 1971 | % Assign idx based on idx_offset and current number of 1972 | % entries 1973 | entry_idx(start:finish) = i; 1974 | 1975 | % For next loop start where we left off 1976 | start = finish+1; 1977 | end 1978 | end 1979 | 1980 | function station_ids = station_id_str(stations) 1981 | % station_ids = station_id_str(stations) 1982 | % Returns a character array whose rows correspond to the the 1983 | % id field from the struct array stations. 1984 | % 1985 | % If stations is already a character array returns stations. 1986 | 1987 | % Handle empty stations 1988 | if ~exist('stations', 'var') || isempty(stations) 1989 | station_ids = ''; 1990 | return 1991 | end 1992 | 1993 | % Handle case where station_ids is input as a struct array by 1994 | % extracting station id character array 1995 | if isstruct(stations) 1996 | station_ids = vertcat(stations.id); 1997 | elseif ischar(stations) 1998 | station_ids = stations; 1999 | else 2000 | error('stations expeted to be struct or char') 2001 | end 2002 | end 2003 | 2004 | 2005 | end 2006 | end 2007 | --------------------------------------------------------------------------------