├── 1.COMPILE.WIND.PROFILE (merra1).R ├── 1.COMPILE.WIND.PROFILE (merra2).R ├── 2.CALCULATE.R ├── 3.PROGRESS.MONITOR.R ├── CITATION ├── LICENSE ├── README.md ├── data ├── BASE.OPTIONS.R ├── INPUT_CH.CSV ├── INPUT_CH.PNG └── INPUT_CH.R ├── lib ├── VWF.EXTRAPOLATE.R ├── VWF.EXTRAS.R ├── VWF.FARMS.R ├── VWF.MAIN.PREP.R ├── VWF.MAIN.PROGRESS.R ├── VWF.MAIN.RESULTS.R ├── VWF.MAIN.WINDPOWER.R ├── VWF.MAIN.WINDSPEED.R ├── VWF.NCDF.R ├── VWF.PLOTS.R ├── VWF.R └── VWF.STDLIB.R └── power_curves ├── LICENSE ├── Wind Turbine Power Curves ~ 5 (0.01ms with 0.00 w smoother).csv ├── Wind Turbine Power Curves ~ 5 (0.01ms with 0.10 w smoother).csv ├── Wind Turbine Power Curves ~ 5 (0.01ms with 0.20 w smoother).csv ├── Wind Turbine Power Curves ~ 5 (0.01ms with 0.30 w smoother).csv └── Wind Turbine Power Curves ~ 5 (0.01ms with 0.40 w smoother).csv /1.COMPILE.WIND.PROFILE (merra1).R: -------------------------------------------------------------------------------- 1 | ## 2 | ##### 3 | ## ################################################################################################## 4 | ##### 5 | ## 6 | ## this requires us to have a template NetCDF file with the right variables available 7 | ## can't do that within R easily, so need to download NCO from http://nco.sourceforge.net/ 8 | ## save a single MERRA SLV file as MERRA_IN.nc and run the following commands: 9 | ## 10 | #### MERRA 1 11 | # ncks -x -v disph,t10m,t2m,ts,u10m,u50m,v10m,v50m MERRA_IN.nc MERRA_OUT.nc 12 | # ncrename -h -O -v u2m,A MERRA_OUT.nc 13 | # ncrename -h -O -v v2m,z MERRA_OUT.nc 14 | # ncatted -a long_name,A,o,c,"Scale factor for log-law extrapolation: W = A log(h / z)" MERRA_OUT.nc 15 | # ncatted -a long_name,z,o,c,"Ref height for log-law extrapolation: W = A log(h / z)" MERRA_OUT.nc 16 | # 17 | ## 18 | ## 19 | ##### 20 | ## ################################################################################################## 21 | ##### 22 | ## 23 | ## this runs through all merra files in the specified folders 24 | ## reads the wind speed data and calculates the extrapolation parameters 25 | ## then saves the A and z values to NetCDF files 26 | ## 27 | ## after running, you should move the resulting files into their own folder 28 | ## 29 | ## 30 | 31 | 32 | ##### 33 | ## ## SETUP 34 | ##### 35 | 36 | # the folders we wish to read merra data from 37 | merraBase = 'W:/MERRA_WIND/' 38 | 39 | # the folders we wish to write profiles to 40 | outputBase = 'W:/MERRA_PROFILES/' 41 | 42 | # our blank NetCDF template file to be filled 43 | templateFile = 'M:/WORK/Wind Modelling/VWF CODE/TOOLS/MERRA.log.law.template.A.z.nc' 44 | 45 | # path to the VWF model 46 | VWFMODEL = 'Q:/VWF/lib/VWF.R' 47 | 48 | 49 | 50 | 51 | 52 | ##### 53 | ## ## READ IN DATA 54 | ##### 55 | 56 | # load the VWF model 57 | source(VWFMODEL) 58 | 59 | # find and prepare all our merra files 60 | merra = prepare_merra_files(merraBase) 61 | 62 | # prepare a NetCDF file handler so our format is known 63 | f = merra$files[1] 64 | nc = NetCdfClass(f, 'MERRA1', TRUE) 65 | 66 | # subset if necessary 67 | if (exists('region')) 68 | nc$subset_coords(region) 69 | 70 | # close this input file 71 | nc$close_file() 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | ##### 80 | ## ## PREPARE OUR CLUSTER 81 | ##### 82 | 83 | # build a parallel cluster 84 | # note that it doesn't make sense going much beyond 8 cores 85 | library(doParallel) 86 | cl = makeCluster(8) 87 | registerDoParallel(cl) 88 | 89 | # provide the extrapolate function to each core 90 | clusterExport(cl, 
varlist=c('extrapolate_log_law')) 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | ##### 99 | ## ## RUN 100 | ##### 101 | 102 | # run through each input merra file 103 | for (f in merra$files) 104 | { 105 | # decide the filename for this output data 106 | o = gsub('prod.assim.tavg1_2d_slv', 'wind_profile', f) 107 | o = gsub('tavg1_2d_slv_Nx', 'wind_profile', f) 108 | o = gsub(merraBase, outputBase, o) 109 | 110 | # skip if this already exists 111 | clear_line() 112 | if (file.exists(o)) 113 | { 114 | clear_line("Skipping", o) 115 | next 116 | } 117 | 118 | 119 | 120 | # get this file's time attributes 121 | nc$open_file(f) 122 | myTime = ncatt_get(nc$ncdf, "time", "units")$value 123 | nc$close_file() 124 | 125 | 126 | 127 | # do the extrapolation, getting the A,z parameters 128 | profile = extrapolate_ncdf(f) 129 | 130 | # safety checks 131 | err = which(profile$z > 100) 132 | profile$z[err] = 100 133 | 134 | err = which(profile$z < 10^-10) 135 | profile$z[err] = 10^-10 136 | 137 | err = which(profile$A < 0) 138 | profile$A[err] = 0 139 | 140 | 141 | 142 | 143 | # create the NetCDF file for this month 144 | file.copy(templateFile, o) 145 | 146 | # open this file for writing 147 | ncout = nc_open(o, write=TRUE) 148 | 149 | # set its time attributes 150 | ncatt_put(ncout, "time", "units", myTime) 151 | 152 | # put our data in 153 | for (var in names(profile)) 154 | { 155 | ncvar_put(ncout, var, profile[[var]]) 156 | } 157 | 158 | # save and close 159 | nc_close(ncout) 160 | cat("Written", o, "\n") 161 | } 162 | 163 | 164 | 165 | cat("\n\n\nFLAWLESS!\n\n") 166 | -------------------------------------------------------------------------------- /1.COMPILE.WIND.PROFILE (merra2).R: -------------------------------------------------------------------------------- 1 | ## 2 | ##### 3 | ## ################################################################################################## 4 | ##### 5 | ## 6 | ## this requires us to have a template NetCDF file with the right variables available 7 | ## can't do that within R easily, so need to download NCO from http://nco.sourceforge.net/ 8 | ## save a single MERRA SLV file as MERRA_IN.nc4 and run the following commands: 9 | ## 10 | #### MERRA 2 11 | # ncks -x -v CLDPRS,CLDTMP,DISPH,H1000,H250,H500,H850,OMEGA500,PBLTOP,PS,Q250,Q500,Q850,QV10M,QV2M,SLP,T10M,T250,T2M,T2MDEW,T2MWET,T500,T850,TO3,TOX,TQI,TQL,TQV,TROPPB,TROPPT,TROPPV,TROPQ,TROPT,TS,U10M,U250,U500,U50M,U850,V10M,V250,V500,V50M,V850,ZLCL MERRA_IN.nc4 MERRA_OUT.nc4 12 | # ncrename -h -O -v U2M,A MERRA_OUT.nc4 13 | # ncrename -h -O -v V2M,z MERRA_OUT.nc4 14 | # ncatted -a long_name,A,o,c,"Scale factor for log-law extrapolation: W = A log(h / z)" MERRA_OUT.nc4 15 | # ncatted -a long_name,z,o,c,"Ref height for log-law extrapolation: W = A log(h / z)" MERRA_OUT.nc4 16 | # 17 | ## 18 | ## 19 | ##### 20 | ## ################################################################################################## 21 | ##### 22 | ## 23 | ## this runs through all merra files in the specified folders 24 | ## reads the wind speed data and calculates the extrapolation parameters 25 | ## then saves the A and z values to NetCDF files 26 | ## 27 | ## after running, you should move the resulting files into their own folder 28 | ## 29 | ## 30 | 31 | 32 | ##### 33 | ## ## SETUP 34 | ##### 35 | 36 | 37 | # the folders we wish to read data from 38 | merraFolder = 'X:/MERRA2_WIND/' 39 | 40 | # the folders we wish to write to 41 | outputFolder = 'X:/MERRA2_PROFILES/' 42 | 43 | # our blank NetCDF template file to be filled 44 | 
templateFile = 'M:/WORK/Wind Modelling/VWF CODE/TOOLS/MERRA2.log.law.template.A.z.nc4' 45 | 46 | # path to the VWF model 47 | VWFMODEL = 'Q:/VWF/lib/VWF.R' 48 | 49 | 50 | 51 | ##### 52 | ## ## READ IN DATA 53 | ##### 54 | 55 | # load the VWF model 56 | source(VWFMODEL) 57 | 58 | # find and prepare all our merra files 59 | merra = prepare_merra_files(merraFolder) 60 | 61 | # prepare a NetCDF file handler so our format is known 62 | f = merra$files[1] 63 | nc = NetCdfClass(f, 'MERRA2', TRUE) 64 | 65 | # subset if necessary 66 | if (exists('region')) 67 | nc$subset_coords(region) 68 | 69 | # close this input file 70 | nc$close_file() 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | ##### 79 | ## ## PREPARE OUR CLUSTER 80 | ##### 81 | 82 | # build a parallel cluster 83 | # note that it doesn't make sense going much beyond 8 cores 84 | library(doParallel) 85 | cl = makeCluster(8) 86 | registerDoParallel(cl) 87 | 88 | # provide the extrapolate function to each core 89 | clusterExport(cl, varlist=c('extrapolate_log_law')) 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | ##### 98 | ## ## RUN 99 | ##### 100 | 101 | # run through each input merra file 102 | for (f in merra$files) 103 | { 104 | 105 | # get this file's time attributes 106 | nc$open_file(f) 107 | myTime = ncatt_get(nc$ncdf, "time", "units")$value 108 | nc$close_file() 109 | 110 | 111 | 112 | # do the extrapolation, getting the A,z parameters 113 | profile = extrapolate_ncdf(f) 114 | 115 | # safety checks 116 | err = which(profile$z > 100) 117 | profile$z[err] = 100 118 | 119 | err = which(profile$z < 10^-10) 120 | profile$z[err] = 10^-10 121 | 122 | err = which(profile$A < 0) 123 | profile$A[err] = 0 124 | 125 | 126 | 127 | 128 | # build a filename for this output data 129 | o = gsub('prod.assim.tavg1_2d_slv', 'wind_profile', f) 130 | o = gsub('tavg1_2d_slv_Nx', 'wind_profile', f) 131 | o = gsub(merraFolder, outputFolder, o) 132 | 133 | # create the NetCDF file for this month 134 | file.copy(templateFile, o) 135 | 136 | # open this file for writing 137 | ncout = nc_open(o, write=TRUE) 138 | 139 | # set its time attributes 140 | ncatt_put(ncout, "time", "units", myTime) 141 | 142 | # put our data in 143 | for (var in names(profile)) 144 | { 145 | ncvar_put(ncout, var, profile[[var]]) 146 | } 147 | 148 | # save and close 149 | nc_close(ncout) 150 | cat("Written", o, "\n") 151 | } 152 | 153 | 154 | 155 | cat("\n\n\nFLAWLESS!\n\n") 156 | -------------------------------------------------------------------------------- /2.CALCULATE.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | ########################## 4 | ## ## 5 | ## GENERAL OPTIONS ## 6 | ## ## 7 | ########################## 8 | 9 | 10 | # our wind farms file with coordinates, turbine models, etc. 11 | farmOptionsFile = 'Q:/VWF/data/INPUT_CH.R' 12 | 13 | # our base options file with model parameters, etc. 
14 | baseOptionsFile = 'Q:/VWF/data/BASE.OPTIONS.R' 15 | 16 | 17 | 18 | 19 | ########################## 20 | ## ## 21 | ## PREPARE ## 22 | ## ## 23 | ########################## 24 | 25 | 26 | # load the VWF model and required libraries 27 | # 28 | source('Q:/VWF/lib/VWF.R') 29 | 30 | # prepare the VWF model with your parameters 31 | # check ability to save output files 32 | # read in wind farms and power curves 33 | # prep the merra data for reading in 34 | # launch multi-core cluster 35 | # establish spatial interpolation and bias correction code 36 | # 37 | source(baseOptionsFile) 38 | source('Q:/VWF/lib/VWF.MAIN.PREP.R') 39 | 40 | # key things you now have: 41 | # windFarms - dataframe containing our wind farms 42 | # farmCurve - dataframe containing our power curves 43 | # merra_wind - object holding our merra filenames 44 | # nc_wind - object for handling netcdf files 45 | # cl - multicore cluster object 46 | 47 | 48 | 49 | 50 | ########################## 51 | ## ## 52 | ## GET WIND SPEED ## 53 | ## ## 54 | ########################## 55 | 56 | # read in pre-calculated speeds if we have them, otherwise go and interpolate & extrapolate them 57 | # this is a very slow stage - go fire up 3.PROGRESS.MONITOR.R to see how it's going.. 58 | source('Q:/VWF/lib/VWF.MAIN.WINDSPEED.R') 59 | 60 | # key things you now have: 61 | # windSpeed - dataframe containing hourly wind speeds for each farm 62 | # this is also saved into baseSaveFolder 63 | 64 | 65 | 66 | ########################## 67 | ## ## 68 | ## GET WIND POWER ## 69 | ## ## 70 | ########################## 71 | 72 | source('Q:/VWF/lib/VWF.MAIN.WINDPOWER.R') 73 | 74 | # key things you now have: 75 | # windSpeed - now modified for bias correction 76 | # loadFactor - dataframe containing the hourly capacity factors for each farm 77 | # powerMW - dataframe containing the hourly power output for each farm 78 | # parms - dataframe containing the parameters used for each farm 79 | 80 | 81 | 82 | ########################## 83 | ## ## 84 | ## SAVE RESULTS ## 85 | ## ## 86 | ########################## 87 | 88 | source('Q:/VWF/lib/VWF.MAIN.RESULTS.R') 89 | flush('\n\nFLAWLESS\n\n') 90 | -------------------------------------------------------------------------------- /3.PROGRESS.MONITOR.R: -------------------------------------------------------------------------------- 1 | 2 | # the options files being used in your VWF calculation 3 | farmOptionsFile = 'Q:/VWF/data/INPUT_CH.R' 4 | baseOptionsFile = 'Q:/VWF/data/BASE.OPTIONS.R' 5 | 6 | 7 | # update frequency in seconds 8 | update_time = 4 9 | 10 | # establish the progress monitor 11 | # this will continually update until you exit this R instance 12 | source(baseOptionsFile) 13 | source('Q:/VWF/lib/VWF.MAIN.PROGRESS.R') 14 | -------------------------------------------------------------------------------- /CITATION: -------------------------------------------------------------------------------- 1 | To reference VWF in publications, please cite the following paper: 2 | 3 | I Staffell and S Pfenninger, 2016. Using bias-corrected reanalysis to simulate current and future wind power output. Energy, 114, 1224–1239. https://dx.doi.org/10.1016/j.energy.2016.08.068 4 | 5 | 6 | 7 | To reference the Renewables.ninja model (i.e. if you use the renewables.ninja website) please cite both of the following papers: 8 | 9 | S Pfenninger and I Staffell, 2016. Long-term patterns of European PV output using 30 years of validated hourly reanalysis and satellite data. Energy, 114, 1251-1265. 
https://dx.doi.org/10.1016/j.energy.2016.08.060 10 | 11 | I Staffell and S Pfenninger, 2016. Using bias-corrected reanalysis to simulate current and future wind power output. Energy, 114, 1224–1239. https://dx.doi.org/10.1016/j.energy.2016.08.068 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | Copyright (c) 2012-2017, Iain Staffell 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | * Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The Virtual Wind Farm (VWF) model 2 | 3 | `vwf` is an R project to simulate the power output from wind farms based on NASA weather data. The wind energy simulations on [Renewables.ninja](https://www.renewables.ninja/) are based on the VWF model. 4 | 5 | 6 | 7 | ## REQUIREMENTS 8 | 9 | An unsound mind and plenty of alcohol. 10 | 11 | [R](https://www.r-project.org/) or [MRO](https://mran.revolutionanalytics.com/open/) version 3+ 12 | 13 | Required libraries: 14 | * ncdf4 15 | * lubridate 16 | * rworldmap 17 | * doParallel 18 | * data.table 19 | * akima 20 | * MASS 21 | * fields 22 | 23 | 24 | 25 | ## SETUP 26 | 27 | ### Download wind speed data 28 | First, download the necessary input data from NASA's [MERRA-2 reanalysis](https://gmao.gsfc.nasa.gov/reanalysis/MERRA-2/), specifically the [SLV tables](http://dx.doi.org/10.5067/VJAFPLI1CSIV) (M2T1NXSLV). 29 | 30 | The easiest (and most wasteful) option is to use the 'Online archive' link and download the complete files (approx 400 MB per day). Alternatively you could use the various subsetting tools available, selecting the DISPH, U2M, V2M, U10M, V10M, U50M and V50M variables, and whatever region and time period you are interested in. Good luck, they rarely function correctly! 
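To sanity-check a downloaded file before committing to the full pipeline, a minimal sketch along these lines should work (it uses the `ncdf4` library from the requirements list; the filename is a placeholder for whichever file you downloaded):

```r
# quick sanity check of a single MERRA-2 SLV file (hypothetical filename)
library(ncdf4)

nc = nc_open('MERRA2_SLV_example.nc4')  # any M2T1NXSLV file you downloaded
u50 = ncvar_get(nc, 'U50M')             # eastward wind component at 50 m [lon, lat, time]
v50 = ncvar_get(nc, 'V50M')             # northward wind component at 50 m
nc_close(nc)

# scalar wind speed at 50 m - values should look physically sensible (roughly 0-40 m/s)
speed50 = sqrt(u50^2 + v50^2)
summary(as.vector(speed50))
```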
31 | 32 | ### Set up the VWF model 33 | With the required libraries installed, the VWF code should hopefully just require you to edit the paths to the include files and your data files. These are dotted all over the place, so you're best to search for ":/" in all the files to check you have found all paths. 34 | 35 | ### Process wind speed data 36 | Performing the wind speed extrapolation (working out wind speed at the specific height of your turbine) is very computationally expensive, so the VWF calculation engine uses pre-compiled wind speed profile data, in the form of the `A` and `z` parameters of the logarithmic profile law (wind speed at height *h* is then recovered as `v = A * log(h / z)`). 37 | 38 | First, you need to run one of the `1.COMPILE.WIND.PROFILE.R` files, editing the paths to your input, template and output files. On a standard desktop, this takes around a week per year of input data. I said it was computationally expensive. Alternatively, perhaps we could collaborate on some fun research together? 39 | 40 | This process should yield a set of NetCDF files, one per day, containing the wind profile parameters for each location and hour of the day. These are about 30 MB each, so around 10 GB per year. 41 | 42 | 43 | 44 | ## USAGE INSTRUCTIONS 45 | 46 | First, run either `1.COMPILE.WIND.PROFILE (merra1).R` or `1.COMPILE.WIND.PROFILE (merra2).R` to generate your wind speed profiles, as detailed above. 47 | 48 | Second, create a simulation input file describing the wind farms you wish to simulate. An example covering 39 wind farms in Switzerland is provided in `data\INPUT_CH.CSV` and `data\INPUT_CH.R`. 49 | 50 | Third, decide on the model parameters you wish to use. There are various knobs and switches to tweak, with a set of 'safe' defaults listed in `data\BASE.OPTIONS.R`. Some of these alter the calculation engine (such as how much smoothing to apply to power curves, or how to do the bias correction of wind speeds). Some control what output you receive (do you want to save corrected wind speeds, monthly average farm output, output aggregated by turbine model, by country, etc.). Hopefully there are enough comments in that file to help you along. 51 | 52 | Finally, set up the model to run. Change the path names to your specific options files in `2.CALCULATE.R`, and off you go. You may find this is also quite slow to run (a few hours to simulate 100 farms over 30 years perhaps). As it runs on multiple cores in parallel it isn't easy to provide a progress indicator in the main code (at least, I don't know how to). So, you can optionally run `3.PROGRESS.MONITOR.R` in another R session; it will paint pretty pictures showing how much you have simulated... 53 | 54 | 55 | ## LICENSE 56 | BSD 3-Clause License 57 | Copyright (C) 2012-2017 Iain Staffell 58 | All rights reserved. 59 | 60 | This excludes all contents of the `power_curves` folder which remains the property of the respective copyright holders. 61 | 62 | See `LICENSE` for more detail 63 | 64 | 65 | ## CREDITS & CONTACT 66 | 67 | The VWF code is developed by Iain Staffell. You can try emailing me at i.staffell@imperial.ac.uk 68 | 69 | VWF is part of the [Renewables.ninja](https://renewables.ninja) project, developed by Stefan Pfenninger and Iain Staffell. Use the [contacts page](https://www.renewables.ninja/about) there. 70 | 71 | ## Citation 72 | 73 | I Staffell and S Pfenninger, 2016. Using bias-corrected reanalysis to simulate current and future wind power output. *Energy*, 114, 1224–1239. 
[doi: 10.1016/j.energy.2016.08.068](https://dx.doi.org/10.1016/j.energy.2016.08.068) 74 | -------------------------------------------------------------------------------- /data/BASE.OPTIONS.R: -------------------------------------------------------------------------------- 1 | 2 | # some tips.. 3 | # save your results to a different physical drive to the one you read merra data from 4 | # don't assume more cores = more speed... 16 cores seems ok, beyond that split your job up 5 | 6 | 7 | ## MOST COMMON OPTIONS 8 | 9 | # where to save everything - filenames are automatically generated 10 | baseSaveFolder = 'L:/NINJA_GLOBAL/US_STATES/' 11 | baseSaveFile = get_text_before(basename(farmOptionsFile), '.', last=TRUE) %&% '.' 12 | 13 | # what format to save as: csv rds or rdata 14 | xtn = 'rds' 15 | 16 | # which years to calculate 17 | yearRange = 1980:2016 18 | 19 | # set how many cores you wish to use 20 | n.cores = 10 21 | 22 | 23 | 24 | 25 | 26 | ## MERRA EXTRAPOLATED WIND DATA 27 | 28 | # do you want to use custom pre-compiled windspeeds? 29 | # set this to NULL to use the default speeds from a previous model run, or to calculate new ones 30 | windSpeedFile = NULL 31 | 32 | # the folders we wish to read data from 33 | merraFolders = 'X:/MERRA2_PROFILES/' %&% yearRange %&% '/' 34 | 35 | # process most recent data first? 36 | processNewestFirst = FALSE 37 | 38 | # which model are we using 'MERRA1' or 'MERRA2' 39 | reanalysis = 'MERRA2' 40 | 41 | # how to group files ('monthly' or 'quarterly') 42 | merra_grouping = 'quarterly' 43 | 44 | 45 | 46 | 47 | 48 | 49 | ## POWER CONVERSION PARAMETERS 50 | 51 | # the parameters we use to determine the scalar and offset for each farm 52 | # scalar = (scalar_alpha * PR) + scalar_beta 53 | scalar_alpha = 1/2 54 | scalar_beta = 1/3 55 | 56 | # specify the parameters for multi-turbine farm curve 57 | convolverStdDev = 0.20 # must be > 0 58 | convolverSpeedSmooth = 0.20 # must be 0.00, 0.10, 0.20, 0.30, 0.40 59 | 60 | # the files our power curves live in (raw turbine curves and smoothed farm curves) 61 | turbCurveFile = 'Q:/VWF/power_curves/Wind Turbine Power Curves ~ 5 (0.01ms with 0.00 w smoother).csv' 62 | farmCurveFile = 'Q:/VWF/power_curves/Wind Turbine Power Curves ~ 5 (0.01ms with ' %&% sprintf("%.2f", convolverSpeedSmooth) %&% ' w smoother).csv' 63 | 64 | # should we inflate offshore farm PR by 1.16 (i.e. you haven't done it in your farms file, or you have separately calibrated onshore/offshore) 65 | inflate_offshore_pr = TRUE 66 | 67 | 68 | 69 | 70 | 71 | ## HOW TO TRANSFORM WIND SPEEDS 72 | 73 | # do we require speeds interpolated to half-hourly? 74 | halfHourly = FALSE 75 | 76 | # if so, inject noise into the half-hourly wind speeds to represent the distribution of power swings more closely 77 | # stdev of normal distribution that things are multiplied by 78 | noise_stdev = 0.04 79 | 80 | # do you want to factor air density into the calculations? 81 | # if so airDensityFile must be specified 82 | doAirDensity = FALSE 83 | airDensityFile = NULL 84 | 85 | # should we transform unfathomably low wind speeds? 86 | # i.e. sites with a long-run average < 4 m/s ... just wouldn't have a wind farm! 87 | doTransformLowestSpeeds = TRUE 88 | 89 | 90 | 91 | ## RESULTS FILES 92 | 93 | # should the original wind speeds for each farm be saved? 94 | # this will let you skip the first (slow) part of processing if you want to rerun 95 | save_original_wind_speeds = TRUE 96 | 97 | # should the modified wind speeds for each farm be saved? 
98 | # these are the 'bias corrected' speeds which give the desired energy output 99 | save_modified_wind_speeds = FALSE 100 | 101 | # should the hourly power output / capacity factor for each farm be saved? 102 | # note, this file will be similar in size to the wind speeds (i.e. big) 103 | save_hourly_farm_mw = FALSE 104 | save_hourly_farm_cf = FALSE 105 | 106 | 107 | # should MW/CF be aggregated across farms? 108 | # use '*' to aggregate all farms together (MW is the sum of all, CF is the capacity-weighted average) 109 | # use other column names from windFarms to split into groups 110 | # use '' to do no aggregation at all 111 | # 112 | # e.g. c('iso', 'offshore') will give one file with columns for each country, one file with all onshore and offshore separated 113 | # if you'd like onshore/offshore separated *in* each country, create a new column in windFarms (windFarms$iso_off = windFarms$iso %&% ifelse(tolower(windFarms$offshore) == 'no', 'ON', 'OFF')) 114 | # 115 | # '*' is quick... others are slow as they also create the 'evolving' total (where farms don't produce before they were born) for validation purposes 116 | # 117 | save_files_split_by = c('*') 118 | 119 | 120 | 121 | 122 | ## GLORIOUS GUI TYPE STUFF 123 | 124 | # do you want a graphical / interactive experience (at the expense of speed) 125 | lots_of_plots = FALSE 126 | 127 | # do you want any plots at all (at the expense of R claiming the foreground when redrawing) 128 | a_few_plots = FALSE 129 | -------------------------------------------------------------------------------- /data/INPUT_CH.CSV: -------------------------------------------------------------------------------- 1 | id,country,name,lon,lat,offshore,height,capacity,power_curve,PR 2 | CH1,CH,Feldmoos,8.06,46.99,no,60,0.9,NEG.Micon.NM52.900,0.983 3 | CH2,CH,Gatsch,8.594,46.647,no,45,0.6,Enercon.E40.600,0.983 4 | CH3,CH,Collonges,7.03,46.17,no,100,2,Enercon.E70.2000,0.983 5 | CH4,CH,Gatsch,8.594,46.647,no,55,0.9,Enercon.E44.900,0.983 6 | CH5,CH,Oberer Grenchenberg,7.4,47.23,no,30,0.15,Bonus.B23.150,0.983 7 | CH6,CH,Martigny,7.03,46.17,no,100,2,Enercon.E82.2000,0.983 8 | CH7,CH,Saint-Brais,7.113,47.305,no,80,4,Enercon.E82.2000,0.983 9 | CH8,CH,Calandawind,9.526,46.879,no,120,3,Vestas.V112.3000,0.983 10 | CH9,CH,Feldmoos,8.06,46.99,no,50,0.95,NEG.Micon.NM52.900,0.983 11 | CH10,CH,Mount Crosin,7.017,47.18,no,95,16,Vestas.V90.2000,0.983 12 | CH11,CH,Peuchapatte,6.978,47.246,no,110,6.9,Enercon.E82.2300,0.983 13 | CH12,CH,Gatsch,8.594,46.647,no,55,1.8,Enercon.E44.900,0.983 14 | CH13,CH,Gries,8.374,46.463,no,85,2.3,Enercon.E70.2300,0.983 15 | CH14,CH,Lutersarni,8.108,46.992,no,80,2.3,Enercon.E82.2300,0.983 16 | CH15,CH,Charrat,7.145,46.131,no,100,3,Enercon.E101.3000,0.983 17 | CH16,CH,Mount Crosin,7.013,47.177,no,95,8,Vestas.V90.2000,0.983 18 | CH17,CH,Mount Crosin,7.06,47.198,no,100,6.6,Vestas.V112.3000,0.983 19 | CH18,CH,Mount Crosin,6.983,47.161,no,110,6.6,Vestas.V112.3000,0.983 20 | CH19,CH,Gries,8.374,46.463,no,115,7.05,Enercon.E92.2300,0.983 21 | CH20,CH,Extra 1,7.954166667,47.20416667,no,100,0.419,Enercon.E101.3000,0.983 22 | CH21,CH,Extra 2,10.3375,46.72916667,no,80,0.419,Enercon.E66.2000,0.983 23 | CH22,CH,Extra 3,10.2625,46.90416667,no,105,0.419,Enercon.E101.3000,0.983 24 | CH23,CH,Extra 4,9.354166667,47.0375,no,60,0.419,Enercon.E82.2000,0.983 25 | CH24,CH,Extra 5,8.770833333,46.90416667,no,70,0.419,REPower.MD77.1500,0.983 26 | CH25,CH,Extra 6,8.9125,47.44583333,no,30,0.419,Vestas.V112.3000,0.983 27 | CH26,CH,Extra 
7,9.1875,46.37083333,no,55,0.419,Enercon.E101.3000,0.983 28 | CH27,CH,Extra 8,9.279166667,47.57083333,no,80,0.419,Enercon.E82.2000,0.983 29 | CH28,CH,Extra 9,6.120833333,46.30416667,no,60,0.419,Vestas.V112.3000,0.983 30 | CH29,CH,Extra 10,9.520833333,46.67916667,no,30,0.419,Enercon.E44.900,0.983 31 | CH30,CH,Extra 11,8.354166667,47.5375,no,80,0.419,Nordtank.NTK500,0.983 32 | CH31,CH,Extra 12,8.3625,46.8125,no,100,0.419,Enercon.E101.3000,0.983 33 | CH32,CH,Extra 13,8.1875,46.87083333,no,100,0.419,Enercon.E82.2000,0.983 34 | CH33,CH,Extra 14,8.345833333,46.72916667,no,100,0.419,Enercon.E82.2300,0.983 35 | CH34,CH,Extra 15,8.304166667,46.7375,no,80,0.419,Gamesa.G47.660,0.983 36 | CH35,CH,Extra 16,8.929166667,46.1625,no,120,0.419,Enercon.E70.2300,0.983 37 | CH36,CH,Extra 17,9.720833333,46.57916667,no,55,0.419,Enercon.E70.2300,0.983 38 | CH37,CH,Extra 18,8.754166667,47.29583333,no,75,0.419,Enercon.E40.600,0.983 39 | CH38,CH,Extra 19,8.970833333,47.4625,no,85,0.419,Siemens.SWT.3.0.101,0.983 40 | CH39,CH,Extra 20,7.345833333,46.1625,no,120,0.419,Enercon.E92.2300,0.983 41 | -------------------------------------------------------------------------------- /data/INPUT_CH.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/renewables-ninja/vwf/5ce182d09f9d50c10be03cb7ebdc0bf41e18d86e/data/INPUT_CH.PNG -------------------------------------------------------------------------------- /data/INPUT_CH.R: -------------------------------------------------------------------------------- 1 | 2 | # our wind farms file with coordinates, turbine models, etc. 3 | windFarmFile = 'Q:/VWF/data/INPUT_CH.CSV' 4 | 5 | # the simplified column names used in these files (must be the same across all files, and must match the number of columns in the CSV) 6 | windFarmCols = c('id', 'iso', 'name', 'lon', 'lat', 'offshore', 'height', 'capacity', 'power_curve', 'PR') 7 | -------------------------------------------------------------------------------- /lib/VWF.EXTRAPOLATE.R: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # # 3 | # BSD 3-Clause License # 4 | # Copyright (C) 2012-2017 Iain Staffell # 5 | # All rights reserved. 
# 6 | # # 7 | ################################################################## 8 | 9 | 10 | 11 | 12 | ######################################################################################################################## 13 | ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## 14 | ######################################################################################################################## 15 | 16 | ##### 17 | ## ## WIND PROFILE EXTRAPOLATION ==== VERSION FOR PRE-COMPILING 18 | ##### 19 | 20 | 21 | # calculate the wind extrapolation parameters for a given data point (one location and height) 22 | # takes four parameters: wind speed at 2, 10, 50 metres, and the displacement height 23 | # returns two parameters: the scale factor (A) and reference height (z) 24 | # wind speed at any height can be calculated from: v = A * log(h / z) 25 | extrapolate_log_law = function(w2m, w10m, w50m, dh) 26 | { 27 | # assemble our three heights and wind speeds 28 | h = c(2+dh, 10+dh, 50) 29 | v = c(w2m, w10m, w50m) 30 | 31 | # linearise and perform a ls fit 32 | # weight the data at 50m more strongly 33 | logh = log(h) 34 | fit = lm(v ~ logh, weights=c(1,1,2)) 35 | 36 | # extract our coefficients 37 | # v = A log(h) - A log(z) therefore slope = A, exp(-intercept / A) = z 38 | A = coef(fit)[[2]] 39 | z = exp(-coef(fit)[[1]] / A) 40 | 41 | return( c(A, z) ) 42 | } 43 | 44 | 45 | 46 | # calculate the wind extrapolation parameters for a given data point (one location and height) 47 | # takes four parameters: wind speed at 2, 10, 50 metres, and the displacement height 48 | # returns two parameters: the scale factor (epsilon) and shear coefficient (alpha) 49 | # wind speed at any height can be calculated from: v = epsilon * h ^ alpha 50 | extrapolate_power_law = function(w2m, w10m, w50m, dh) 51 | { 52 | # assemble our three heights and wind speeds 53 | h = c(2+dh, 10+dh, 50) 54 | v = c(w2m, w10m, w50m) 55 | 56 | # linearise and perform a ls fit 57 | # weight the data at 50m more strongly 58 | logh = log(h) 59 | logv = log(v) 60 | 61 | fit = lm(logv ~ logh, weights=c(1,1,2)) 62 | 63 | # extract our coefficients 64 | # v2 / v1 = (h2 / h1) ^ alpha, therefore v2 = epsilon * h ^ alpha 65 | epsilon = exp(coef(fit)[[1]]) 66 | alpha = coef(fit)[[2]] 67 | 68 | return( c(epsilon, alpha) ) 69 | } 70 | 71 | 72 | 73 | # wrapper function to calculate the extrapolation parameters for all data points 74 | # takes the filename to be processed 75 | # returns a list of arrays, one containing each parameter returned by the chosen extrapolation function 76 | # 77 | # note - if you want to change which extrapolation function is used, change the mapply() statement, and the last segment of code (extracting & returning parameters) 78 | # 79 | extrapolate_ncdf = function(nc.filename) 80 | { 81 | 82 | ##### 83 | ## ## READ THE NECESSARY NETCDF DATA 84 | ##### 85 | 86 | # open the file 87 | nc$open_file(nc.filename) 88 | flush("Calculating wind shear using the log law -", format(nc$date)) 89 | 90 | # extract wind speed at 2, 10 and 50 metres - format is speed2m[lon, lat, time] 91 | wind2m = nc$get_speed02m() 92 | wind10m = nc$get_speed10m() 93 | wind50m = nc$get_speed50m() 94 | 95 | # extract displacement height 96 | if (nc$model == 'MERRA1') disph = nc$get_var('disph') 97 | if (nc$model == 'MERRA2') disph = nc$get_var('DISPH') 98 | 99 | # close this input file 100 | nc$close_file() 101 | 102 | 103 | 104 | 105 | 106 | ##### 107 | ## ## CALCULATE ALL 
EXTRAPOLATION PARAMETERS 108 | ##### 109 | 110 | # create our results storage 111 | profile = NULL 112 | 113 | # run through each hour in the data set 114 | for (h in 1:length(nc$hour)) 115 | { 116 | flush(sprintf(" %02d:00", h-1)) 117 | 118 | # process all locations in parallel 119 | wp = foreach (y = 1:length(nc$lat), .combine=cbind) %dopar% 120 | { 121 | mapply(FUN=extrapolate_log_law, wind2m[ , y, h], wind10m[ , y, h], wind50m[ , y, h], disph[ , y, h]) 122 | } 123 | 124 | # bind the results together 125 | profile = cbind(profile, wp) 126 | 127 | flush("\b\b\b\b\b\b") 128 | } 129 | 130 | clear_line() 131 | 132 | 133 | # extract the parameters from extrapolate_log_law(), and reshape to match our input arrays 134 | A = array(profile[1, ], dim=dim(wind2m), dimnames=dimnames(wind2m)) 135 | z = array(profile[2, ], dim=dim(wind2m), dimnames=dimnames(wind2m)) 136 | 137 | # return as a list 138 | list(A=A, z=z) 139 | } 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | ######################################################################################################################## 149 | ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## 150 | ######################################################################################################################## 151 | 152 | 153 | ##### 154 | ## ## WIND PROFILE EXTRAPOLATION ==== OLD VERSION 155 | ##### 156 | 157 | # function that takes our speed and height data and extrapolates the wind speed up to hub height 158 | # uses the log law, deriving the roughness length from the set of observations 159 | extrapolateToHubHeight = function(row) 160 | { 161 | # assemble our data 162 | h = as.numeric(c(row["height2m"], row["height10m"], row["height50m"])) 163 | v = as.numeric(c(row["speed2m"], row["speed10m"], row["speed50m"])) 164 | 165 | hx = as.numeric(row["hubHeight"]) 166 | 167 | 168 | ### FIX ME ### 169 | # 170 | # ## this might be faster, and cleaner 171 | # h = c(row$height2m, row$height10m, row$height50m) 172 | # v = c(row$speed2m, row$speed10m, row$speed50m) 173 | # hx = row$hubHeight 174 | # 175 | # 176 | ### FIX ME ### 177 | 178 | 179 | # extrapolate to our hub height 180 | vx = extrapolateUsingLogLaw(h, v, hx) 181 | 182 | #plot(h, v, xlim=c(0,100), ylim=c(min(v)/1.3, max(v)*1.3)) 183 | #lines(1:100, A*log(1:100 / z), col="red3") 184 | #lines(1:100, epsilon*(1:100)^alpha, col="blue") 185 | #points(hx, vx, pch=16, col="purple") 186 | 187 | if (is.infinite(vx) | is.na(vx)) 188 | vx = 0 189 | 190 | return (vx) 191 | } 192 | 193 | 194 | # h = list of heights for which we have measurements 195 | # v = list of speeds at those heights 196 | # hx = hub height at which you want to know the speed 197 | extrapolateUsingLogLaw = function(h, v, hx) 198 | { 199 | # linearise and perform a ls fit 200 | # weight the data at 50m more strongly 201 | log_h = log(h) 202 | 203 | fit = lm(v ~ log_h, weights=c(1,1,2)) 204 | 205 | # extract our coefficients 206 | # v = A log(h) - A log(z) therefore slope = A, exp(-intercept / A) = z 207 | A = coef(fit)[[2]] 208 | z = exp(-coef(fit)[[1]] / A) 209 | 210 | # extrapolate to the desired height 211 | vx = A * log(hx / z) 212 | 213 | return (vx) 214 | } 215 | 216 | 217 | 218 | # h = list of heights for which we have measurements 219 | # v = list of speeds at those heights 220 | # hx = hub height at which you want to know the speed 221 | extrapolateUsingPowerLaw = function(h, v, hx) 222 | { 223 | # linearise and perform a ls fit 224 | # weight the 
data at 50m more strongly 225 | log_h = log(h) 226 | log_v = log(v) 227 | 228 | fit = lm(log_v ~ log_h, weights=c(1,1,2)) 229 | 230 | # extract our coefficients 231 | # v2 / v1 = (h2 / h1) ^ alpha, therefore v2 = epsilon * h ^ alpha 232 | epsilon = exp(coef(fit)[[1]]) 233 | alpha = coef(fit)[[2]] 234 | 235 | # extrapolate to the desired height 236 | vx = epsilon * hx ^ alpha 237 | 238 | return (vx) 239 | } 240 | 241 | 242 | 243 | -------------------------------------------------------------------------------- /lib/VWF.EXTRAS.R: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # # 3 | # BSD 3-Clause License # 4 | # Copyright (C) 2012-2017 Iain Staffell # 5 | # All rights reserved. # 6 | # # 7 | ################################################################## 8 | 9 | 10 | 11 | 12 | # 13 | # correct wind speeds based on air density 14 | # 15 | # w = wind speed at hub height (m/s) 16 | # rho_0 = air density at ground level (kg/m^3) 17 | # z = hub height (m) 18 | # 19 | air_density_correction = function(w, rho_0, z) 20 | { 21 | # estimate air density at hub height using the international standard atmosphere 22 | rho = rho_0 - 0.00012*z 23 | 24 | # decide the exponent based on the wind speed 25 | # 1/3 up to 8 m/s, then a smooth-step function up to 2/3 above 13 m/s 26 | # this was fitted in matlab from the form x = [0, 1]; y = -2x^3 + 3x^2; 27 | m = -2/375 * w^3 + 21/125 * w^2 - 208/125 * w + 703/125 28 | m[ w < 8 ] = 1/3 29 | m[ w > 13 ] = 2/3 30 | 31 | # modify the wind speed 32 | w = w * (1.225 / rho)^m 33 | w 34 | } 35 | 36 | 37 | 38 | ######################################################################################################################## 39 | ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## 40 | ######################################################################################################################## 41 | 42 | 43 | ##### 44 | ## ## SPEED TO POWER CONVERSION THINGS 45 | ##### 46 | 47 | 48 | 49 | # 50 | # take a time series of wind speeds and convert to load factors 51 | # 52 | # pass: 53 | # a vector of wind speeds 54 | # the power curve of the wind turbine / farm 55 | # the scale factor and offset for adjusting wind speeds 56 | # 57 | wind_speed_to_power_output = function(windSpeed, powerCurve, myScalar=1.00, myOffset=0.00) 58 | { 59 | # transform the wind speeds 60 | windSpeed = (windSpeed + myOffset) * myScalar 61 | #windSpeed = (windSpeed * myScalar) + myOffset 62 | 63 | # convert to a power curve index 64 | # (i.e. 
which element of powerCurve to access) 65 | index = (100 * windSpeed) + 1 66 | 67 | # constrain this to within acceptable limits 68 | index[ index < 1 ] = 1 69 | index[ index > 4000 ] = 4000 70 | 71 | return(powerCurve[index]) 72 | } 73 | 74 | 75 | # 76 | # find the fixed offset to add to wind speeds such that the resulting load factor equals energyTarget 77 | # 78 | # pass: 79 | # speed = the wind speed time series 80 | # farmCurve = the power curve you want to use for this farm 81 | # myScalar = the scalar you want to use for this farm 82 | # energyInitial = the initial load factor coming from unmodified MERRA 83 | # energyTarget = the desired load factor that we are fitting to 84 | find_farm_offset = function(speed, farmCurve, myScalar, energyInitial, energyTarget, verbose=FALSE) 85 | { 86 | myOffset = 0 87 | 88 | # decide our initial search step size 89 | stepSize = -0.64 90 | if (energyTarget > energyInitial) 91 | stepSize = 0.64 92 | 93 | repeat 94 | { 95 | # change our offset 96 | myOffset = myOffset + stepSize 97 | 98 | # calculate the yield with our farm curve 99 | mylf = wind_speed_to_power_output(speed, farmCurve, myScalar, myOffset) 100 | energyGuess = mean(mylf) 101 | 102 | if (verbose) cat("target =", sprintf("%2.2f%%", 100*energyTarget), "~ guess =", sprintf("%2.2f%%", 100*energyGuess), "~ error =", sprintf("%+2.3f%%", 100 * (energyGuess - energyTarget) / energyTarget), "% with offset =", myOffset, "\n") 103 | 104 | # if we have overshot our target, then repeat, searching the other direction 105 | # if ((guess < target & sign(step) < 0) | (guess > target & sign(step) > 0)) 106 | if (sign(energyGuess - energyTarget) == sign(stepSize)) 107 | stepSize = -stepSize / 2 108 | 109 | # if we have done enough loops then step-size is smaller than our power curve's resolution 110 | if (abs(stepSize) < 0.002) 111 | break 112 | 113 | # if we have ended up in a very strange place, quit, and spit an error later on 114 | if (myOffset < -20 | myOffset > 20) 115 | break 116 | } 117 | 118 | return( myOffset ) 119 | } 120 | 121 | 122 | # 123 | # find the scalar to multiply wind speeds by such that the resulting load factor equals energyTarget 124 | # 125 | # pass: 126 | # speed = the wind speed time series 127 | # farmCurve = the power curve you want to use for this farm 128 | # myOffset = the offset you want to use for this farm 129 | # energyInitial = the initial load factor coming from unmodified MERRA 130 | # energyTarget = the desired load factor that we are fitting to 131 | find_farm_scalar = function(speed, farmCurve, myOffset, energyInitial, energyTarget, verbose=FALSE) 132 | { 133 | myScalar = 1.00 134 | 135 | # decide our initial search step size 136 | stepSize = -0.128 137 | if (energyTarget > energyInitial) 138 | stepSize = 0.128 139 | 140 | repeat 141 | { 142 | # change our scalar 143 | myScalar = myScalar + stepSize 144 | 145 | # calculate the yield with our farm curve 146 | mylf = wind_speed_to_power_output(speed, farmCurve, myScalar, myOffset) 147 | energyGuess = mean(mylf) 148 | 149 | if (verbose) cat("target =", sprintf("%2.2f%%", 100*energyTarget), "~ guess =", sprintf("%2.2f%%", 100*energyGuess), "~ error =", sprintf("%+2.3f%%", 100 * (energyGuess - energyTarget) / energyTarget), "% with scalar =", myScalar, "\n") 150 | 151 | # if we have overshot our target, then repeat, searching the other direction 152 | # if ((guess < target & sign(step) < 0) | (guess > target & sign(step) > 0)) 153 | if (sign(energyGuess - energyTarget) == sign(stepSize)) 154 | stepSize = -stepSize / 2 
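# (halving the step each time we overshoot makes this a simple bisection search on the scalar)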
155 | 156 | # if we have done enough loops then step-size is smaller than our power curve's resolution 157 | if (abs(stepSize) < 0.0002) 158 | break 159 | 160 | # if we have ended up in a very strange place, quit, and spit an error later on 161 | if (myScalar <= 0 | myScalar > 5) 162 | break 163 | } 164 | 165 | return( myScalar ) 166 | } 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | # 176 | # do what it says on the tin 177 | # 178 | plot_power_curve_and_wind_speed = function(speed, myCurve, myOffset, mySpread) 179 | { 180 | x = seq(0, 40, 0.01) 181 | y = myCurve 182 | y[y==0] = NA 183 | 184 | # plot the density of wind speeds 185 | ds = density(speed) 186 | plot(ds$x, 4*ds$y, type='h', col='grey85', xlab="MERRA wind speed (m/s)", ylab="Load factor", lwd=2, main=paste(myName, "=", myModel), xlim=c(0, 30), ylim=c(0, 1.01)) 187 | 188 | # the original power curve 189 | lines(x, y, lwd=2) 190 | 191 | # our convoluted power curve 192 | farmCurve = convoluteFarmCurve(myCurve, myOffset, mySpread) 193 | x = x / myScalar 194 | lines(x, farmCurve, col="red3") 195 | } 196 | 197 | 198 | # plot_power_curve_and_wind_speed_alt = function(speed, myCurve, myOffset, mySpread) 199 | # { 200 | # x = seq(0, 40, 0.01) 201 | # y = myCurve 202 | # y[y==0] = NA 203 | # 204 | # # plot the density of wind speeds 205 | # ds = density(speed) 206 | # plot(ds$x, 4*ds$y, type='h', col='grey85', xlab="MERRA wind speed (m/s)", ylab="Load factor", lwd=2, main=paste(myName, "=", myModel), xlim=c(0, 30), ylim=c(0, 1.01)) 207 | # 208 | # modspeed = (speed + myOffset) * myScalar 209 | # ds = density(modspeed) 210 | # lines(ds$x, 4*ds$y, col='grey45', lwd=3) 211 | # 212 | # # the original power curve 213 | # lines(x, y, lwd=2) 214 | # 215 | # # our convoluted power curve 216 | # farmCurve = convoluteFarmCurve(myCurve, 0, mySpread) 217 | # lines(x, farmCurve, col="red3") 218 | # } 219 | 220 | 221 | 222 | 223 | 224 | 225 | ######################################################################################################################## 226 | ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## 227 | ######################################################################################################################## 228 | 229 | ##### 230 | ## ## RESHAPE DATA 231 | ##### 232 | 233 | # 234 | # function to reshape a wide format matrix (with only row and column numbers as descriptors) 235 | # to long format, optionally specifying the labels to assign to each row and column number. 
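# (for illustration: reshapeData(matrix(1:6, 2, 3), c('a','b'), c('x','y','z')) returns six (row, col, value) triples in column-major order: (a,x,1), (b,x,2), (a,y,3), ...)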
236 | # 237 | reshapeData = function(myMatrix, myRowLabels = FALSE, myColLabels = FALSE) 238 | { 239 | # check how many variables we have 240 | nRows = nrow(myMatrix) 241 | nCols = ncol(myMatrix) 242 | 243 | if (!identical(myRowLabels, FALSE) & nRows != length(myRowLabels)) 244 | { 245 | cat("reshapeData -- warning: number of rows in myMatrix does not equal the number of myRowVars labels\n"); 246 | myRowVars = FALSE 247 | } 248 | 249 | if (!identical(myColLabels, FALSE) & nCols != length(myColLabels)) 250 | { 251 | cat("reshapeData -- warning: number of columns in myMatrix does not equal the number of myColVars labels\n"); 252 | myColVars = FALSE 253 | } 254 | 255 | # invent row and column labels if you didn't 256 | if (identical(myRowLabels, FALSE)) 257 | myRowLabels = 1:nRows 258 | if (identical(myColLabels, FALSE)) 259 | myColLabels = 1:nCols 260 | 261 | 262 | # create a long list of all variable combinations 263 | allRowsList = rep(1:nRows, times=nCols) # 1 2 3 1 2 3 1 2 3 1 2 3 264 | allColsList = rep(1:nCols, each=nRows) # A A A B B B C C C D D D 265 | 266 | # create matrices of row and column vars 267 | matrixRows = matrix(myRowLabels[allRowsList], nRows, nCols) 268 | matrixCols = matrix(myColLabels[allColsList], nRows, nCols) 269 | 270 | 271 | # create our final long format list 272 | return ( data.frame( row=as.vector(matrixRows), col=as.vector(matrixCols), value=as.vector(myMatrix) ) ) 273 | } 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | ######################################################################################################################## 286 | ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## 287 | ######################################################################################################################## 288 | 289 | ##### 290 | ## ## INTERPOLATE AND CONVOLUTE POWER CURVES 291 | ##### 292 | 293 | # 294 | # function to turn a low resolution power curve file (e.g. 0..40 m/s in 1 m/s steps) 295 | # into the kind we need, with 0.01 m/s resolution, using cubic spline interpolation 296 | # 297 | interpolateTable = function(oldData, newX) 298 | { 299 | # remember our old x index 300 | oldX = oldData[ , 1]; 301 | 302 | # create a matrix to hold our new data 303 | newData = mat.or.vec(length(newX), ncol(oldData)); 304 | 305 | # fill our new x axis 306 | newData[ , 1] = newX; 307 | 308 | 309 | # run through each column.. 310 | for (i in 2:ncol(oldData)) 311 | { 312 | # extract the power curve 313 | oldY = oldData[ , i]; 314 | yMax = max(oldY); 315 | 316 | # create a spline.. but ignore any zeros past the upper cut-off speed 317 | # because the spline goes nuts when there's a step change.. 
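# (we therefore fit only the points below 10 m/s or with non-zero output, dropping the flat zero tail above the cut-out speed)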
318 | s = spline(oldX[oldX<10 | oldY>0], oldY[oldX<10 | oldY>0], method="fmm", xout=newX); 319 | 320 | # recover some dignity 321 | # clip numbers at zero and the original max (with 0.1% leeway) 322 | s$y[s$y < yMax * 0.001] = 0; 323 | s$y[s$y > yMax * 0.999] = yMax; 324 | 325 | # reinstate the lower cutoff limit 326 | cutoff = min(which(oldY > 0)); # finds which array index has the first non-zero speed 327 | cutoff = oldX[cutoff] - 0.01; # finds the speed corresponding to this 328 | if (cutoff > 3.5) cutoff = 3.5; # none too high 329 | if (cutoff < 2.5) cutoff = 2.5; # none too low 330 | s$y[s$x < cutoff] = 0; 331 | 332 | # reinstate the upper cutoff limit 333 | cutoff = max(which(oldY > 0)); 334 | cutoff = oldX[cutoff] + 0.01; 335 | s$y[s$x > cutoff] = 0; 336 | 337 | # and save 338 | newData[ , i] = s$y; 339 | } 340 | 341 | # convert to a data frame 342 | newData = data.frame(newData) 343 | colnames(newData) = colnames(oldData) 344 | 345 | return (newData); 346 | 347 | } 348 | 349 | 350 | 351 | # 352 | # function to turn a 0..40 m/s turbine power curve into an aggregate farm curve convoluted by N(mean, sd) 353 | # 354 | convoluteFarmCurve = function(myCurve, myMean, mySD) 355 | { 356 | # define our output resolution 357 | resolution = 0.01 358 | real_x = seq(0, 40, 0.01) 359 | 360 | # pad our curve sufficiently for the filtering 361 | x = seq(-10, 50, 0.01) 362 | y = c(rep(0, 1000), myCurve, rep(0, 1000)) 363 | 364 | # build a gaussian filter 365 | w = 10 366 | convolver.x = seq(-w, w, 0.01) 367 | convolver.y = dnorm(convolver.x, myMean, mySD) 368 | convolver.y = convolver.y / sum(convolver.y) 369 | 370 | smooth_y = filter(y, convolver.y, sides=2) 371 | 372 | x_range = which(x == min(real_x)) : which(x == max(real_x)) 373 | 374 | return(smooth_y[x_range]) 375 | } 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | ######################################################################################################################## 384 | ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## 385 | ######################################################################################################################## 386 | 387 | 388 | ##### 389 | ## ## WIND SPEED TEMPORAL INTERPOLATION AND NOISE INJECTION 390 | ##### 391 | 392 | interpolate_wind_speed = function(windSpeed, resolution=30) 393 | { 394 | ## get rid of fools 395 | if (resolution > 30) 396 | { 397 | cat("interpolate_wind_speed: doing nothing because resolution is greater than 30 minutes...\n") 398 | return(windSpeed) 399 | } 400 | 401 | 402 | ## 403 | ## build our new storage 404 | steps = (60 / resolution) 405 | if (steps %% 1 != 0) 406 | stop('interpolate_wind_speed: must have an integer number of steps within each hour -- resolution =' %&% resolution %&% 'minutes just nah work!\n') 407 | 408 | ws = windSpeed 409 | for (i in 2:steps) 410 | ws = rbind(ws, windSpeed) 411 | 412 | 413 | 414 | ## 415 | ## generate the new date sequence 416 | 417 | # create originals 418 | dates = ymd_hm(windSpeed$Timestamp) 419 | index = seq_along(dates) * steps 420 | index = index - (steps - 1) 421 | 422 | # start building big versions 423 | datesAll = dates 424 | indexAll = index 425 | 426 | for (i in 1:(steps-1)) 427 | { 428 | shift = i * resolution 429 | datesAll = c(datesAll, dates + minutes(shift)) 430 | indexAll = c(indexAll, index + i) 431 | } 432 | 433 | # interleave them 434 | datesAll = datesAll[ order(indexAll) ] 435 | 436 | # generate the sequence 437 | ws[ , 1] = 
datesAll 438 | 439 | 440 | 441 | ## 442 | ## do the interpolation 443 | 444 | # original data is on the half-hour 445 | x1 = seq(1, nrow(windSpeed)) - 0.5 446 | 447 | # new data will be the average of each half-hour - i.e. on the quarter hour 448 | x2 = (seq(1, nrow(ws)) - 0.5) / steps 449 | 450 | # shower down 451 | for (i in 2:ncol(windSpeed)) 452 | { 453 | # ignore if all NA 454 | if (sum(!is.na(ws[ , i])) == 0) next 455 | # spline otherwise 456 | ws[ , i] = spline(x1, windSpeed[ , i], xout=x2)$y 457 | } 458 | 459 | return(ws) 460 | } 461 | 462 | 463 | inject_noise_into_wind_speeds = function(windSpeed, stdev) 464 | { 465 | len = dim(windSpeed)[1] * dim(windSpeed)[2] 466 | noise = rnorm(len, 1.000, stdev) 467 | 468 | windSpeed * noise 469 | } 470 | 471 | -------------------------------------------------------------------------------- /lib/VWF.FARMS.R: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # # 3 | # BSD 3-Clause License # 4 | # Copyright (C) 2012-2017 Iain Staffell # 5 | # All rights reserved. # 6 | # # 7 | ################################################################## 8 | 9 | 10 | 11 | 12 | ####################################################################################################################### 13 | ## 14 | ## global storage: 15 | ## 16 | ## farms = data frame of wind farms 17 | ## essential columns are name, lon, lat, height, model 18 | ## 19 | ## 20 | ## reading in wind farm csv data: 21 | ## 22 | ## prepare_windfarms() - check all your wind farms are valid and prepares them for use 23 | ## prepare_farms_files() 24 | ## 25 | ## plotting and locating farms: 26 | ## 27 | ## plot_farms(wf, cap.weight=FALSE, add.points=FALSE, ...) 
28 | ## 29 | ## define_farm_region(wf, margin=10) 30 | ## 31 | ## find_nearest_farm(myfarm, wf, cols) 32 | ## find_farm_outliers(wf) 33 | ## locate_farm_countries(wf) 34 | ## 35 | 36 | 37 | 38 | # THIS IS NEXT LEVEL VERSION 5 SHIT 39 | 40 | # 41 | # READS IN THE WIND FARMS 42 | # CHECKS THEY ARE VALID 43 | # RETURNS THEM 44 | # 45 | prepare_windfarms = function(windFarmInfo, powerCurveFile = 'E:/WORK/Wind Modelling/~ Wind Turbine Power Curves/R/Wind Turbine Power Curves ~ 5 (0.01ms with 0.00 w smoother).csv') 46 | { 47 | 48 | # read in the .r file that gives the csv file path and the column names 49 | if (!file.exists(windFarmInfo)) 50 | stop("prepare_windfarms: Cannot find your wind farms - please specify a different windFarmInfo:\n", windFarmInfo, "\n\n") 51 | 52 | source(windFarmInfo) 53 | 54 | 55 | 56 | # read in details on our wind farms 57 | wf = read_csv(fnchk(windFarmFile)) 58 | colnames(wf) = windFarmCols 59 | 60 | 61 | # check our wind farm files have all the necessary columns 62 | essentialWindFarmCols = c('name', 'lon', 'lat', 'height', 'power_curve') 63 | 64 | check = essentialWindFarmCols %in% windFarmCols 65 | if (sum(check) < length(check)) stop("prepare_windfarms: You must have the following columns in your wind farms file:\n", paste(essentialWindFarmCols, collapse=', '), "\n\n") 66 | 67 | # and those columns have valid input 68 | for (e in essentialWindFarmCols) 69 | { 70 | bad = is.na(wf[ , e]) 71 | if (sum(bad) > 0) stop(" prepare_windfarms: You must have the complete data for " %&% e %&% " in your wind farms file:\n\n") 72 | } 73 | 74 | 75 | # assign default capacity if none is specified 76 | if (is.null(wf$capacity)) 77 | { 78 | cat(" prepare_windfarms: Farm capacity is not specified - setting all farms to 1 MW...\n") 79 | wf$capacity = 1 80 | } 81 | 82 | # assign default performance ratio if none is specified 83 | if (is.null(wf$PR)) 84 | { 85 | cat(" prepare_windfarms: Farm performance ratio is not specified - setting all farms to 100% (i.e. 
no bias correction)...\n") 86 | wf$PR = 1 87 | } 88 | 89 | # assign default start date if none is specified 90 | if (is.null(wf$date)) 91 | { 92 | cat(" prepare_windfarms: Farm start date is not specified - setting all farms to be newborns...\n") 93 | wf$date = today() 94 | } 95 | 96 | 97 | # check our date formats -- dmy_hm -> dmy 98 | fix = grep(':', wf$date) 99 | if (length(fix) > 0) 100 | { 101 | wf$date[fix] = get_text_before(wf$date[fix], ' ') 102 | } 103 | 104 | # check our date formats -- ymd -> dmy 105 | fix = grep('-', wf$date) 106 | if (length(fix) > 0) 107 | { 108 | d = ymd(wf$date[fix]) 109 | wf$date[fix] = format(d, '%d/%m/%Y') 110 | } 111 | 112 | 113 | 114 | 115 | # the file our power curves live in 116 | powerCurveFile = fnchk(powerCurveFile, stopOnError=FALSE) 117 | if (!file.exists(powerCurveFile)) stop(" prepare_windfarms: Cannot find your power curves - please specify a different powerCurveFile:\n", powerCurveFile, "\n\n") 118 | curveNames = read.csv(powerCurveFile, nrows=1) 119 | curveNames = colnames(curveNames) 120 | 121 | ## TODO: HACK 122 | #repower_hack = (wf$power_curve == 'REpower 3.4M 104') 123 | #wf$power_curve[repower_hack] = 'REpower 3.4M104' 124 | 125 | 126 | err0r = 0 127 | for (i in 1:nrow(wf)) 128 | { 129 | myModel = wf$power_curve[i] 130 | myModel = make.names(myModel) 131 | 132 | if ( !(myModel %in% curveNames) ) 133 | { 134 | cat (" prepare_windfarms: Couldn't find a power curve for", myModel, "\n") 135 | err0r = 1 136 | } 137 | } 138 | if (err0r) 139 | { 140 | cat ("prepare_windfarms: valid names are:\n") 141 | print(curveNames) 142 | stop("\n") 143 | } 144 | 145 | 146 | # convert wind farm names into valid column names (to match those in the wind speed file) 147 | wf$name = make.unique( make.names(wf$name) ) 148 | 149 | 150 | # simplify the farm heights to speed up the extrapolation stage 151 | # have these sort-of equally spaced in log terms (~0.015 resolution in log10) 152 | h = wf$height 153 | 154 | h[ h >= 0 ] = 0.5 * round(h[ h >= 0 ] / 0.5) 155 | h[ h >= 30 ] = 1 * round(h[ h >= 30 ] / 1) 156 | h[ h >= 60 ] = 2 * round(h[ h >= 60 ] / 2) 157 | h[ h >= 90 ] = 3 * round(h[ h >= 90 ] / 3) 158 | h[ h >= 120 ] = 4 * round(h[ h >= 120 ] / 4) 159 | 160 | wf$height = h 161 | 162 | 163 | # and return 164 | wf 165 | 166 | } 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | ####################################################################################################################### 175 | ## 176 | ## read a list of farms and coordinates 177 | ## optionally specify the column names, if they are not the default 178 | ## 179 | ## return the data 180 | ## 181 | read_farms_file = function(filename, colnames=NA) 182 | { 183 | # read the data 184 | data = read_csv(filename) 185 | 186 | # if colnames=NA, define default column names 187 | if (sum(is.na(colnames)) > 0) 188 | colnames = c('id', 'region', 'name', 'capacity', 'lat', 'lon', 'model', 'height', 'nTurbines') 189 | 190 | # check that the names match the data 191 | if (ncol(data) != length(colnames)) 192 | stop(" read_farms_file -- data has ", ncol(data), " columns, but you want to name ", length(colnames), " of them!\n\n") 193 | 194 | # warn people if we don't have what I consider to be the necessary columns 195 | essentialCols = c('lon', 'lat', 'height', 'name'); 196 | check = essentialCols %in% colnames; 197 | 198 | if (sum(check) < length(check)) 199 | cat(" read_farms_file -- Woah, you ought to have the following columns in your coordinates files:\n", essentialCols, "\n\n"); 200 | 201 | # 
apply 202 | colnames(data) = colnames 203 | return(data) 204 | } 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | ####################################################################################################################### 213 | ## 214 | ## define the outer boundary box that contains a set of wind farm coordinates 215 | ## box will contain all lat and lon coordinates within wf, plus a buffer of 'margin' degrees 216 | ## the default margin of 10 degrees around all locations gives a minimal error when loessing 217 | ## 218 | ## 219 | ## returns a list: extent$lon[1:2] and extent$lat[1:2] 220 | ## 221 | define_farm_region = function(wf, margin=10) 222 | { 223 | lonBounds = range(wf$lon); 224 | lonBounds = range(c(lonBounds-margin, lonBounds+margin)); 225 | if (lonBounds[1] < -180) lonBounds[1] = -180; 226 | if (lonBounds[1] > 180) lonBounds[1] = 180; 227 | 228 | latBounds = range(wf$lat); 229 | latBounds = range(c(latBounds-margin, latBounds+margin)); 230 | if (latBounds[1] < -90) latBounds[1] = -90; 231 | if (latBounds[1] > 90) latBounds[1] = 90; 232 | 233 | return( list(lon=lonBounds, lat=latBounds) ); 234 | } 235 | 236 | 237 | 238 | 239 | ####################################################################################################################### 240 | ## 241 | ## plot a set of wind farms on a map 242 | ## optionally set cap.weight to a number to scale points by farm capacity (bigger number = bigger points) 243 | ## optionally set other image properties (inherited from __inc.r:plot_map_points) 244 | ## optionally add points to an existing plot 245 | ## 246 | ## return the aspect ratio for saving an image 247 | ## 248 | plot_farms = function(wf, cap.weight=1, add.points=FALSE, ...) 249 | { 250 | # imported from __inc.r 251 | # plot_map_points = function(lon, lat, lonBounds=NA, latBounds=NA, padding=1, 252 | # mapBorder="darkgrey", mapFill="lightgrey", 253 | # mapAspect=1.5, shapeFile=NA, 254 | # resetPar=FALSE, blank=FALSE, ...) 255 | 256 | if (add.points == FALSE) 257 | { 258 | # plot all points the same size 259 | if (cap.weight == FALSE) 260 | ar = plot_map_points(wf$lon, wf$lat, ...) 261 | 262 | # size points proportional to capacity 263 | if (cap.weight != FALSE) 264 | { 265 | cap = wf$capacity 266 | cap = sqrt(cap * cap.weight / mean(wf$capacity)) 267 | 268 | ar = plot_map_points(wf$lon, wf$lat, cex=cap, lwd=cap*2, lend=1, ...) 269 | } 270 | 271 | # return the ideal height:width ratio for saving a PNG 272 | return(ar) 273 | 274 | } 275 | 276 | 277 | if (add.points == TRUE) 278 | { 279 | # plot all points the same size 280 | if (cap.weight == FALSE) 281 | points(wf$lon, wf$lat, ...) 282 | 283 | # size points proportional to capacity 284 | if (cap.weight != FALSE) 285 | { 286 | cap = wf$capacity 287 | cap = sqrt(cap * cap.weight / mean(wf$capacity)) 288 | 289 | points(wf$lon, wf$lat, cex=cap, lwd=cap*2, lend=1, ...) 
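# (the sqrt above keeps point *area*, rather than radius, proportional to
#  farm capacity, so the biggest farms don't totally dominate the map)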
290 | } 291 | 292 | return() 293 | 294 | } 295 | 296 | } 297 | 298 | 299 | 300 | ####################################################################################################################### 301 | ## 302 | ## find the farm in wf that is physically closest to myfarm 303 | ## return a data frame showing the supplied cols for both farms 304 | ## 305 | find_nearest_farm = function(myfarm, wf, cols = c('name', 'capacity', 'height', 'lat', 'lon')) 306 | { 307 | w = which.min( (myfarm$lat - wf$lat)^2 + (myfarm$lon - wf$lon)^2 ) 308 | rbind(myfarm[ , cols], wf[w, cols]) 309 | } 310 | 311 | 312 | 313 | ####################################################################################################################### 314 | ## 315 | ## analyse a set of farm coordinates 316 | ## draw a map, highlighting any outliers 317 | ## 318 | find_farm_outliers = function(wf) 319 | { 320 | padding = 1 321 | 322 | # what are potentially sensible boundaries? 323 | latInner = quantile(wf$lat, c(0.01, 0.99)) 324 | latInner = range(latInner - padding, latInner + padding) 325 | 326 | lonInner = quantile(wf$lon, c(0.01, 0.99)) 327 | lonInner = range(lonInner - padding, lonInner + padding) 328 | 329 | 330 | # how many farms lie outside these boundaries? 331 | f1 = (wf$lat < latInner[1] | wf$lat > latInner[2]) 332 | f2 = (wf$lon < lonInner[1] | wf$lon > lonInner[2]) 333 | filter = f1 | f2 334 | 335 | if (sum(filter) == 0) 336 | { 337 | cat("\nDon't think I found any silly outliers..\n") 338 | return() 339 | } 340 | 341 | 342 | # plot all the farms, so the sensible ones show in green.. 343 | # don't reset par so that we can add more points later 344 | plot_farms(wf, pointCol='darkgreen', resetPar=FALSE) 345 | 346 | # highlight the dodgy points 347 | points(wf[filter, "lon"], wf[filter, "lat"], pch=21, cex=1.2, col="red3", bg="pink") 348 | 349 | # identify them 350 | cat("\nFound", sum(filter), "potential outliers..") 351 | print( wf[filter, ] ) 352 | junk = readline(prompt = "Press ENTER to continue...") 353 | } 354 | 355 | 356 | 357 | ####################################################################################################################### 358 | ## 359 | ## locate which country each farm resides in - separating onshore and offshore 360 | ## 361 | ## return a data frame of coordinates with country and region 362 | ## 363 | locate_farm_countries = function(wf) 364 | { 365 | source("E:/WORK/Code/R/Coordinates to Countries.r"); 366 | 367 | # get our map from a custom shapefile which contains EN, WA, SC, NI 368 | shapeFile = 'E:/WORK/Z Data/Maps/Natural Earth/Admin Map Subunits/ne_10m_admin_0_map_subunits.shp' 369 | 370 | # set up our coordinates 371 | points = data.frame(lon = wf$lon, lat = wf$lat) 372 | 373 | # get a list of country names 374 | points = coords2country(points, offshore=TRUE, shapeFile) 375 | 376 | return(points) 377 | } 378 | 379 | 380 | 381 | -------------------------------------------------------------------------------- /lib/VWF.MAIN.PREP.R: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # # 3 | # BSD 3-Clause License # 4 | # Copyright (C) 2012-2017 Iain Staffell # 5 | # All rights reserved.
# 6 | # # 7 | ################################################################## 8 | 9 | 10 | 11 | ##### 12 | ## ## BIAS CORRECTION FUNCTIONS 13 | ##### 14 | 15 | 16 | # decide where to get performance ratio data for each farm 17 | # either set PR_column to be a column name in WindFarmFile to use the csv data 18 | # or set to NULL to use hard-coded defaults... 19 | PR_column = 'PR' 20 | 21 | 22 | 23 | # determine the scalar for each farm (if we are using fixed scalars) 24 | # this gives the number that we multiply all wind speeds by, before finding the offset which matches our desired capacity factor 25 | determine_farm_scalar = function(PR, iso='') 26 | { 27 | # if we are using the same scalar for every farm 28 | if (match_method == 1) 29 | scalar = 0.65 30 | 31 | # if we are basing the scalar on the farm's PR 32 | if (match_method == 2) 33 | { 34 | scalar = (scalar_alpha * PR) + scalar_beta 35 | 36 | # some crazy post-hacking from the Christian Grams era... 37 | # updated for Axpo 2017... 38 | # this relies on having 0.50 x 0.333 as your parameters 39 | if (iso == 'DE') scalar = scalar + 0.10 40 | if (iso == 'DK') scalar = scalar + 0.05 41 | if (iso == 'ES') scalar = scalar - 0.05 42 | if (iso == 'FR') scalar = scalar - 0.03 43 | if (iso == 'GB') scalar = scalar - 0.06 44 | if (iso == 'GR') scalar = scalar + 0.12 45 | if (iso == 'IE') scalar = scalar + 0.05 46 | if (iso == 'IT') scalar = scalar - 0.08 47 | if (iso == 'SE') scalar = scalar + 0.46 48 | 49 | # WTF with Sweden ey? 50 | } 51 | 52 | return(scalar) 53 | } 54 | 55 | 56 | 57 | # determine the offset for each farm (if we are using fixed offsets) 58 | # this gives the number we subtract from all wind speeds (in m/s), before finding the scalar which matches our desired capacity factor 59 | determine_farm_offset = function(PR) 60 | { 61 | # if we are using the same scalar for every farm 62 | if (match_method == 3) 63 | offset = 1.00 64 | 65 | # if we are basing the scalar on the farm's PR 66 | if (match_method == 4) 67 | offset = 3.00 * PR^0.333 / 0.85^0.333 68 | 69 | return(offset) 70 | } 71 | 72 | 73 | 74 | # which matching method should we use? 75 | # 76 | # 1 = fixed scalar same for every farm, find offset <--- this is the first method i used for the decline paper 77 | # 2 = fixed scalar from each farm's PR, find offset 78 | # 3 = fixed offset same for every farm, find scalar 79 | # 4 = fixed offset from each farm's PR, find scalar <--- this is the method being used in the most recent code 80 | # 81 | # offsets will add or subtract a fixed number to all wind speeds (in m/s) 82 | # scalars will multiply all speeds by a constant (dimensionless) 83 | # 84 | # here we set the default value :) 85 | # 86 | if (!exists('match_method')) match_method = 2 87 | 88 | 89 | 90 | # specify the standard deviation of the convolution 91 | # used to create the multi-turbine farm curve 92 | # (this should be improved!) 
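# one possible improvement, sketched here but NOT wired in -- it assumes each
# farm row carries the nTurbines column from the input csv, and the base value
# is illustrative: widen the spread for farms with many turbines, since more
# machines smear the aggregate curve further.
#
# get_convolution_spread_by_turbines = function(farm, base=0.6)
# {
#     n = farm$nTurbines
#     if (is.null(n) || is.na(n)) return(base)   # fall back when turbine count is unknown
#     base * (1 + log10(max(n, 1)) / 4)          # e.g. 100 turbines -> 1.5 x base
# }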
93 | get_power_curve_convolution_spread = function(farm) 94 | { 95 | return(convolverStdDev) 96 | 97 | # check either tCount or nTurbines 98 | # and adjust based on that 99 | } 100 | 101 | 102 | 103 | farms_inflate_offshore_pr = function(farms, offshore_factor=1.16, column='offshore') 104 | { 105 | if (column %notin% colnames(farms)) 106 | stop("farms_inflate_offshore_pr: cannot find the " %&% column %&% " column...\n") 107 | 108 | onshore = farms[ , column] %in% c('No', 'no') 109 | offshore = !onshore 110 | 111 | farms$PR[offshore] = farms$PR[offshore] * offshore_factor 112 | 113 | farms 114 | } 115 | 116 | 117 | 118 | 119 | ##### 120 | ## ## CHECK OUR INPUTS AND OUTPUTS 121 | ##### 122 | 123 | # check we can save files 124 | if (substr_reverse(baseSaveFolder, 1) != '/') 125 | baseSaveFolder = baseSaveFolder %&% '/' 126 | 127 | if (!dir.exists(baseSaveFolder)) 128 | { 129 | ok = dir.create(baseSaveFolder) 130 | 131 | if (!ok) 132 | stop("Could not create folder to save files in:", baseSaveFolder, "\n") 133 | } 134 | 135 | 136 | 137 | 138 | ##### 139 | ## ## SORT OUT OUR WIND FARM DATA 140 | ##### 141 | 142 | ## 143 | ## read in our wind farms data 144 | ## 145 | 146 | flush("> Preparing your wind farms...\n") 147 | 148 | windFarms = prepare_windfarms(farmOptionsFile) 149 | 150 | # inflate offshore PR 151 | if (inflate_offshore_pr) 152 | { 153 | windFarms = farms_inflate_offshore_pr(windFarms, 1.16) 154 | } 155 | 156 | 157 | # calculate the boundaries for lat and long that we're interested in 158 | # a margin of 10 degrees around all farms should eliminate interpolation errors 159 | region = define_farm_region(windFarms, margin=6.666) 160 | 161 | 162 | ## TODO: FIX -- SHOULD CHECK IF THIS REALLY NEEDS TO BE 6.666 DEGREES... 163 | 164 | 165 | # plot on a map to check we're being sensible 166 | if (a_few_plots) 167 | farm_plot_aspect = plot_farms(windFarms, pch='+', col='green3', cap.weight=TRUE) 168 | 169 | 170 | 171 | # make a list of all wind farm heights 172 | all_heights = sort(unique(windFarms$height)) 173 | 174 | 175 | 176 | 177 | ## 178 | ## read in power curve data 179 | ## 180 | 181 | # read in power curves for each turbine model 182 | # this must run from 0 to 40 m/s - and will be interpolated to 0.01 m/s steps 183 | turbCurve = read_csv(turbCurveFile) 184 | turbCurveNames = colnames(turbCurve)[-1] 185 | 186 | farmCurve = read_csv(farmCurveFile) 187 | farmCurveNames = colnames(farmCurve)[-1] 188 | 189 | 190 | # high-res interpolation 191 | turbCurve = interpolateTable(turbCurve, seq(0,40,0.01)) 192 | farmCurve = interpolateTable(farmCurve, seq(0,40,0.01)) 193 | 194 | 195 | 196 | 197 | # determine whether we need special measures for crossing the international date line... 
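# (for illustration: farms straddling 180 E might give a region of lon = c(170, 190);
#  the block below then remembers region_lon = c(170, -170), reads the full -180..180
#  grid instead, and the wind speed stage later stitches the two sides back together)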
198 | dateLineMadness = FALSE 199 | 200 | if (min(region$lon) < -180 | max(region$lon) > 180) 201 | { 202 | dateLineMadness = TRUE 203 | 204 | # remember our original lon range 205 | region_lon = region$lon 206 | if (region_lon[1] < -180) region_lon[1] = region_lon[1] + 360 207 | if (region_lon[2] > 180) region_lon[2] = region_lon[2] - 360 208 | 209 | # grab the entire range of lon from our NetCDF files 210 | region$lon = c(-180, 180) 211 | } 212 | 213 | 214 | 215 | 216 | 217 | 218 | ##### 219 | ## ## SORT OUT OUR MERRA WIND DATA 220 | ##### 221 | 222 | if (is.null(windSpeedFile)) 223 | { 224 | # find and prepare all our extrapolation data files 225 | flush("> Locating MERRA data") 226 | merra_wind = prepare_merra_files(merraFolders, merra_grouping, processNewestFirst) 227 | flush(" - Found", length(merra_wind$files), "days...\n") 228 | 229 | 230 | # create a NetCDF file handler for the wind data 231 | fn = merra_wind$files[1] 232 | nc_wind = NetCdfClass(fn, reanalysis) 233 | 234 | 235 | # set up the region we want to read (so that future reads are faster) 236 | nc_wind$subset_coords(region) 237 | 238 | 239 | flush("> Filtering NetCDF dataset down to", round(100 * diff(region$lon) * diff(region$lat) / 360 / 180, 1), "% of the earth's surface..\n") 240 | } 241 | 242 | 243 | 244 | 245 | ##### 246 | ## ## SORT OUT OUR DATA OBJECTS FOR INTERPOLATION AND EXTRAPOLATION 247 | ##### 248 | 249 | ## INTERPOLATION METHOD 250 | 251 | # either 'akima' or 'loess' 252 | spatial.method = 'akima' 253 | 254 | 255 | if (is.null(windSpeedFile)) 256 | { 257 | 258 | if (spatial.method == 'loess') 259 | { 260 | ### loess and tps require data to be passed as a 1d vector, whereas the merra data comes as a 2d array [lon, lat] 261 | ### so we pre-allocate some storage in 'long format' as a keyed vector 262 | 263 | # read in a merra variable 264 | loess_w = nc_wind$get_var('A') 265 | loess_w = loess_w[ , , 14] 266 | nc_wind$close_file() 267 | 268 | 269 | # create long-format storage for each of our input variables (holding all grid points for a single hour) 270 | # FIXME i should be using melt for this, i now know ;) 271 | loess_w = reshapeData(loess_w, nc_wind$lon, nc_wind$lat) 272 | colnames(loess_w) = c("lon", "lat", "value") 273 | 274 | # required by thin plate splines (fastTps) 275 | tps_xy_in = as.matrix(loess_w[ , 1:2]) 276 | tps_xy_out = as.matrix(windFarms[ , c('lon', 'lat')]) 277 | 278 | 279 | ### loess requires a fitting paramter - how many nearest neighbours to include in the fit 280 | 281 | # determine our loess span based on the geographic area we cover 282 | # 12 merra grid points gives almost the minimum RMS error (10 or 11 is better, but slower) 283 | loessSpan = 12 / nrow(loess_w) 284 | 285 | ### FIXME ### IMPROVE THIS ### FIXME ### 286 | loessSpan = sqrt(0.1 * loessSpan) 287 | } 288 | 289 | 290 | 291 | 292 | # if we are crossing the date line, set up the way we will process our data 293 | if (dateLineMadness) 294 | { 295 | # figure out where we want both sides of the date-line 296 | dateLineWest = which(nc_wind$lon >= region_lon[1]) 297 | dateLineEast = which(nc_wind$lon <= region_lon[2]) 298 | dateLineDims = c(nc_wind$lon[dateLineWest], nc_wind$lon[dateLineEast] + 360) 299 | originalDims = nc_wind$lon 300 | } 301 | 302 | } 303 | 304 | 305 | 306 | 307 | 308 | 309 | ##### 310 | ## ## LAUNCH THE MULTI-CORE CLUSTER 311 | ##### 312 | 313 | # build a multi-core parallel cluster 314 | if (exists('cl')) 315 | { 316 | flush("> Reusing existing", length(cl), "core cluster...\n") 317 | 318 | } else { 319 | 
320 | flush("> Building", n.cores, "core cluster...\n") 321 | cl = makeCluster(n.cores) 322 | registerDoParallel(cl) 323 | } 324 | 325 | if (spatial.method == 'akima') 326 | { 327 | library(akima) 328 | clusterExport(cl, varlist='bicubic') 329 | } 330 | 331 | if (spatial.method == 'loess') 332 | { 333 | library(MASS) 334 | } 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | # the VWF model should now be ready to run... 343 | -------------------------------------------------------------------------------- /lib/VWF.MAIN.PROGRESS.R: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # # 3 | # BSD 3-Clause License # 4 | # Copyright (C) 2012-2017 Iain Staffell # 5 | # All rights reserved. # 6 | # # 7 | ################################################################## 8 | 9 | 10 | 11 | # initialise 12 | library(lubridate) 13 | all_days = NULL 14 | big_loop = 0 15 | 16 | # chart functions 17 | chart_coords = function(dates) 18 | { 19 | if (merra_grouping == 'monthly') 20 | { 21 | all_groups = ymd(floor_date(dates, 'month')) 22 | } 23 | 24 | if (merra_grouping == 'quarterly') 25 | { 26 | y = year(dates) 27 | m = month(dates) 28 | m = 1 + (floor((m-1)/ 3) * 3) 29 | all_groups = ymd( paste(y, m, '01') ) 30 | } 31 | 32 | all_d = (dates - all_groups + 1) / ddays(1) 33 | 34 | data.frame(x=all_groups, y=all_d) 35 | } 36 | 37 | 38 | 39 | while (1) 40 | { 41 | # search for progress files... 42 | fn = list.files(baseSaveFolder, pattern='~progress~', full.names=TRUE) 43 | 44 | # sit and wait... 45 | if (length(fn) == 0) 46 | { 47 | clear_line('No progress files found...') 48 | Sys.sleep(update_time) 49 | next 50 | } 51 | 52 | 53 | # count how many good files we have 54 | all_progress = NULL 55 | 56 | # read and save progress in each 57 | for (i in 1:length(fn)) 58 | { 59 | progress = readLines(fn[i]) 60 | progress = suppressWarnings(ymd(progress)) 61 | progress = progress[ !is.na(progress) ] 62 | all_progress = c(all_progress, format(progress)) 63 | } 64 | 65 | 66 | # GUI 67 | if (big_loop == 0) 68 | { 69 | # build the list of all dates 70 | all_days = readLines(fn[i])[2] 71 | date_1 = get_text_before(all_days, ' to ') 72 | date_2 = get_text_after(all_days, ' to ') 73 | all_days = seq(ymd(date_1), ymd(date_2), 'day') 74 | 75 | # establish a clean plot 76 | all_coords = chart_coords(all_days) 77 | plot(all_coords, cex=1.5, pch=15, col='grey80', xlab='Month', ylab='Day') 78 | 79 | all_years = seq(ymd(date_1), ymd(date_2), 'year') 80 | axis(1, labels=FALSE, at=all_years) 81 | axis(3, labels=FALSE, at=all_years) 82 | } 83 | 84 | # add on our progress 85 | good_coords = chart_coords(ymd(all_progress)) 86 | points(good_coords, cex=1.5, pch=15) 87 | 88 | 89 | 90 | # TUI 91 | n = length(all_progress) 92 | N = nrow(all_coords) 93 | p = sprintf("%1.2f%%", 100*n/N) 94 | msg = paste0('[', n, ' of ', N, '] ', p) 95 | 96 | clear_line(msg) 97 | 98 | 99 | # wait... 100 | big_loop = big_loop + 1 101 | Sys.sleep(update_time) 102 | 103 | } 104 | 105 | -------------------------------------------------------------------------------- /lib/VWF.MAIN.RESULTS.R: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # # 3 | # BSD 3-Clause License # 4 | # Copyright (C) 2012-2017 Iain Staffell # 5 | # All rights reserved. 
# 6 | # # 7 | ################################################################## 8 | 9 | 10 | 11 | 12 | ##### 13 | ## ## SAVE HOURLY FILES 14 | ##### 15 | 16 | flush('> Saving results...\n') 17 | 18 | 19 | 20 | # save the bias corrected wind speeds for each farm 21 | if (save_modified_wind_speeds) 22 | { 23 | fn = baseSaveFolder %&% baseSaveFile %&% 'windspeed.corrected.' %&% xtn 24 | write_data(windSpeed, fn, xtn) 25 | flush(' ~ Written', fn, '\n') 26 | } 27 | 28 | 29 | 30 | # save the hourly power output in MW for every farm 31 | if (save_hourly_farm_cf) 32 | { 33 | fn = baseSaveFolder %&% baseSaveFile %&% '(a).farm.MW.' %&% xtn 34 | write_data(powerMW, fn, xtn) 35 | flush(' ~ Written', fn, '\n') 36 | } 37 | 38 | # save the hourly capacity factors for every farm 39 | if (save_hourly_farm_cf) 40 | { 41 | fn = baseSaveFolder %&% baseSaveFile %&% '(a).farm.CF.' %&% xtn 42 | write_data(loadFactor, fn, xtn) 43 | flush(' ~ Written', fn, '\n') 44 | } 45 | 46 | 47 | 48 | 49 | 50 | # save the total aggregate across all farms 51 | if ('*' %in% save_files_split_by) 52 | { 53 | # hourly 54 | if (ncol(powerMW) > 1) 55 | { 56 | total = rowSums(powerMW, na.rm=TRUE) 57 | 58 | } else { 59 | 60 | total = powerMW 61 | } 62 | 63 | total = data.frame(GMT=ymd_wtf(datecol), MW=total, CF=total/sum(windFarms$capacity)) 64 | 65 | fn = baseSaveFolder %&% baseSaveFile %&% '(b).hourly.MW.CF.csv' 66 | write_csv(total, fn, row.names=FALSE) 67 | flush(' ~ Written', fn) 68 | 69 | # monthly 70 | total_m = aggregate_monthly('GMT', total, mean) 71 | fn = baseSaveFolder %&% baseSaveFile %&% '(b).monthly.MW.CF.csv' 72 | write_csv(total_m, fn, row.names=FALSE) 73 | clear_line(' ~ Written', fn) 74 | 75 | # yearly 76 | total_y = aggregate_yearly('GMT', total, mean) 77 | fn = baseSaveFolder %&% baseSaveFile %&% '(b).yearly.MW.CF.csv' 78 | write_csv(total_y, fn, row.names=FALSE) 79 | clear_line(' ~ Written', fn, '\n') 80 | 81 | } 82 | 83 | 84 | 85 | 86 | # save the total MW and average CF grouped by various things (e.g. by country) 87 | for (split_col in save_files_split_by) 88 | { 89 | # skip doing nothing or everything or something silly 90 | if (split_col == '') next 91 | if (split_col == '*') next 92 | if (split_col %notin% colnames(windFarms)) 93 | { 94 | flush(' !! you want to split results by ' %&% split_col %&% ' which isnt a column in your wind farms file...\n') 95 | flush(' !!
you need to look at `save_files_split_by` and `windFarmCols`...\n') 96 | next 97 | } 98 | 99 | 100 | # establish results storage 101 | n = length(datecol) 102 | m = nrow(windFarms) 103 | snapshot_capacity = snapshot_output = data.frame(GMT=ymd_wtf(datecol)) 104 | evolving_capacity = evolving_output = data.frame(GMT=ymd_wtf(datecol)) 105 | 106 | # understand what time period this is 107 | min_date = min(evolving_output$GMT) 108 | max_date = max(evolving_output$GMT) 109 | total_duration = (max_date - min_date) / dyears(1) 110 | 111 | iii = 0 112 | 113 | # go through each value in that column 114 | for (split_val in sort(unique(windFarms[ , split_col]))) 115 | { 116 | s_c = s_o = rep(0, n) 117 | e_c = e_o = rep(0, n) 118 | 119 | # filter our farms - filter gives the farm row / speed column 120 | filter = which(windFarms[ , split_col] == split_val) 121 | if (length(filter) == 0) 122 | next 123 | 124 | # calculate the total output and capacity 125 | for (f in filter) 126 | { 127 | my_output = powerMW[ , f] 128 | my_capacity = rep(windFarms$capacity[f], n) 129 | my_start = dmy(windFarms$date[f]) 130 | 131 | # simply add these on to create the snapshot 132 | s_o = s_o + my_output 133 | s_c = s_c + my_capacity 134 | 135 | # if we have a start date, then wipe out the period before birth 136 | if (!is.na(my_start)) 137 | { 138 | unborn = (evolving_capacity$GMT < my_start) 139 | 140 | # what percentage of the whole period have you lived for? 141 | # take that to be your capacity - so that the oldest farms have 142 | # the greatest weight when we go so far back that we know nothing 143 | my_duration = (max_date - my_start) / dyears(1) 144 | my_duration = my_duration / total_duration 145 | my_scalar = (0.1 * my_duration) / windFarms$capacity[f] 146 | 147 | # now scale output and capacity by this 148 | my_output[unborn] = my_output[unborn] * my_scalar 149 | my_capacity[unborn] = my_capacity[unborn] * my_scalar 150 | } 151 | 152 | # now add these to create the evolving fleet 153 | e_o = e_o + my_output 154 | e_c = e_c + my_capacity 155 | 156 | 157 | # gui 158 | iii = iii + 1 159 | if (iii %% 10 == 0) 160 | clear_line('Processing', iii, '/', m, '-', split_val) 161 | } 162 | 163 | # push these into our results storage 164 | snapshot_output[ , split_val] = s_o 165 | snapshot_capacity[ , split_val] = s_c 166 | 167 | # push these into our results storage 168 | evolving_output[ , split_val] = e_o 169 | evolving_capacity[ , split_val] = e_c 170 | } 171 | 172 | clear_line() 173 | 174 | # save power output to file 175 | fn = baseSaveFolder %&% baseSaveFile %&% '(c).snapshot.MW.' %&% split_col %&% '.csv' 176 | write_csv(snapshot_output, fn, row.names=FALSE) 177 | flush(' ~ Written', fn) 178 | 179 | # now normalise to CF 180 | for (i in 2:ncol(snapshot_output)) 181 | { 182 | snapshot_output[ , i] = snapshot_output[ , i] / snapshot_capacity[ , i] 183 | } 184 | 185 | fn = baseSaveFolder %&% baseSaveFile %&% '(c).snapshot.CF.' %&% split_col %&% '.csv' 186 | write_csv(snapshot_output, fn, row.names=FALSE) 187 | clear_line(' ~ Written', fn) 188 | 189 | 190 | 191 | 192 | # save power output to file 193 | fn = baseSaveFolder %&% baseSaveFile %&% '(d).evolving.MW.' %&% split_col %&% '.csv' 194 | write_csv(evolving_output, fn, row.names=FALSE) 195 | clear_line(' ~ Written', fn) 196 | 197 | # now normalise to CF 198 | for (i in 2:ncol(evolving_output)) 199 | { 200 | evolving_output[ , i] = evolving_output[ , i] / evolving_capacity[ , i] 201 | } 202 | 203 | fn = baseSaveFolder %&% baseSaveFile %&% '(d).evolving.CF.' 
%&% split_col %&% '.csv' 204 | write_csv(evolving_output, fn, row.names=FALSE) 205 | clear_line(' ~ Written', fn) 206 | 207 | fn = baseSaveFolder %&% baseSaveFile %&% '(d).evolving.capacity.' %&% split_col %&% '.csv' 208 | write_csv(evolving_capacity, fn, row.names=FALSE) 209 | clear_line(' ~ Written', fn) 210 | 211 | 212 | 213 | # oh, oh - finally, aggregate to months! 214 | snapshot_cf = aggregate_monthly('GMT', snapshot_output, mean, silent=TRUE) 215 | evolving_cf = aggregate_monthly('GMT', evolving_output, mean, silent=TRUE) 216 | 217 | #snapshot_cf = aggregate(snapshot_output[ , -1], by=list(years), mean, na.rm=TRUE) 218 | #evolving_cf = aggregate(evolving_output[ , -1], by=list(years), mean, na.rm=TRUE) 219 | 220 | fn = baseSaveFolder %&% baseSaveFile %&% '(c).snapshot.CF.monthly.' %&% split_col %&% '.csv' 221 | write_csv(snapshot_cf, fn, row.names=FALSE) 222 | 223 | fn = baseSaveFolder %&% baseSaveFile %&% '(d).evolving.CF.monthly.' %&% split_col %&% '.csv' 224 | write_csv(evolving_cf, fn, row.names=FALSE) 225 | clear_line(' ~ Written', fn) 226 | 227 | 228 | 229 | # oh, oh - finally, aggregate to years! 230 | snapshot_cf = aggregate_yearly('GMT', snapshot_output, mean, silent=TRUE) 231 | evolving_cf = aggregate_yearly('GMT', evolving_output, mean, silent=TRUE) 232 | 233 | #snapshot_cf = aggregate(snapshot_output[ , -1], by=list(years), mean, na.rm=TRUE) 234 | #evolving_cf = aggregate(evolving_output[ , -1], by=list(years), mean, na.rm=TRUE) 235 | 236 | fn = baseSaveFolder %&% baseSaveFile %&% '(c).snapshot.CF.yearly.' %&% split_col %&% '.csv' 237 | write_csv(snapshot_cf, fn, row.names=FALSE) 238 | 239 | fn = baseSaveFolder %&% baseSaveFile %&% '(d).evolving.CF.yearly.' %&% split_col %&% '.csv' 240 | write_csv(evolving_cf, fn, row.names=FALSE) 241 | clear_line(' ~ Written', fn, '\n') 242 | 243 | } 244 | 245 | 246 | 247 | # # save the date column 248 | # fn = baseSaveFolder %&% baseSaveFile %&% '(h).dates.csv' 249 | # write_csv(datecol, fn) 250 | # flush(' ~ Written', fn, '\n') 251 | # 252 | 253 | 254 | # save the model parameters 255 | fn = baseSaveFolder %&% baseSaveFile %&% '(e).parameters.csv' 256 | write_csv(parms, fn) 257 | flush(' ~ Written', fn, '\n') 258 | -------------------------------------------------------------------------------- /lib/VWF.MAIN.WINDPOWER.R: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # # 3 | # BSD 3-Clause License # 4 | # Copyright (C) 2012-2017 Iain Staffell # 5 | # All rights reserved. # 6 | # # 7 | ################################################################## 8 | 9 | 10 | 11 | # establish two plots 12 | if (a_few_plots) 13 | { 14 | dev.new(height=5.4, width=12, xpos=0, ypos=450) 15 | good_graphics() 16 | layout(t(1:3)) 17 | 18 | plot_farms(windFarms, pch='+', cap.weight=1) 19 | dev.set() 20 | } 21 | 22 | 23 | 24 | 25 | 26 | #################################### 27 | 28 | ## 29 | ## plot the mean wind speeds at each site 30 | ## 31 | 32 | if (ncol(windSpeed) > 2) { 33 | site_means = colMeans(windSpeed[ , -1]) 34 | } else { 35 | site_means = windSpeed[ , 2] 36 | } 37 | 38 | ylim = range(site_means/1.1, site_means*1.1) 39 | 40 | if (a_few_plots) 41 | plot(sort(site_means), type='l', ylim=ylim, main='Average Speeds by Site') 42 | 43 | 44 | 45 | ## 46 | ## read in air density data 47 | ## 48 | 49 | if (doAirDensity) ##### could parallelise this with cbind? 
iterators over each column 50 | { 51 | airDensity = read_data(airDensityFile) 52 | 53 | cat("\n") 54 | 55 | for (i in 1:nrow(windFarms)) 56 | { 57 | clear_line() 58 | flush( paste("Applying air density correction: ", i, "/", nrow(windFarms), sep='') ) 59 | 60 | # valid variable version of farm name (column name in csv) 61 | myName = windFarms[i, 'name'] 62 | myCol = make.names(myName) 63 | if (!is.na(as.numeric(myName))) { myCol = as.character(myName) } ###### 64 | 65 | # get the relevant info 66 | speed = windSpeed[ , myCol] 67 | density = airDensity[ , myCol] 68 | height = windFarms[i, 'height'] 69 | 70 | windSpeed[ , myCol] = air_density_correction(speed, density, height) 71 | } 72 | 73 | clear_line() 74 | flush("Applying air density correction...\n") 75 | 76 | if (ncol(windSpeed) > 2) { 77 | site_means = colMeans(windSpeed[ , -1]) 78 | } else { 79 | site_means = windSpeed[ , 2] 80 | } 81 | 82 | if (a_few_plots) 83 | lines(sort(site_means), col='red') 84 | } 85 | 86 | 87 | ## 88 | ## transform unfathomably low speeds 89 | ## use a cubic hermite to enforce a lower-bound floor 90 | ## 91 | 92 | if (doTransformLowestSpeeds) 93 | { 94 | # our transformation parameters 95 | # floor is the lowest we can go (m/s) 96 | # width is how smooth the transition is.. 97 | # e.g. 4 and 1.5 means speeds of 2.5 go up to 4, speeds of 5.5 are unchanged 98 | trnsfrm_floor = 4 99 | trnsfrm_width = 1.5 100 | 101 | trnsfrm_min = trnsfrm_floor - trnsfrm_width 102 | trnsfrm_max = trnsfrm_floor + trnsfrm_width 103 | 104 | w = which(site_means < trnsfrm_min) 105 | 106 | if (length(w) > 0) 107 | { 108 | scalars = 4.5 / site_means[w] 109 | 110 | speed = windSpeed[ , w+1] 111 | speed = mult_cols_by_vector(speed, scalars) 112 | windSpeed[ , w+1] = speed 113 | } 114 | 115 | w = which(site_means >= trnsfrm_min & site_means < trnsfrm_max) 116 | 117 | if (length(w) > 0) 118 | { 119 | scalars = trnsfrm_max - site_means[w] 120 | scalars = scalars ^ 2 / (4 * trnsfrm_width) 121 | scalars = (site_means[w] + scalars) / site_means[w] 122 | 123 | speed = windSpeed[ , w+1] 124 | speed = mult_cols_by_vector(speed, scalars) 125 | windSpeed[ , w+1] = speed 126 | } 127 | 128 | if (ncol(windSpeed) > 2) { 129 | site_means_x = colMeans(windSpeed[ , -1]) 130 | } else { 131 | site_means_x = windSpeed[ , 2] 132 | } 133 | 134 | if (a_few_plots) 135 | { 136 | lines(sort(site_means_x), col='red3') 137 | lines(sort(site_means), col='red') 138 | } 139 | } 140 | 141 | ################################# 142 | 143 | 144 | 145 | 146 | 147 | ## 148 | ## process the wind speed data 149 | ## 150 | 151 | # interpolate to half hourly 152 | if (halfHourly) 153 | { 154 | cat(" Interpolating to half hourly...\n") 155 | windSpeed = interpolate_wind_speed(windSpeed, resolution=30) 156 | } 157 | 158 | # separate the date column from the numerics 159 | if (!exists('datecol')) 160 | { 161 | datecol = windSpeed[ , 1] 162 | windSpeed = windSpeed[ , -1] 163 | } 164 | 165 | # inject noise into the new profile 166 | if (halfHourly) 167 | { 168 | cat(" Injecting gaussian noise into profile...\n") 169 | windSpeed = inject_noise_into_wind_speeds(windSpeed, noise_stdev) 170 | } 171 | 172 | # make sure wind farm names line up in the two files 173 | windFarms$name = colnames(windSpeed) 174 | 175 | 176 | 177 | 178 | 179 | 180 | ##### 181 | ## ## ESTABLISH THE PERFORMANCE RATIO OF ALL FARMS 182 | ##### 183 | 184 | ## 185 | ## transform wind speeds from the nasa ideal to the on-the-ground reality 186 | 187 | # set the base performance for all farms 188 | performance = 
rep(1.000, ncol(windSpeed)) 189 | 190 | # modify these with the input file values if desired 191 | if (!is.null(PR_column)) 192 | { 193 | performance = performance * windFarms[ , PR_column] 194 | } 195 | 196 | # show a histogram of performance ratios... 197 | if (a_few_plots) 198 | plot(performance) 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | ##### 212 | ## ## CONVERT FROM SPEED TO POWER -- GENERATE LOAD FACTORS 213 | ##### 214 | 215 | flush('> Generating load factors with ') 216 | if (match_method == 1) flush(" fixed scalar same for every farm, finding offset\n") 217 | if (match_method == 2) flush(" fixed scalar from each farm's PR, finding offset\n") 218 | if (match_method == 3) flush(" fixed offset same for every farm, finding scalar\n") 219 | if (match_method == 4) flush(" fixed offset from each farm's PR, finding scalar\n") 220 | 221 | 222 | # to avoid passing the whole data.frame... 223 | colnames_windSpeed = colnames(windSpeed) 224 | 225 | 226 | ##WEIBULL_HAXX 227 | #wxx = seq(0, 1, length.out=1e6) 228 | #w20 = qweibull(wxx, 2.0, 10) 229 | #w17 = qweibull(wxx, 1.7, 10) 230 | #hist(w18, xlim=c(0,40), breaks=100) 231 | #hist(w20, xlim=c(0,40), breaks=100) 232 | #plot_l(w20, w20/w18) 233 | #weibull_spline = spline(w20, w20/w18, xout=seq(0,40,0.001)) 234 | ##WEIBULL_HAXX 235 | 236 | 237 | # run through each farm in parallel building a huge mega-list of results 238 | results = foreach (speed=iter(windSpeed, by='col'), i=icount(), .combine='rbind') %dopar% 239 | { 240 | # farm name and turbine model 241 | myName = windFarms[i, 'name'] 242 | myModel = windFarms[i, 'power_curve'] 243 | 244 | # valid variable version of farm name (column name in csv) 245 | # numeric farm names don't need an X on the front 246 | #myCol = make.names(myName) 247 | myCol = myName 248 | if (is.numeric(myCol)) 249 | myCol = as.character(myCol) 250 | 251 | myModel = make.names(myModel) 252 | 253 | # my individual power curve - unmodified 254 | myCurve = turbCurve[ , myModel] 255 | 256 | # check our name is ok 257 | if (myCol %notin% colnames_windSpeed) 258 | stop("Cannot find the farm:", myName, "\n\n") 259 | 260 | 261 | 262 | 263 | # interpolate any NAs (which i believe are all zeroish) 264 | missing = which(is.na(speed)) 265 | if (length(missing) > 0) 266 | speed[missing] = spline(speed, xout=missing)$y 267 | 268 | 269 | # calculate load factors using the unmodified turbine power curve 270 | mylf = wind_speed_to_power_output(speed, myCurve) 271 | energyInitial = mean(mylf) 272 | 273 | # calculate our target energy yield 274 | energyTarget = energyInitial * performance[i] 275 | 276 | 277 | 278 | 279 | 280 | ## 281 | ## calculate modified yield using convoluted farm curve 282 | 283 | # determine how to convolute this to a curve for this whole farm 284 | mySpread = get_power_curve_convolution_spread(windFarms[i, ]) 285 | 286 | # convolute to produce the power curve for this farm 287 | myCurve = farmCurve[ , myModel] 288 | myCurve = convoluteFarmCurve(myCurve, 0, mySpread) 289 | 290 | 291 | ##WEIBULL_HAXX 292 | #weibull_adjuster = spline(w20, w20/w17, xout=speed) 293 | #speed = speed * weibull_adjuster$y 294 | ##WEIBULL_HAXX 295 | 296 | 297 | # we choose to match using offsets, with fixed scalars 298 | if (match_method == 1 | match_method == 2) 299 | { 300 | # get the scalar for this farm 301 | myScalar = determine_farm_scalar(performance[i], windFarms$iso[i]) 302 | 303 | # find the additive convolution offset that gives us the correct yield 304 | myOffset = find_farm_offset(speed, 
myCurve, myScalar, energyInitial, energyTarget) 305 | } 306 | 307 | # we choose to match using scalars, with fixed offsets 308 | if (match_method == 3 | match_method == 4) 309 | { 310 | # determine the offset we should use for this farm 311 | myOffset = determine_farm_offset(performance[i]) 312 | 313 | # find the multiplicative scalar for wind speeds that gives us the correct yield 314 | myScalar = find_farm_scalar(speed, myCurve, myOffset, energyInitial, energyTarget) 315 | } 316 | 317 | # calculate the final load factors of this farm 318 | mylf = wind_speed_to_power_output(speed, myCurve, myScalar, myOffset) 319 | 320 | # make it clear if we got stuck 321 | if (myOffset < -20 | myOffset > 20 | myScalar < 0 | myScalar > 5) 322 | mylf = mylf * NA 323 | 324 | # calculate the power output in MW 325 | mymw = mylf * windFarms[i, 'capacity'] 326 | 327 | 328 | 329 | # save the the corrected wind speeds if wanted 330 | if (save_modified_wind_speeds) 331 | speed = (speed + myOffset) * myScalar 332 | 333 | 334 | 335 | # print our VWF convolution parameters 336 | mo = sprintf('%1.2f', myOffset) 337 | cs = sprintf('%1.2f', convolverStdDev) 338 | ms = sprintf('%3.1f%%', myScalar * 100) 339 | flush('-> [ws + N(', mo, ', ', cs, ')] x ', ms, '\n', sep='') 340 | 341 | 342 | # save the VWF parameters for this farm 343 | parms = c(myName, energyInitial, performance[i], energyTarget, myOffset, myScalar, mySpread) 344 | 345 | results = list( 346 | speed=speed, 347 | mylf=mylf, 348 | mymw=mymw, 349 | parms=parms 350 | ) 351 | 352 | return(results) 353 | } 354 | 355 | 356 | 357 | 358 | ##### 359 | ## ## EXTRACT RESULTS 360 | ##### 361 | 362 | # now process the mega-results table 363 | 364 | # modified wind speeds 365 | windSpeed = as.data.frame(results[ , 1]) 366 | colnames(windSpeed) = colnames_windSpeed 367 | 368 | # load factors 369 | loadFactor = as.data.frame(results[ , 2]) 370 | colnames(loadFactor) = colnames_windSpeed 371 | 372 | # power outputs 373 | powerMW = as.data.frame(results[ , 3]) 374 | colnames(powerMW) = colnames_windSpeed 375 | 376 | # VWF model parameters 377 | parms = as.data.frame(results[ , 4], stringsAsFactors=FALSE) 378 | parms = as.data.frame(t(parms), stringsAsFactors=FALSE) 379 | for (i in colnames(parms)[-1]) 380 | parms[ , i] = as.numeric(parms[ , i]) 381 | colnames(parms) = c('name', 'original_LF', 'PR', 'desired_LF', 'offset', 'scalar', 'stdev') 382 | 383 | 384 | # conserve memory 385 | rm(results) 386 | gc() 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | ##### 395 | ## ## POST PROCESSING 396 | ##### 397 | 398 | if (a_few_plots) 399 | { 400 | # histogram of site load factors 401 | cf = parms$desired_LF * 100 402 | hist(cf, breaks=0:ceiling(max(cf)), col='grey90') 403 | abline(v=mean(cf), lwd=2, col='red3') 404 | 405 | # map of capacity factors 406 | dev.set() 407 | cfc = colour_ramp_data(cf) 408 | plot_farms(windFarms, cap.weight=1, add.points=TRUE, pch='+', col=cfc) 409 | dev.set() 410 | } 411 | 412 | 413 | # run garbage collection to keep memory in check 414 | junk = capture.output(gc()) 415 | -------------------------------------------------------------------------------- /lib/VWF.MAIN.WINDSPEED.R: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # # 3 | # BSD 3-Clause License # 4 | # Copyright (C) 2012-2017 Iain Staffell # 5 | # All rights reserved. 
# 6 | # # 7 | ################################################################## 8 | 9 | 10 | 11 | 12 | ##### 13 | ## ## READ IN EXISTING SPEEDS IF WE HAVE THEM 14 | ##### 15 | 16 | # we have specified a custom wind speed file to use... 17 | if (!is.null(windSpeedFile)) 18 | { 19 | # check this file exists... 20 | if (!file.exists(windSpeedFile)) 21 | { 22 | flush(" OH NO! You specified a custom windSpeedFile, but it doesn't exist!\n") 23 | stop(" ", windSpeedFile, "\n") 24 | } 25 | } 26 | 27 | # consider the default filename for wind speeds 28 | if (is.null(windSpeedFile)) 29 | { 30 | windSpeedFile = baseSaveFolder %&% baseSaveFile %&% 'windspeed.' %&% xtn 31 | } 32 | 33 | 34 | # if that file exists - read it in! 35 | use_cached_speeds = file.exists(windSpeedFile) 36 | 37 | if (use_cached_speeds) 38 | { 39 | flush('> Reading pre-compiled wind speeds...\n') 40 | windSpeed = read_data(windSpeedFile) 41 | } 42 | 43 | 44 | 45 | 46 | 47 | ##### 48 | ## ## RUN THROUGH EACH GROUP (MONTH / QUARTER) OF MERRA FILES, READ IN THE WIND SPEED FOR EACH WINDFARM & SAVE TO A TEMP FILE 49 | ##### 50 | 51 | if (!use_cached_speeds) 52 | { 53 | flush('> Generating wind speeds... go and use the [progress meter] file to monitor how things are going...\n') 54 | flush(' Progress files being written to', baseSaveFolder, '\n') 55 | 56 | windSpeed = foreach (g = merra_wind$groups, .combine=rbind, .packages=c('ncdf4', 'lubridate')) %dopar% 57 | { 58 | 59 | # establish our progress reporting 60 | progress_file = paste0(baseSaveFolder, '~progress~', baseSaveFile, '.', g, '.txt') 61 | write('Wind speed processing started', progress_file) 62 | date_1 = head(merra_wind$dates, 1) 63 | date_2 = tail(merra_wind$dates, 1) 64 | date_range = paste(format(date_1), 'to', format(date_2)) 65 | write(date_range, progress_file, append=TRUE) 66 | write('\n', progress_file, append=TRUE) 67 | 68 | 69 | 70 | # prepare this month's input files 71 | nFiles = length(merra_wind$index[[g]]) 72 | cat('Processing', g, '=', nFiles, 'files...\n') 73 | 74 | # prepare our results storage 75 | # this holds the estimated wind speeds, rows=hours, columns=farms 76 | outputCols = c("Timestamp", windFarms$name) 77 | speed = data.frame( matrix(NA, nrow=nFiles*24, ncol=length(outputCols) ) ) 78 | colnames(speed) = outputCols 79 | 80 | # initialise our hour counter 81 | hour = 0 82 | 83 | 84 | ### 85 | ### READ EACH FILE (DAY) IN TURN 86 | ### 87 | for (f in merra_wind$index[[g]]) 88 | { 89 | 90 | ## 91 | ## READ IN THE NETCDF DATA 92 | ## 93 | 94 | # read in the wind profile data 95 | fn = merra_wind$files[f] 96 | nc_wind$open_file(fn) 97 | 98 | # make a vector of our 24 timestamps 99 | nc_wind_date = ymd(merra_wind$dates[f]) 100 | timeVector = nc_wind_date %&% ' ' %&% sprintf("%02d:00", nc_wind$hour) #### horror hack 101 | 102 | # apply this to our storage 103 | todays_hours = (hour+1):(hour+24) 104 | speed[todays_hours, 1] = timeVector 105 | 106 | # update 107 | #flush('Reading', format(nc_wind$date)) 108 | write(format(nc_wind$date), progress_file, append=TRUE) 109 | 110 | 111 | 112 | # extract our extrapolation parameters - format is A[lon, lat, time] 113 | A = nc_wind$get_var('A') 114 | z = nc_wind$get_var('z') 115 | 116 | # close this input file 117 | nc_wind$close_file() 118 | 119 | 120 | 121 | 122 | # if we are crossing the date line, reshape our data to contain what we need 123 | if (dateLineMadness) 124 | { 125 | A = A[ c(dateLineWest, dateLineEast), , ] 126 | z = z[ c(dateLineWest, dateLineEast), , ] 127 | dimnames(A)[[1]] = dateLineDims 128 | 
dimnames(z)[[1]] = dateLineDims 129 | nc_wind$lon = dateLineDims 130 | } 131 | 132 | 133 | 134 | 135 | ## 136 | ## SPATIAL INTERPOLATION AND HEIGHT EXTRAPOLATION 137 | ## 138 | 139 | # interpolate each hour of the day 140 | for (h in 1:24) 141 | { 142 | # initialise the wind speeds for all farms 143 | s = rep(NA, nrow(windFarms)) 144 | 145 | # run through each height in turn 146 | for (height in all_heights) 147 | { 148 | # get wind speeds at this height 149 | w = A[ , , h] * log(height / z[ , , h]) 150 | 151 | # locate the farms we care about 152 | my_farms = (windFarms$height == height) 153 | 154 | # loess interpolation 155 | if (spatial.method == 'loess') 156 | { 157 | # create a loess fit 158 | loess_w$value = as.vector(w) 159 | loess_fit = loess(value ~ lon*lat, data=loess_w, degree=2, span=loessSpan) 160 | 161 | # interpolate speeds to these farms 162 | s[my_farms] = predict(loess_fit, newdata=windFarms[my_farms, ]) 163 | } 164 | 165 | # spline interpolation 166 | if (spatial.method == 'akima') 167 | { 168 | # cubic spline our speeds 169 | s[my_farms] = bicubic(nc_wind$lon, nc_wind$lat, w, windFarms$lon[my_farms], windFarms$lat[my_farms])$z 170 | } 171 | 172 | } 173 | 174 | # append these extrapolated speeds - forbidding silliness like negative speeds 175 | hour = hour + 1 176 | speed[hour, -1] = pmax(0, s) 177 | } 178 | 179 | 180 | # if we are crossing the date line, repair the damage we did 181 | if (dateLineMadness) 182 | { 183 | nc_wind$lon = originalDims 184 | } 185 | 186 | } 187 | 188 | 189 | # save the wind speeds if requested 190 | if (save_original_wind_speeds) 191 | { 192 | fn = baseSaveFolder %&% baseSaveFile %&% 'windspeed.' %&% g %&% '.' %&% xtn 193 | write_data(speed, fn, xtn) 194 | flush('\n ~ Written', fn) 195 | } 196 | 197 | 198 | # could return the wind speeds for joining in memory? 199 | # speed 200 | 201 | } # finished processing all files - end of parallel loop 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | ### 210 | ### CLEAR OUT TEMPORARY FILES 211 | ### AND BIND ALL WIND SPEEDS TOGETHER 212 | ### 213 | 214 | # clear out the progress files 215 | progress_file = paste0(baseSaveFolder, '~progress~', baseSaveFile, '.', merra_wind$groups, '.txt') 216 | progress_file = progress_file[ file.exists(progress_file) ] 217 | if (length(progress_file) > 0) file.remove(progress_file) 218 | 219 | # concatenate all the results files 220 | fn = baseSaveFolder %&% baseSaveFile %&% 'windspeed.' %&% merra_wind$groups %&% '.' %&% xtn 221 | windSpeed = read_data_bind_rows(fn, verbose=FALSE) 222 | 223 | # and save 224 | big_fn = baseSaveFolder %&% baseSaveFile %&% 'windspeed.' %&% xtn 225 | flush('~ Writing', big_fn, '\n') 226 | write_data(windSpeed, big_fn, xtn) 227 | 228 | # and remove the intermediates 229 | file.remove(fn) 230 | 231 | } 232 | 233 | 234 | 235 | 236 | ##### 237 | ## ## FINISH OFF 238 | ##### 239 | 240 | # kill the plot of wind farm locations 241 | if (a_few_plots) try( dev.off(), silent=TRUE ) 242 | flush('>', nrow(windSpeed), 'hours of wind speeds calculated for', ncol(windSpeed), 'farms\n') 243 | -------------------------------------------------------------------------------- /lib/VWF.NCDF.R: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # # 3 | # BSD 3-Clause License # 4 | # Copyright (C) 2012-2017 Iain Staffell # 5 | # All rights reserved. 
# 6 | # # 7 | ################################################################## 8 | 9 | 10 | 11 | suppressPackageStartupMessages(library(ncdf4)) 12 | suppressPackageStartupMessages(library(lubridate)) 13 | 14 | 15 | 16 | 17 | ####################################################################################################################### 18 | ## 19 | ## locate all the MERRA files in the given folder(s) - note that all files are found, regardless of extension! 20 | ## extract all their dates and create an index that groups them by 'monthly' or 'daily' 21 | ## return a list with $files $dates $groups $index 22 | ## 23 | ## if grouping='daily' then you get faux-groups which allows the group-loop code to still work 24 | ## 25 | prepare_merra_files = function(folders, grouping='monthly', processNewestFirst=FALSE) 26 | { 27 | merra = list() 28 | 29 | # find NetCDF files within our folders 30 | merra$files = list.files(folders, full.names=TRUE) 31 | 32 | if (length(merra$files) == 0) 33 | stop("prepare_merra_files -- no files found in the given folder:", folders, "\n\n") 34 | 35 | # extract the dates from files 36 | if (grepl('Nx', merra$files[1])) { 37 | merra$dates = get_text_between(merra$files, 'Nx.', '.nc') 38 | } else { 39 | merra$dates = get_text_between(merra$files, 'profile.', '.nc') 40 | } 41 | merra$dates = get_text_before(merra$dates, '.SUB') 42 | merra$dates = ymd(merra$dates) 43 | 44 | # create an index for grouping files 45 | merra$index = list() 46 | 47 | # group files by quarter 48 | if (grouping == 'quarterly') 49 | { 50 | all_quarters = paste0( year(merra$dates), 'Q', ceiling(month(merra$dates)/3)) 51 | merra$groups = unique(all_quarters) 52 | 53 | for (g in merra$groups) 54 | { 55 | filter = which( all_quarters == g ) 56 | merra$index[[g]] = filter 57 | } 58 | } 59 | 60 | # group files by months 61 | if (grouping == 'monthly') 62 | { 63 | merra$groups = floor_date(merra$dates, 'month') 64 | merra$groups = format(unique(merra$groups)) 65 | 66 | for (g in merra$groups) 67 | { 68 | filter = which( year(merra$dates) == year(g) & month(merra$dates) == month(g) ) 69 | merra$index[[g]] = filter 70 | } 71 | } 72 | 73 | # create a dummy $index which goes through each file individually 74 | if (grouping == 'daily') 75 | { 76 | merra$groups = format(merra$dates) 77 | 78 | for (g in merra$groups) 79 | { 80 | filter = which(merra$dates == g) 81 | merra$index[[g]] = filter 82 | } 83 | } 84 | 85 | 86 | # run in reverse order if desired 87 | if (processNewestFirst == TRUE) 88 | merra$groups = rev(merra$groups) 89 | 90 | 91 | # return the list 92 | merra 93 | } 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | ####################################################################################################################### 105 | ## 106 | ## a class for opening and reading NetCDF files 107 | ## 108 | NetCdfClass = function(filename, model, printInfo=FALSE) 109 | { 110 | 111 | ##### 112 | ## ## MEMBERS 113 | ##### 114 | 115 | NetCdf = list( 116 | 117 | ncdf = NULL, 118 | 119 | all_vars = NULL, 120 | all_desc = NULL, 121 | all_dims = NULL, 122 | 123 | date = NULL, 124 | hour = NULL, 125 | levels = NULL, 126 | 127 | lon = NULL, 128 | lat = NULL, 129 | subset = NULL, 130 | 131 | model = NULL, 132 | 133 | calendar = NULL 134 | ) 135 | 136 | 137 | 138 | ##### 139 | ## ## METHODS 140 | ##### 141 | 142 | # 143 | # constructor 144 | # opens file 145 | # defines the latitude and longitudes contained within it 146 | NetCdf$constructor = function(filename, model, printInfo=FALSE) 147 
| { 148 | # set our reanalysis model 149 | NetCdf$model = model 150 | 151 | # open - getting our date and time 152 | NetCdf$open_file(filename, printInfo) 153 | 154 | # get our coordinate system 155 | if (NetCdf$model == 'MERRA2') 156 | { 157 | NetCdf$lon = ncvar_get(NetCdf$ncdf, 'lon') 158 | NetCdf$lat = ncvar_get(NetCdf$ncdf, 'lat') 159 | } 160 | 161 | if (NetCdf$model == 'MERRA1') 162 | { 163 | NetCdf$lon = ncvar_get(NetCdf$ncdf, 'longitude') 164 | NetCdf$lat = ncvar_get(NetCdf$ncdf, 'latitude') 165 | } 166 | 167 | if (NetCdf$model == 'MERRA1NEW') 168 | { 169 | NetCdf$lon = ncvar_get(NetCdf$ncdf, 'XDim') 170 | NetCdf$lat = ncvar_get(NetCdf$ncdf, 'YDim') 171 | } 172 | 173 | if (NetCdf$model == 'CMIP5') 174 | { 175 | NetCdf$lon = ncvar_get(NetCdf$ncdf, 'lon') 176 | NetCdf$lat = ncvar_get(NetCdf$ncdf, 'lat') 177 | } 178 | 179 | 180 | 181 | # get our pressure levels too, if present 182 | if ('levels' %in% names(NetCdf$ncdf$dim)) 183 | NetCdf$levels = ncvar_get(NetCdf$ncdf, 'levels') 184 | 185 | # default to no subsetting 186 | NetCdf$subset = FALSE 187 | 188 | # save 189 | assign('NetCdf', NetCdf, envir=NetCdf) 190 | } 191 | 192 | 193 | # 194 | # open a NetCDF file 195 | # set ncdf (the file handle), date and hour 196 | # 197 | # state which reanalysis this is: 'MERRA1' or 'MERRA2' 198 | # 199 | # optionally describes the file contents 200 | # 201 | NetCdf$open_file = function(filename, printInfo=FALSE) 202 | { 203 | # open 204 | NetCdf$ncdf = nc_open(filename) 205 | nc = NetCdf$ncdf 206 | 207 | # list all the variables inside it 208 | for (i in 1:nc$nvar) 209 | { 210 | var = nc$var[[i]] 211 | push(NetCdf$all_vars, var$name) 212 | 213 | desc = var$longname %&% ' [' %&% paste(var$size[1], var$size[2], var$size[3], sep=', ') %&% ']' 214 | push(NetCdf$all_desc, desc) 215 | } 216 | 217 | # list all the dimensions inside it 218 | for (i in 1:nc$ndim) 219 | { 220 | dim = nc$dim[[i]] 221 | push(NetCdf$all_dims, dim$name) 222 | } 223 | 224 | 225 | 226 | # get the date and time for this file 227 | if (NetCdf$model == 'MERRA2') 228 | { 229 | NetCdf$date = ncatt_get(nc, 0, 'RangeBeginningDate')$value 230 | NetCdf$date = ymd(NetCdf$date) 231 | 232 | NetCdf$hour = ncvar_get(nc, 'time') / 60 233 | } 234 | 235 | if (NetCdf$model == 'MERRA1') 236 | { 237 | NetCdf$date = ncatt_get(nc, 'time', 'units')$value 238 | NetCdf$date = strsplit(NetCdf$date, ' ')[[1]][3] 239 | NetCdf$date = ymd(NetCdf$date) 240 | 241 | NetCdf$hour = ncvar_get(nc, 'time') 242 | } 243 | 244 | if (NetCdf$model == 'MERRA1NEW') 245 | { 246 | NetCdf$date = ncatt_get(nc, 'TIME', 'units')$value 247 | NetCdf$date = strsplit(NetCdf$date, ' ')[[1]][3] 248 | NetCdf$date = ymd(NetCdf$date) 249 | 250 | NetCdf$hour = ncvar_get(nc, 'TIME') / 60 251 | } 252 | 253 | # get the date and time for this file 254 | if (NetCdf$model == 'CMIP5') 255 | { 256 | # note that CMIP is just completely stupid and has 360 day years... 257 | # we embed a strange string into the data dimnames just for the record... 
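# (a 360-day calendar gives 12 months of 30 days; at 3-hourly resolution that is
#  8 timesteps a day, so the five years 2026-2030 fill exactly 5 x 360 x 8 = 14400 rows)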
258 | # and we set up the NetCdf$calendar variable to allow for easy access to all the date components 259 | calendar = data.frame(index=1:14400, year=NA, season=NA, hour=NA) 260 | 261 | # build a calendar to know the years,seasons,hours 262 | calendar$year = rep(2026:2030, each=360*8) 263 | 264 | calendar$month = rep(1:12, each=30*8) 265 | 266 | calendar$season = calendar$month 267 | f = (calendar$season == 12) 268 | calendar$season[f] = 0 269 | calendar$season = floor(calendar$season / 3) # winter = 0, spring = 1, summer = 2, autumn = 3 270 | 271 | calendar$hour = rep(c(3, 6, 9, 12, 15, 18, 21, 0), times=360*5) 272 | 273 | # assign 274 | NetCdf$hour = paste('Y', calendar$year, 'M', calendar$month, 'H', calendar$hour, sep='') 275 | 276 | NetCdf$calendar = calendar 277 | } 278 | 279 | 280 | # HACK - i didn't update the dates correctly in the MERRA2 profile files... 281 | # HACK - they are identifiable because they are called .profile. 282 | if (grepl('profile', filename)) 283 | { 284 | # so... extract the date from the filename 285 | d = get_text_between(filename, 'profile.', '.nc') 286 | d = get_text_before(d, '.SUB') 287 | NetCdf$date = ymd(d) 288 | } 289 | 290 | 291 | 292 | # describe contents 293 | if (printInfo) 294 | { 295 | # annoyingly, this is now hyper-detailed... 296 | # print(NetCdf$ncdf) 297 | 298 | cat(nc$ndim, 'Dimensions:\n') 299 | for (i in 1:nc$ndim) 300 | { 301 | d = NetCdf$all_dims[i] 302 | 303 | cat('\t') 304 | 305 | if (d == 'time') 306 | { 307 | cat(sprintf('%10s', 'time'), '\t') 308 | 309 | t_min = suppressWarnings(min(NetCdf$hour)) 310 | t_max = suppressWarnings(max(NetCdf$hour)) 311 | 312 | cat(format(NetCdf$date, "%Y-%m-%d"), ' - ', t_min, ':', t_min) 313 | 314 | } else if (d == 'longitude' | d == 'lon') 315 | { 316 | lon = ncvar_get(NetCdf$ncdf, d) 317 | cat(sprintf('%10s', 'longitude'), '\t') 318 | cat(min(lon), ':', max(lon)) 319 | 320 | } else if (d == 'latitude' | d == 'lat') 321 | { 322 | lat = ncvar_get(NetCdf$ncdf, d) 323 | cat(sprintf('%10s', 'latitude'), '\t') 324 | cat(min(lat), ':', max(lat)) 325 | 326 | } else 327 | { 328 | cat(d) 329 | } 330 | 331 | cat('\n') 332 | } 333 | 334 | cat(nc$nvar, 'Variables:\n') 335 | for (i in 1:nc$nvar) 336 | cat('\t', sprintf('%10s', NetCdf$all_vars[i]), '\t', NetCdf$all_desc[i], '\n') 337 | } 338 | 339 | # save 340 | assign('NetCdf', NetCdf, envir=NetCdf) 341 | } 342 | 343 | 344 | # 345 | # close a NetCDF file 346 | # 347 | NetCdf$close_file = function() 348 | { 349 | nc_close(NetCdf$ncdf) 350 | assign('NetCdf', NetCdf, envir=NetCdf) 351 | } 352 | 353 | 354 | 355 | 356 | # 357 | # set up the internals so that future reads from NetCDF files are subsetted to a particular region 358 | # 359 | # region is a list containing $lon[min,max] and $lat[min,max] 360 | # 361 | NetCdf$subset_coords = function(region) 362 | { 363 | # remove fools 364 | if (length(region$lon) != 2 | length(region$lat) != 2) 365 | { 366 | cat("NetCdf::subset_coords() -- must pass a list of $lon[min,max] $lat[min,max]\n") 367 | return() 368 | } 369 | 370 | # get our spatial resolution 371 | dx = diff(NetCdf$lon)[1] 372 | dy = diff(NetCdf$lat)[1] 373 | 374 | # expand the region to make sure we include the points adjacent to our range 375 | region$lon[1] = region$lon[1] - dx - 0.01 376 | region$lon[2] = region$lon[2] + dx + 0.01 377 | region$lat[1] = region$lat[1] - dy - 0.01 378 | region$lat[2] = region$lat[2] + dy + 0.01 379 | 380 | # define which parts of the file live within our region 381 | myLon = which(NetCdf$lon >= region$lon[1] & NetCdf$lon <= 
region$lon[2]) 382 | myLat = which(NetCdf$lat >= region$lat[1] & NetCdf$lat <= region$lat[2]) 383 | 384 | # store the start and count parameters to use when reading variables 385 | NetCdf$start = c(min(myLon), min(myLat), 1) 386 | NetCdf$count = c(length(myLon), length(myLat), -1) 387 | 388 | # chop down our lon and lat vectors to this region 389 | NetCdf$lon = NetCdf$lon[myLon] 390 | NetCdf$lat = NetCdf$lat[myLat] 391 | 392 | # and note that we have done this chopping 393 | NetCdf$subset = TRUE 394 | 395 | # save 396 | assign('NetCdf', NetCdf, envir=NetCdf) 397 | } 398 | 399 | 400 | 401 | 402 | # 403 | # read a variable from the open NetCDF file 404 | # if you have defined a region with subset_coords(), employ subsetted reading to save time 405 | # 406 | NetCdf$get_var = function(var) 407 | { 408 | # check the var we are reading exists 409 | var_ok = (var %in% NetCdf$all_vars) 410 | if (!var_ok) 411 | { 412 | # check if it's a stupid case error in MERRA 413 | var_ok = (toupper(var) %in% toupper(NetCdf$all_vars)) 414 | if (!var_ok) 415 | { 416 | flush("NetCdf$get_var -- your var [", var, "] doesn't exist in this reanalysis...\n") 417 | flush("Available variables are:", NetCdf$all_vars, "\n\n") 418 | } 419 | 420 | if (var_ok) 421 | { 422 | # figure out the actual name 423 | old_var = var 424 | var = match(toupper(var), toupper(NetCdf$all_vars)) 425 | var = NetCdf$all_vars[var] 426 | 427 | flush("NetCdf$get_var -- you requested [", old_var, "], giving you [", var, "]\n") 428 | } 429 | } 430 | 431 | # if we want to read from the entire file... 432 | if (NetCdf$subset == FALSE) 433 | data = ncvar_get(NetCdf$ncdf, var) 434 | 435 | # if we want to read from a specific region... 436 | if (NetCdf$subset == TRUE) 437 | data = ncvar_get(NetCdf$ncdf, var, NetCdf$start, NetCdf$count) 438 | 439 | # encode our longitude, latitude and time into the variable's dimensions 440 | dimnames(data) = list(NetCdf$lon, NetCdf$lat, NetCdf$hour) 441 | return(data) 442 | } 443 | 444 | # 445 | # same as above, but works for 3d variables (with various levels) 446 | # 447 | NetCdf$get_var_3d = function(var) 448 | { 449 | # if we want to read from the entire file... 450 | if (NetCdf$subset == FALSE) 451 | data = ncvar_get(NetCdf$ncdf, var) 452 | 453 | # if we want to read from a specific region... 
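# (e.g. a subset of start = c(5, 12, 1) and count = c(10, 8, -1) -- values
#  illustrative -- becomes start = c(5, 12, 1, 1) and count = c(10, 8, -1, -1),
#  reading a 10 x 8 lon-lat window with every level and every hour in full)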
454 | if (NetCdf$subset == TRUE) 455 | { 456 | # modify start and count to include the fourth dimension (pressure levels) 457 | start = c(NetCdf$start, 1) 458 | count = c(NetCdf$count, -1) 459 | 460 | data = ncvar_get(NetCdf$ncdf, var, start, count) 461 | } 462 | 463 | # encode our longitude, latitude and time into the variable's dimensions 464 | dimnames(data) = list(NetCdf$lon, NetCdf$lat, NetCdf$levels, NetCdf$hour) 465 | 466 | return(data) 467 | } 468 | 469 | 470 | 471 | 472 | # 473 | # read the wind speed at 50 metres 474 | # 475 | NetCdf$get_speed50m = function(min=0, max=NA) 476 | { 477 | # format is u50m[lon, lat, time] 478 | if (NetCdf$model == 'MERRA2') 479 | { 480 | u50m = NetCdf$get_var('U50M') 481 | v50m = NetCdf$get_var('V50M') 482 | } 483 | 484 | if (NetCdf$model == 'MERRA1') 485 | { 486 | u50m = NetCdf$get_var('u50m') 487 | v50m = NetCdf$get_var('v50m') 488 | } 489 | 490 | if (NetCdf$model == 'MERRA1NEW') 491 | { 492 | u50m = NetCdf$get_var('U50M') 493 | v50m = NetCdf$get_var('V50M') 494 | } 495 | 496 | # calculate the speed from pythagoras 497 | speed50m = sqrt(u50m^2 + v50m^2) 498 | 499 | # constrain speeds to our given limits (useful if our plot colours don't go higher than that) 500 | if (!is.na(min)) 501 | speed50m[ speed50m < min ] = min 502 | 503 | if (!is.na(max)) 504 | speed50m[ speed50m > max ] = max 505 | 506 | # return 507 | return(speed50m) 508 | } 509 | 510 | NetCdf$get_speed10m = function(min=0, max=NA) 511 | { 512 | # format is u10m[lon, lat, time] 513 | if (NetCdf$model == 'MERRA2') 514 | { 515 | u10m = NetCdf$get_var('U10M') 516 | v10m = NetCdf$get_var('V10M') 517 | } 518 | 519 | if (NetCdf$model == 'MERRA1') 520 | { 521 | u10m = NetCdf$get_var('u10m') 522 | v10m = NetCdf$get_var('v10m') 523 | } 524 | 525 | if (NetCdf$model == 'MERRA1NEW') 526 | { 527 | u10m = NetCdf$get_var('U10M') 528 | v10m = NetCdf$get_var('V10M') 529 | } 530 | 531 | # calculate the speed from pythagoras 532 | speed10m = sqrt(u10m^2 + v10m^2) 533 | 534 | # constrain speeds to our given limits (useful if our plot colours don't go higher than that) 535 | if (!is.na(min)) 536 | speed10m[ speed10m < min ] = min 537 | 538 | if (!is.na(max)) 539 | speed10m[ speed10m > max ] = max 540 | 541 | # return 542 | return(speed10m) 543 | } 544 | 545 | NetCdf$get_speed02m = function(min=0, max=NA) 546 | { 547 | # format is u02m[lon, lat, time] 548 | if (NetCdf$model == 'MERRA2') 549 | { 550 | u02m = NetCdf$get_var('U2M') 551 | v02m = NetCdf$get_var('V2M') 552 | } 553 | 554 | if (NetCdf$model == 'MERRA1') 555 | { 556 | u02m = NetCdf$get_var('u2m') 557 | v02m = NetCdf$get_var('v2m') 558 | } 559 | 560 | if (NetCdf$model == 'MERRA1NEW') 561 | { 562 | u02m = NetCdf$get_var('U2M') 563 | v02m = NetCdf$get_var('V2M') 564 | } 565 | 566 | # calculate the speed from pythagoras 567 | speed02m = sqrt(u02m^2 + v02m^2) 568 | 569 | # constrain speeds to our given limits (useful if our plot colours don't go higher than that) 570 | if (!is.na(min)) 571 | speed02m[ speed02m < min ] = min 572 | 573 | if (!is.na(max)) 574 | speed02m[ speed02m > max ] = max 575 | 576 | # return 577 | return(speed02m) 578 | } 579 | 580 | 581 | 582 | 583 | 584 | 585 | ##### 586 | ## ## BUILD CLASS 587 | ##### 588 | 589 | # define this as an environment 590 | NetCdf = list2env(NetCdf) 591 | 592 | # set the class type 593 | class(NetCdf) = 'NetCdfClass' 594 | 595 | # run the constructor 596 | NetCdf$constructor(filename, model, printInfo) 597 | 598 | # return 599 | return(NetCdf) 600 | } 601 | 
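## ------------------------------------------------------------------
## A minimal usage sketch for NetCdfClass (added for illustration; the
## file path and region below are hypothetical, not part of the model).
## It shows the intended call sequence: open a file, crop all future
## reads to a region, pull out the 50m wind speed, then close.
## ------------------------------------------------------------------
if (FALSE)
{
	f = 'X:/MERRA2_WIND/MERRA2_400.tavg1_2d_slv_Nx.20140101.nc4'   # hypothetical input file
	nc = NetCdfClass(f, 'MERRA2', TRUE)                            # open and print a summary of contents

	region = list(lon=c(-11, 2), lat=c(49, 61))                    # e.g. the British Isles
	nc$subset_coords(region)                                       # all later reads are cropped to this box

	w50m = nc$get_speed50m()                                       # array indexed [lon, lat, hour]
	nc$close_file()
}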
--------------------------------------------------------------------------------
/lib/VWF.PLOTS.R:
--------------------------------------------------------------------------------
1 | ##################################################################
2 | #                                                                #
3 | #   BSD 3-Clause License                                         #
4 | #   Copyright (C) 2012-2017  Iain Staffell                       #
5 | #   All rights reserved.                                         #
6 | #                                                                #
7 | ##################################################################
8 | 
9 | 
10 | 
11 | #######################################################################################################################
12 | ##
13 | ##
14 | ##  generic plotting functions:
15 | ##
16 | ##     plot_ncdf(data, mainTitle, legendTitle, lonRange, latRange, zRange, zConstrain, levels, aspect, myColours)
17 | ##      .
18 | ##      .
19 | ##      .  data = a single hour of netcdf data e.g. w50m[lon, lat]
20 | ##                the longitude and latitude are read from the dimension names of data
21 | ##      .
22 | ##      .  mainTitle = text to go above the main plot (i.e. your variable)
23 | ##      .  legendTitle = text to go above the legend scale (i.e. your units)
24 | ##      .
25 | ##      .  latRange and lonRange = optionally control the extent of the map, defaults to something sensible
26 | ##      .  zRange = optionally control the range of values to be plotted, defaults to the full data range
27 | ##      .  zConstrain = set to TRUE to cap data to this range.  without this, the plot breaks outside the limits
28 | ##      .
29 | ##      .  levels = optionally control the number of colours to use in the plot, high numbers look smoother but render slower
30 | ##      .  aspect = optionally specify the aspect ratio for displaying and printing the map
31 | ##      .
32 | ##      .  myColours = optionally specify a colorRampPalette to use, defaults to some nice heat colours
33 | ##
34 | ##
35 | ##     full_ncdf(data, ..., dateTime) - same as above but with no axes, legends, etc.  suitable for converting into videos
36 | ##      .
37 | ##      .  dateTime = set to TRUE to add three boxes in the southern oceans giving the current date and time
38 | ##
39 | ##
40 | ##     print_ncdf(filename, data, width, height, res, ...) - draws a fullscreen plot straight to file - saving on rendering time
41 | ##
42 | ##
43 | ##
44 | ##
45 | ##
46 | ##  wind plotting functions:  (same as above, but with good formatting for wind speeds)
47 | ##
48 | ##     plot_ncdf_wind(data)
49 | ##     full_ncdf_wind(data)
50 | ##     print_ncdf_wind_axes / print_ncdf_wind_full(filename, data)
51 | ##
52 | ##
53 | ##
54 | ##
55 | ##  then there are some testing functions (looking at other ways to plot, which aren't as good)
56 | ##  and the base function that they all rely on:
57 | ##
58 | ##     filled.contour.is(...)
59 | ## 60 | ## 61 | 62 | 63 | 64 | 65 | 66 | ####################################################################################################################### 67 | ## 68 | ## GENERIC PLOTTING FUNCTIONS 69 | ## 70 | ## 71 | ## 72 | ## a generic function for plotting any NetCDF variable 73 | ## 74 | plot_ncdf = function(data, mainTitle='', legendTitle='', lonRange=NULL, latRange=NULL, zRange=range(data), zConstrain=FALSE, levels=64, aspect=NULL, myColours=NULL) 75 | { 76 | # get our world map shape (low res, for faster drawing) 77 | if (!exists('world.map')) 78 | world.map <<- get_world_map(res='low') 79 | 80 | 81 | # get our coordinates 82 | lon = as.numeric(dimnames(data)[[1]]) 83 | lat = as.numeric(dimnames(data)[[2]]) 84 | 85 | 86 | # calculate our default lon & lat ranges if needed 87 | if (is.null(lonRange)) 88 | lonRange = range(lon) 89 | 90 | if (is.null(latRange)) 91 | latRange = range(lat) 92 | 93 | 94 | # colour scheme for the plot 95 | if (is.null(myColours)) 96 | { 97 | heat.colours = c('#3F168A', '#2049D0', '#3288BD', '#66C2A5', '#ABDDA4', '#E6F598', '#FFFFBF', '#FEE08B', '#FDAE61', '#F46D43', '#D53E4F', '#9E0142') 98 | myColours = colorRampPalette(heat.colours) 99 | } 100 | 101 | 102 | # constrain data to our desired zRange 103 | if (zConstrain) 104 | { 105 | data[data < zRange[1] ] = zRange[1]; 106 | data[data > zRange[2] ] = zRange[2]; 107 | } 108 | 109 | 110 | # aspect ratio of the map 111 | if (is.null(aspect)) 112 | aspect = 2.5 * (max(lat)-min(lat)) / (max(lon)-min(lon)) 113 | 114 | 115 | # plot 116 | filled.contour.is(lon, lat, data, 117 | nlevels=levels, color.palette=myColours, 118 | asp=aspect, xlim=lonRange, ylim=latRange, zlim=zRange, 119 | key.title = title(main=legendTitle), plot.title = title(main=mainTitle, xlab='Longitude', ylab='Latitude'), 120 | plot.axes = { plot(world.map, xlim=lonRange, ylim=latRange, add=TRUE); axis(1); axis(2) } 121 | ) 122 | } 123 | 124 | 125 | 126 | 127 | 128 | ## 129 | ## a generic function for plotting any NetCDF variable fullscreen 130 | ## 131 | full_ncdf = function(data, mainTitle='', legendTitle='', lonRange=NULL, latRange=NULL, zRange=range(data), zConstrain=FALSE, levels=1024, aspect=NULL, myColours=NULL, dateTime=FALSE) 132 | { 133 | # get our world map shape (high res for better visuals) 134 | if (!exists('world.map')) 135 | world.map <<- get_world_map(res='high') 136 | 137 | 138 | # get our coordinates 139 | lon = as.numeric(dimnames(data)[[1]]) 140 | lat = as.numeric(dimnames(data)[[2]]) 141 | 142 | 143 | # calculate our default lon & lat ranges if needed 144 | if (is.null(lonRange)) 145 | lonRange = range(lon) 146 | 147 | if (is.null(latRange)) 148 | latRange = range(lat) 149 | 150 | 151 | # colour scheme for the plot 152 | if (is.null(myColours)) 153 | { 154 | heat.colours = c('#3F168A', '#2049D0', '#3288BD', '#66C2A5', '#ABDDA4', '#E6F598', '#FFFFBF', '#FEE08B', '#FDAE61', '#F46D43', '#D53E4F', '#9E0142') 155 | myColours = colorRampPalette(heat.colours) 156 | } 157 | 158 | 159 | # constrain data to our desired zRange 160 | if (zConstrain) 161 | { 162 | data[data < zRange[1] ] = zRange[1]; 163 | data[data > zRange[2] ] = zRange[2]; 164 | } 165 | 166 | 167 | # aspect ratio of the map 168 | if (is.null(aspect)) 169 | aspect = 2.5 * (max(lat)-min(lat)) / (max(lon)-min(lon)) 170 | 171 | 172 | # remove the borders for full-screen 173 | omar <- par(mar = par('mar')) 174 | on.exit(par(omar)) 175 | par(mar=rep(0,4)) 176 | 177 | 178 | # plot 179 | filled.contour.is(lon, lat, data, 180 | nlevels=levels, 
		color.palette=myColours,
181 | 		asp=aspect, xlim=lonRange, ylim=latRange, zlim=zRange,
182 | 		plot.axes = { plot(world.map, xlim=lonRange, ylim=latRange, add=TRUE) },
183 | 		axes=FALSE, add.legend=FALSE
184 | 	)
185 | 
186 | 	# add datetime
187 | 	if (dateTime == TRUE)
188 | 	{
189 | 		# box centres
190 | 		bx = c(-120,0,120)
191 | 		by = c(-50,-50,-50)
192 | 
193 | 		# box width & height
194 | 		bw = 25
195 | 		bh = 5
196 | 
197 | 		# plot rectangle and text
198 | 		rect(bx-bw/2, by-bh/2, bx+bw/2, by+bh/2, col=rgb(1,1,1,0.4))
199 | 		text(bx, by, t, adj=c(0.5,0.5))   # NB: 't' (the date/time label for this frame) must be set by the caller
200 | 	}
201 | 
202 | }
203 | 
204 | 
205 | 
206 | ##
207 | ## a generic function for plotting any NetCDF variable directly to a PNG file
208 | ##
209 | print_ncdf = function(filename, data, width, height, res, ...)
210 | {
211 | 	png(filename, height=height, width=width, res=res)
212 | 
213 | 	full_ncdf(data, ...)   # lon and lat are read from the dimnames of data
214 | 
215 | 	dev.off()
216 | }
217 | 
218 | 
219 | 
220 | 
221 | 
222 | 
223 | #######################################################################################################################
224 | ##
225 | ##  WIND PLOTTING FUNCTIONS
226 | ##
227 | ##
228 | ##
229 | ##  defaults for plotting wind speeds
230 | ##  legend is fixed to 0-30 m/s - plot is done in blues
231 | ##
232 | plot_ncdf_wind = function(data, mainTitle='', legendTitle='m/s', lonRange=NULL, latRange=NULL, zRange=c(0,30), zConstrain=FALSE, levels=64, aspect=NULL, myColours=NULL)
233 | {
234 | 	# default colour scheme
235 | 	if (is.null(myColours))
236 | 		myColours = colorRampPalette(c('#F7FBFF', '#DEEBF7', '#C6DBEF', '#9ECAE1', '#6BAED6', '#4292C6', '#2171B5', '#08519C', '#08306B'))
237 | 
238 | 	# plot it
239 | 	plot_ncdf(data, mainTitle, legendTitle, lonRange, latRange, zRange, zConstrain, levels, aspect, myColours)
240 | }
241 | 
242 | 
243 | full_ncdf_wind = function(data, latRange=NULL, lonRange=NULL, zRange=c(0,30), zConstrain=FALSE, levels=1024, aspect=NULL, myColours=NULL, dateTime=FALSE)
244 | {
245 | 	# default colour scheme
246 | 	if (is.null(myColours))
247 | 		myColours = colorRampPalette(c('#F7FBFF', '#DEEBF7', '#C6DBEF', '#9ECAE1', '#6BAED6', '#4292C6', '#2171B5', '#08519C', '#08306B'))
248 | 
249 | 	# plot it -- named arguments, as full_ncdf() takes lonRange before latRange and has no title parameters
250 | 	full_ncdf(data, lonRange=lonRange, latRange=latRange, zRange=zRange, zConstrain=zConstrain, levels=levels, aspect=aspect, myColours=myColours, dateTime=dateTime)
251 | }
252 | 
253 | 
254 | print_ncdf_wind_axes = function(filename, data, width, height, res, ...)
255 | {
256 | 	png(filename, height=height, width=width, res=res)
257 | 
258 | 	plot_ncdf_wind(data, ...)
259 | 
260 | 	dev.off()
261 | }
262 | 
263 | print_ncdf_wind_full = function(filename, data, width, height, res, ...)
264 | {
265 | 	png(filename, height=height, width=width, res=res)
266 | 
267 | 	full_ncdf_wind(data, ...)
268 | 
269 | 	dev.off()
270 | }
271 | 
272 | 
273 | 
274 | 
275 | 
276 | 
277 | #######################################################################################################################
278 | ##
279 | ##  a better filled contour function for R 3.x.x
280 | ##  iain staffell ~ 2014
281 | ##
282 | ##  by default it no longer draws borders between levels in the legend
283 | ##  this lets you draw smooth plots with lots of levels..
284 | ##
285 | ##  if you run with the new option add.legend=FALSE, you just end up with
286 | ##  the level plot.  this means subsequent calls to points(), lines(), etc..
287 | ##  will work as expected, as you remain in the coordinate system of the plot body
288 | ##
289 | ##  if you call par(mar=rep(0,4)), then call this with axes=FALSE, add.legend=FALSE
290 | ##  you can obtain a full-screen plot with no borders, no nothing.
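## ------------------------------------------------------------------
## A usage sketch for the wind plotting helpers above (illustrative;
## the 'w50m' array and output filename are assumptions).  'w50m' is
## the sort of [lon, lat, hour] array returned by NetCdf$get_speed50m().
## ------------------------------------------------------------------
if (FALSE)
{
	plot_ncdf_wind(w50m[ , , 12], mainTitle='Wind speed at 50m, hour 12')                 # interactive plot with axes and legend
	print_ncdf_wind_full('w50m.png', w50m[ , , 12], width=1920, height=1080, res=150)     # borderless frame, e.g. for videos
}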
291 | ## 292 | filled.contour.is = function (x = seq(0, 1, length.out = nrow(z)), y = seq(0, 1, 293 | length.out = ncol(z)), z, xlim = range(x, finite = TRUE), 294 | ylim = range(y, finite = TRUE), zlim = range(z, finite = TRUE), 295 | levels = pretty(zlim, nlevels), nlevels = 20, color.palette = cm.colors, 296 | col = color.palette(length(levels) - 1), plot.title, plot.axes, 297 | key.title, key.axes, asp = NA, xaxs = "i", yaxs = "i", las = 1, 298 | axes = TRUE, frame.plot = axes, add.legend=TRUE, ...) 299 | { 300 | # sort out data 301 | if (missing(z)) { 302 | if (!missing(x)) { 303 | if (is.list(x)) { 304 | z <- x$z 305 | y <- x$y 306 | x <- x$x 307 | } 308 | else { 309 | z <- x 310 | x <- seq.int(0, 1, length.out = nrow(z)) 311 | } 312 | } 313 | else stop("no 'z' matrix specified") 314 | } 315 | else if (is.list(x)) { 316 | y <- x$y 317 | x <- x$x 318 | } 319 | if (any(diff(x) <= 0) || any(diff(y) <= 0)) 320 | stop("increasing 'x' and 'y' values expected") 321 | 322 | mar.orig <- (par.orig <- par(c("mar", "las", "mfrow")))$mar 323 | on.exit(par(par.orig)) 324 | 325 | # plot legend 326 | if (add.legend) 327 | { 328 | w <- (3 + mar.orig[2L]) * par("csi") * 2.54 329 | layout(matrix(c(2, 1), ncol = 2L), widths = c(1, lcm(w))) 330 | par(las = las) 331 | mar <- mar.orig 332 | mar[4L] <- mar[2L] 333 | mar[2L] <- 1 334 | par(mar = mar) 335 | 336 | plot.new() 337 | plot.window(xlim = c(0, 1), ylim = range(levels), xaxs = "i", yaxs = "i") 338 | rect(0, levels[-length(levels)], 1, levels[-1L], col = col, border = NA) ## this removes legend borders 339 | if (missing(key.axes)) { 340 | if (axes) 341 | axis(4) 342 | } 343 | else key.axes 344 | box() 345 | if (!missing(key.title)) 346 | key.title 347 | mar <- mar.orig 348 | mar[4L] <- 1 349 | par(mar = mar) 350 | } 351 | 352 | # plot body 353 | plot.new() 354 | plot.window(xlim, ylim, "", xaxs = xaxs, yaxs = yaxs, asp = asp) 355 | .filled.contour(x, y, z, levels, col) 356 | if (missing(plot.axes)) { 357 | if (axes) { 358 | title(main = "", xlab = "", ylab = "") 359 | Axis(x, side = 1) 360 | Axis(y, side = 2) 361 | } 362 | } 363 | else plot.axes 364 | if (frame.plot) 365 | box() 366 | if (missing(plot.title)) 367 | title(...) 368 | else plot.title 369 | invisible() 370 | } 371 | 372 | -------------------------------------------------------------------------------- /lib/VWF.R: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # # 3 | # BSD 3-Clause License # 4 | # Copyright (C) 2012-2017 Iain Staffell # 5 | # All rights reserved. 
                                                               #
6 | #                                                              #
7 | ##################################################################
8 | 
9 | 
10 | # external dependencies (that are always needed)
11 | suppressPackageStartupMessages(library(ncdf4))
12 | suppressPackageStartupMessages(library(lubridate))
13 | suppressPackageStartupMessages(library(rworldmap))
14 | suppressPackageStartupMessages(library(doParallel))
15 | suppressPackageStartupMessages(library(data.table))
16 | suppressPackageStartupMessages(library(akima))
17 | 
18 | # external dependencies (not needed by all things, so not loaded by default)
19 | #library(MASS)
20 | #library(fields)
21 | 
22 | 
23 | 
24 | 
25 | # figure out the working directory for this VWF code
26 | if (!is.null(parent.frame(2)$ofile))
27 | {
28 | 	vwf.dir = paste0(dirname(parent.frame(2)$ofile), '/')   # paste0 directly, as %&% isn't defined until below
29 | }
30 | 
31 | # if that failed, establish the default
32 | if (!exists('vwf.dir'))
33 | {
34 | 	vwf.dir = 'Q:/VWF/lib/'
35 | }
36 | 
37 | 
38 | 
39 | # i'm a concatenator, twisted concatenator
40 | `%&%` = function(a, b) paste0(a, b)
41 | 
42 | # load the background code
43 | source(vwf.dir %&% 'VWF.STDLIB.R')
44 | source(vwf.dir %&% 'VWF.NCDF.R')
45 | source(vwf.dir %&% 'VWF.EXTRAPOLATE.R')
46 | source(vwf.dir %&% 'VWF.FARMS.R')
47 | source(vwf.dir %&% 'VWF.PLOTS.R')
48 | source(vwf.dir %&% 'VWF.EXTRAS.R')
49 | 
50 | flush('Welcome to the VWF v17.07.21\n')
51 | 
--------------------------------------------------------------------------------
/lib/VWF.STDLIB.R:
--------------------------------------------------------------------------------
1 | #############################################################
2 | #####   ____    _                       _____          #####
3 | #####  |  _ \  | |                     |  __ \         #####
4 | #####  | |_) | | |   __ _   _ __ ___   | |__) |        #####
5 | #####  |  _ <  | |  / _` | | '_ ` _ \  |  _  /         #####
6 | #####  | |_) | | | | (_| | | | | | | | | | \ \         #####
7 | #####  |____/  |_|  \__,_| |_| |_| |_| |_|  \_\        #####
8 | #####                                                  #####
9 | #############################################################
10 | #####   INCLUDE BLAM.R - 18-05-2017 - IAIN STAFFELL   #####
11 | #############################################################
12 | ########################   CONTENTS   ######################
13 | #############################################################
14 | 
15 | 
16 | 
17 | 
18 | #############################################################
19 | #########  GENERAL STUFF
20 | ##
21 | ##   %&%                     Concatenate strings like a %&% b
22 | ##   %notin%                 Opposite of %in%
23 | ##
24 | ##   flush(...)              Flush a message to console
25 | ##   clear_line(...)         Erase the last line of console text and flush a new message
26 | ##
27 | ##   rc(text=FALSE, header=FALSE)   Read data from clipboard, returns 1d vector or 2d table depending on what's there
28 | ##                            - Converts to numeric by default, set text=TRUE if you paste in text
29 | ##                            - Set header=TRUE to make first row of the clipboard into your colnames
30 | ##
31 | ##   wc()                    Write a 1d vector or 2d table to clipboard, for easy pasting into Excel
32 | ##
33 | ##   xc()                    Execute the contents of the clipboard (faster and cleaner than pasting code in)
34 | ##
35 | ##   memory_usage(...) / mem_use(...)   List objects and their memory use (pass a string to search by object name)
36 | ##
37 | ##   sleep(seconds)          ZZzz..
38 | ##
39 | ##
40 | ##
41 | #############################################################
42 | #########  READ DATA
43 | ##
44 | ##   read_data(filename)          Lazy man's read - will read big csv, rdata or rds as required
45 | ##
46 | ##   read_csv(filename)           Read in data 3x faster than read.csv()
47 | ##
48 | ##   read_big_csv(filename)       Read in data 10x faster than read.csv(), but with longer initialise time, good for >100 MB
49 | ##
50 | ##   read_zipped_csv(filename)    Read a ZIP file which contains a single CSV file, in-memory decompression, no junk files made
51 | ##
52 | ##   read_data_bind_rows(filenames, ...)   Read in several files, binding data on rows or columns...
53 | ##   read_data_bind_cols(filenames, ...)
54 | ##
55 | ##
56 | ##
57 | ##   write_data(data, filename, format)    Lazy man's write - will save as csv, rdata or rds as required.
58 | ##
59 | ##   write_csv(data, filename)    Removes some of the oddities of write.csv()
60 | ##
61 | ##   csv_to_rds(filename)         Convert CSV data to RDS (r binary) format, which is smaller and faster
62 | ##   rds_to_csv(filename)
63 | ##
64 | ##   merge_csv_files(inputFiles, outputFile)   Merge a set of CSV files into one file
65 | ##
66 | ##
67 | ##
68 | #############################################################
69 | #########  GRAPHICS
70 | ##
71 | ##   good_graphics(mar, mgp, padding, captions)   Set up good graphics parameters
72 | ##                            - Pass your own mar(b,l,t,r) or mgp(caption,label,0)
73 | ##                            - Change padding=TRUE if you want to keep that, or captions=FALSE if you like them rotated
74 | ##
75 | ##   good_png(filename, height, width, res)   Print a good png
76 | ##                            - height, width and res are optional (defaults give 8pt text at 8cm width ish)
77 | ##
78 | ##   plot_p(x, y, ...)        Plot points as transparent greys
79 | ##   plot_l(x, y, ...)        Plot as a line
80 | ##
81 | ##   boxplot.pars             Make boxplots pretty with boxplot(y ~ x, pars=boxplot.pars)
82 | ##
83 | ##   heat.colours, rainbow.colours, blue.colours   Nicer sets of colours
84 | ##
85 | ##   colour_ramp_data(pal=NULL, range=NULL)   Generate a colour ramp for your data (automatically normalises it, etc.)
86 | ##
87 | ##   stepped_line_graph(x, y)   Build a stepped data set from x and y which can be plotted like a histogram in Excel
88 | ##   stepped_line_graphs(x, col)   Build a stepped data set with multiple columns, 'col' is the name of the date/x column
89 | ##
90 | ##   log_gridlines(min, max)   Return the values of major and minor gridlines for a log chart (e.g. major = 10, 100, 1000; minor = 20, 30, ...)
91 | ##
92 | ##   hist_and_fit(data, x, fit, title)   Plot a histogram and line fit that you have created with pnorm(), pweibull(), etc.
93 | ##   hist_and_normal_fit(data, breaks)   Plot a histogram and automatically fit a normal distribution to it
94 | ##   hist_log_x(data, ...)    Plot a histogram with a logged x axis
95 | ##   hist_log_y(data, ...)    Plot a histogram with a logged y axis
96 | ##
97 | ##
98 | ##
99 | #############################################################
100 | #########  MAPS
101 | ##
102 | ##   get_world_map(shapeFile)   Returns a map from a given shapeFile, or the rWorldMap library if none passed
103 | ##   get_europe_map(resolution, disembodied)
104 | ##   get_europe_north_africa_map(resolution)
105 | ##
106 | ##   plot_map_points(lon, lat, ...)   Plot a map with points at the given lon and lat
107 | ##                            - See below for the 100 other options
108 | ##
109 | ##
110 | #############################################################
111 | #########  TEXT TOOLS
112 | ##
113 | ##   strrev('Hello World!')   Reverse a string
114 | ##
115 | ##   substr_reverse = function(x, n)   Get a substring from the end backwards
116 | ##
117 | ##   str_replace('e', 'u', 'Hello World!')   Replace 'x' for 'y' in 'string'
118 | ##
119 | ##   get_text_before(string, token, last=F)   Return the text in 'string' that comes before 'token'
120 | ##                            - First instance by default, or the last instance if last=T
121 | ##
122 | ##   get_text_after(string, token, last=F)   Ditto
123 | ##
124 | ##   get_text_between(string, token1, token2)   Return the text between 'token1' and 'token2'
125 | ##                            - Uses the first instance of each
126 | ##
127 | ##   is_string_numeric(data)   Test if a string (or array of strings) holds valid numbers
128 | ##
129 | ##   keyval_to_df(keyval, splitter)   Convert a 1d array of key-value pairs into a 1 row data frame
130 | ##   all_keyval_to_df(keyval_array, splitter)   Convert an array of arrays of key-value pairs into a 2d data frame
131 | ##
132 | ##   percent(val, dp=2)       Convert one/some numbers into percentage format
133 | ##
134 | ##
135 | ##
136 | #############################################################
137 | #########  ARRAYS AND DATA FRAMES
138 | ##
139 | ##   push(array, item)        Push and pop for vectors
140 | ##   pop(array)               (yes, i'm that lazy)
141 | ##
142 | ##   insert_row(DF, row, r)   Insert 'row' into 'DF' at row number 'r'
143 | ##
144 | ##   xbind(array, array, dimension)   Bind two multi-dimensional arrays on a specific dimension (as opposed to rbind, cbind)
145 | ##
146 | ##   nth_largest(array, n)    Return nth biggest or
147 | ##   nth_smallest(array, n)   smallest number in an array
148 | ##
149 | ##   which.closest(array, x)  Return the index of the array element that is closest to x
150 | ##
151 | ##   mult_cols_by_vector(matrix, vector)   Multiply each column of a matrix by each element of the vector (i.e. each row gets multiplied by the whole vector)
152 | ##   mult_rows_by_vector(matrix, vector)   Multiply each row of a matrix by each element of the vector (i.e. each column gets multiplied by the whole vector)
153 | ##
154 | ##   midPoints(x)             Return the midpoint of all adjacent elements (e.g. midPoints(1:5) = 1.5:4.5)
155 | ##
156 | ##   freq_table(data)         Return a frequency table in ascending count order
157 | ##
158 | ##   move_column(data, myCol, where, relCol)   Move columns within a dataframe - e.g. data = move_column(data, 'a_column', 'before', 'another_column')
159 | ##
160 | ##
161 | ##   moving_average(data, n)  Calculate the moving average for a vector of data, with n smoothing width either side of the value
162 | ##
163 | ##   biggest_diff(data, lag)  Calculate the largest swing within a given number of periods
164 | ##
165 | ##   na.everyone(dataframe, cols)   Copy NA values across all columns in a data frame
166 | ##
167 | ##   list_to_data_frame(l)    ...
168 | ##
169 | ##   aggregate_n(values, n, FUN)   Aggregate a vector or data-frame into fixed-sized groups
170 | ##
171 | ##
172 | ##
173 | #############################################################
174 | #########  TIME-SERIES DATA
175 | ##
176 | ##   aggregate_yearly(x, y, FUN)   Aggregate a time-series to yearly level.
177 | ##   _monthly()                    x = a lubridate time series
178 | ##   _weekly()                     y = the data series
179 | ##   _daily()                      FUN = aggregate function (defaults to mean)
180 | ##
181 | ##   seasonal_diurnal(x, y, FUN)
182 | ##   plot_diurnal(dd)
183 | ##
184 | ##   dmy_hs(string)           Additional options for lubridate
185 | ##   ymd_hs(string)
186 | ##
187 | ##   lag_data(data, steps)    Lag values in a vector of data by n steps by shifting them backwards (accepts non-integer steps)
188 | ##   lead_data(data, steps)   Lead values in a vector of data by shifting them forwards
189 | ##
190 | ##   align_data(df1, col1, df2, col2)   Align two data frames so that df1$col1 and df2$col2 are identical.  Useful in lining up time-series data.
191 | ##
192 | ##   tz_search('City')        Search for the timezone name that's relevant to your location
193 | ##
194 | ##   tz_shift(data, zone)     Calculate the shift needed to move your data into that timezone (works on a series of input times, accounts for daylight savings)
195 | ##
196 | ##   fill_missing_dates(x, datecol, period, fill)   Pad out your dataframe to have a complete time series of a given period (defaults to days).  Optionally, fill all missing timesteps with data = fill
197 | ##
198 | ##
199 | ##
200 | #############################################################
201 | #########  MATHS AND STATISTICS
202 | ##
203 | ##   mode(data)               Calculate the mode (most common)
204 | ##
205 | ##   weighted_mean(data, weights)   Mean and stdev with weighted data points
206 | ##   weighted_sd(data, weights)
207 | ##
208 | ##   stderr(data)             Calculate the standard error on a mean
209 | ##   ci95(data)               Calculate the 95% confidence interval (based on the stderr)
210 | ##
211 | ##   r2(fit)                  Return the adjusted R² value from an lm() fit
212 | ##
213 | ##   coef.err(fit)            Return the coefficients and standard errors from an lm() fit
214 | ##   lm.coef.err(formula)     Run an lm() fit and return its coefficients and errors
215 | ##
216 | ##   rms(residuals)           Calculate the root mean square from a set of residuals - e.g. call rms(fit - data)
217 | ##
218 | ##   rescale(data, min, max)  Rescale a vector to be in the range of [min, max]
219 | ##
220 | ##   constrained_distro(distro, length, min, max, ...)   Generate a random distribution that is constrained within set limits
221 | ##
222 | ##
223 | ##
224 | 
225 | 
226 | 
227 | 
228 | 
229 | ###################################################################################################################
230 | ##################   GENERAL STUFF   ########################################################################
231 | ###################################################################################################################
232 | 
233 | 
234 | 
235 | `%&%` = function(a, b) paste0(a, b)
236 | 
237 | `%notin%` = Negate(`%in%`)
238 | 
239 | 
240 | 
241 | # console - clear line and flush line
242 | clear_line = function(...)
243 | {
244 | 	cat("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b")
245 | 	cat("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b")
246 | 	cat("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b")
247 | 	cat("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b")
248 | 
249 | 	if (length(list(...)) > 0)
250 | 	{
251 | 		cat(...)
252 | 		flush.console()
253 | 	}
254 | }
255 | 
256 | flush = function(...)
257 | {
258 | 	cat(...)
259 | flush.console() 260 | } 261 | 262 | 263 | 264 | ##### 265 | ## ## read data from clipboard 266 | ## ## write data to clipboard 267 | ##### 268 | 269 | # if reading in text, set text=TRUE to return character rather than numeric data 270 | # if reading in multiple columns set header=TRUE to take your first row and set as column names 271 | rc = function(text=FALSE, header=FALSE) 272 | { 273 | # get the contents 274 | x = readClipboard() 275 | 276 | # deal with one column 277 | if (length( grep('\t', x)) == 0) 278 | { 279 | # convert to numeric data 280 | if (text == FALSE) 281 | { 282 | x = as.numeric(x) 283 | } 284 | 285 | return (x) 286 | } 287 | 288 | # deal with multiple columns 289 | if (length( grep('\t', x)) > 0) 290 | { 291 | x = read.table('clipboard', stringsAsFactors=FALSE) 292 | 293 | # turn the header row into column names 294 | if (header) 295 | { 296 | colnames(x) = x[1, ] 297 | x = x[-1, ] 298 | rownames(x) = 1:nrow(x) 299 | } 300 | 301 | # convert to numeric data 302 | if (text == FALSE) 303 | { 304 | cols = colnames(x) 305 | x[ , cols] = sapply(x[ , cols], as.numeric) 306 | } 307 | 308 | return (x) 309 | } 310 | } 311 | 312 | 313 | 314 | 315 | # write an object to clipboard 316 | # this uses various trickery to get around the clipboard size limit! 317 | wc = function(x, row.names=FALSE) 318 | { 319 | # write a 1d vector 320 | if (is.null(dim(x))) 321 | { 322 | writeClipboard( as.character(x) ) 323 | return() 324 | } 325 | 326 | # write a 2d data frame 327 | if (length(dim(x)) == 2) 328 | { 329 | # this tries the 16MB clipboard 330 | tryCatch( 331 | { 332 | write.table(x, "clipboard-16384", sep="\t", row.names=row.names) 333 | }, 334 | 335 | # if it fails, it does some funky business (as strings are unlimited??) 336 | warning = function(cond) 337 | { 338 | # convert x to a data.frame of char 339 | for(col in 1:ncol(x)) 340 | x[ , col] = as.character(x[ , col]) 341 | 342 | # now convert its transpose into a string - so we get c(1st row), c(2nd row), ... 343 | x = as.data.frame( t(x), stringsAsFactors=FALSE) 344 | x = toString(x) 345 | 346 | # convert the delimiter from comma to tab 347 | x = gsub('\", \"', '\t', x, fixed=TRUE) 348 | 349 | # convert EOL to a newline character 350 | x = gsub('\"), c(\"', '\n', x, fixed=TRUE) 351 | 352 | # chop off the first c(\" and the last \") 353 | x = substr(x, 4, nchar(x)-2) 354 | 355 | # now paste your goodies into excel 356 | writeClipboard(x) 357 | }) 358 | 359 | return() 360 | } 361 | } 362 | 363 | 364 | 365 | 366 | # execute the contents of the clipboard 367 | # this makes things work nicer, stop() and wait_for_key() both work, you don't get massive print-outs of code, etc... 368 | xc = function() { source('clipboard') } 369 | 370 | 371 | 372 | # make the console wait until a key is pressed 373 | wait_for_key = function(txt='Press [enter] to continue') 374 | { 375 | invisible(readline(prompt=txt)) 376 | } 377 | 378 | 379 | 380 | ## list objects and memory usage 381 | ## beefed up from http://tinyurl.com/pgxwl6c 382 | 383 | ## memory_usage() 384 | ## memory_usage("b", n=20) 385 | mem_use = function(...) { memory_usage(...) 
} 386 | memory_usage = function(pattern, pos=1, order.by="Size", decreasing=TRUE, n=10) 387 | { 388 | napply = function(names, fn) sapply(names, function(x) fn(get(x, pos = pos))) 389 | names = ls(pos = pos, pattern = pattern) 390 | obj.class = napply(names, function(x) as.character(class(x))[1]) 391 | obj.mode = napply(names, mode) 392 | obj.type = ifelse(is.na(obj.class), obj.mode, obj.class) 393 | obj.prettysize = napply(names, function(x) { capture.output(print(object.size(x), units = "auto")) }) 394 | obj.size = napply(names, object.size) 395 | obj.dim = t(napply(names, function(x) as.numeric(dim(x))[1:2])) 396 | vec = is.na(obj.dim)[, 1] & (obj.type != "function") 397 | obj.dim[vec, 1] = napply(names, length)[vec] 398 | out = data.frame(obj.size, obj.prettysize, obj.type, obj.dim) 399 | names(out) = c("Size", "PrettySize", "Type", "Rows", "Columns") 400 | out = out[order(out[[order.by]], decreasing=decreasing), ] 401 | out[ , 1] = out[ , 2] 402 | out = out[ , -2] 403 | if (n>0) 404 | out = head(out, n) 405 | out 406 | } 407 | 408 | 409 | 410 | ## sleep with a bit of animated output... 411 | sleep = function(seconds) 412 | { 413 | zzz = function(t) 414 | { 415 | flush('Z'); Sys.sleep(t); 416 | flush('Z'); Sys.sleep(t); 417 | flush('z'); Sys.sleep(t); 418 | flush('z'); Sys.sleep(t); 419 | flush('.'); Sys.sleep(t); 420 | flush('.'); Sys.sleep(t); 421 | flush('\b\b\b\b\b\b'); Sys.sleep(t); 422 | } 423 | 424 | # by default, you should print the 7 characters every 3.5 seconds... 425 | loops = ceiling(seconds / 3.5) 426 | t = seconds / loops / 7 427 | for (i in 1:loops) zzz(t) 428 | } 429 | 430 | 431 | 432 | 433 | 434 | ################################################################################################################### 435 | ################## READ BIG DATA ######################################################################## 436 | ################################################################################################################### 437 | 438 | 439 | # cope with me having data in several locations... 440 | # if my usb stick isn't plugged in (E:/WORK) try reading data from various hard drive backups.. 441 | fnchk = function(filename, stopOnError=TRUE) 442 | { 443 | # is SONY available? 444 | fn = filename 445 | if (file.exists(fn)) return(fn) 446 | 447 | # am i at work? 448 | fn = gsub('E:/', 'M:/', filename) 449 | if (file.exists(fn)) return(fn) 450 | 451 | # am i at home? 452 | fn = gsub('E:/', 'C:/Users/istaffel/BLAM!/', filename) 453 | if (file.exists(fn)) return(fn) 454 | 455 | # am i on my laptop? 456 | fn = gsub('E:/', 'D:/!SONY!/', filename) 457 | if (file.exists(fn)) return(fn) 458 | 459 | # is this a dropbox path at home? 460 | fn = gsub('istaffel', 'TiTS', filename) 461 | if (file.exists(fn)) return(fn) 462 | 463 | # is this a dropbox path at work? 464 | fn = gsub('TiTS', 'istaffel', filename) 465 | if (file.exists(fn)) return(fn) 466 | 467 | 468 | # spit an error 469 | if (stopOnError) stop('OH NO - FNCHK() CANNOT FIND ' %&% filename %&% '\n') 470 | 471 | # or just return junk 472 | return(fn) 473 | } 474 | 475 | 476 | 477 | 478 | # read csv, rdata or rds as required... 
479 | read_data = function(filename, verbose=FALSE)
480 | {
481 | 	xtn = get_text_after(filename, '.', last=TRUE)
482 | 
483 | 	if (tolower(xtn) == 'csv')
484 | 	{
485 | 		data = read_big_csv(filename, verbose=verbose)   # name the argument - positionally it would land in 'data.table'
486 | 		return(data)
487 | 	}
488 | 
489 | 	if (tolower(xtn) == 'rdata')
490 | 	{
491 | 		load(filename)   # NB: this assumes the saved object was called 'data'
492 | 		return(data)
493 | 	}
494 | 
495 | 	if (tolower(xtn) == 'rds')
496 | 	{
497 | 		data = readRDS(filename)
498 | 		return(data)
499 | 	}
500 | 
501 | 	stop('Error in read_data(', filename, ')\n')
502 | }
503 | 
504 | 
505 | # any time you read data use these tweaked options for 3x speed!
506 | read_csv = function(filename, ...)
507 | {
508 | 	filename = fnchk(filename)
509 | 
510 | 	# read the first few lines to understand the column classes
511 | 	heading = read.csv(filename, header=TRUE, stringsAsFactors=FALSE, nrows=10, ...)
512 | 	classes = sapply(heading, class)
513 | 
514 | 	# class integers as numeric (in case not all of them are ints)
515 | 	classes[ classes == 'integer' ] = 'numeric'
516 | 
517 | 	data = tryCatch(
518 | 		# read the whole file, stating what classes we expect for extra speed
519 | 		read.csv(filename, header=TRUE, colClasses=classes, comment.char='', ...),
520 | 
521 | 		# if that doesn't work then just read normally (sometimes colClasses causes an error in scan())
522 | 		error = function(e) { read.csv(filename, header=TRUE, stringsAsFactors=FALSE, ...) }
523 | 	)
524 | 
525 | 	return(data)
526 | }
527 | 
528 | 
529 | # any time you read more than 100MB use this for 100x speed!
530 | # use data.table=TRUE to return a data.table
531 | read_big_csv = function(filename, data.table=FALSE, verbose=FALSE)
532 | {
533 | 	filename = fnchk(filename)
534 | 
535 | 	suppressPackageStartupMessages(library(data.table))
536 | 	data = fread(filename, sep=',', na.strings=c('NA','N/A',''), showProgress=verbose, data.table=data.table)
537 | 
538 | 	return(data)
539 | }
540 | 
541 | 
542 | # read a csv file stuffed inside a zip file
543 | read_zipped_csv = function(file, ...)
544 | {
545 | 	zipFileInfo = unzip(file, list=TRUE)
546 | 	contents = as.character(zipFileInfo$Name)
547 | 
548 | 	if (nrow(zipFileInfo) > 1)
549 | 		stop("read_zipped_csv: More than one data file inside your file:", file, "\n")
550 | 
551 | 	read.csv( unz(file, contents), stringsAsFactors=FALSE, ...)
552 | }
553 | 
554 | 
555 | 
556 | # read several files binding on rows or columns
557 | read_data_bind_rows = function(filenames, ...)
558 | {
559 | 	# library(plyr)
560 | 	# do.call(rbind.fill, lapply(filenames, function(x) read_data(x, ...)))
561 | 
562 | 	library(plyr)
563 | 	LIST = list()
564 | 	n = length(filenames)
565 | 
566 | 	# read in all files to a list (this doesn't require memory reallocation)
567 | 	for (i in 1:n)
568 | 	{
569 | 		LIST[[i]] = read_data(filenames[i])
570 | 		sz = capture.output(print(object.size(LIST), units = "auto"))
571 | 		clear_line('Read', sz, '-', i, 'of', n, '-', filenames[i])
572 | 	}
573 | 
574 | 	# convert from list to DF (yay, efficiency...)
575 | 	clear_line('Reformatting as a data.frame...')
576 | 	DF = do.call(rbind.fill, LIST)
577 | 	clear_line()
578 | 	DF
579 | }
580 | 
581 | 
582 | read_data_bind_cols = function(filenames, ...)
583 | {
584 | 	do.call(cbind, lapply(filenames, function(x) read_data(x, ...)))
585 | }
586 | 
587 | 
588 | 
589 | 
590 | 
591 | 
592 | 
593 | 
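## ------------------------------------------------------------------
## A usage sketch for the readers above (illustrative; the file names
## are hypothetical).  read_data() dispatches on the file extension,
## so the same call works for .csv, .rds and .rdata files.
## ------------------------------------------------------------------
if (FALSE)
{
	gen = read_data('E:/WORK/generation_2016.csv')                            # big csv -> fread() via read_big_csv()
	gen = read_data('E:/WORK/generation_2016.rds')                            # same data, faster binary format
	all = read_data_bind_rows('E:/WORK/generation_201' %&% 4:6 %&% '.csv')    # stack several years on rows
}


594 | # write csv, rdata or rds as required...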
595 | write_data = function(data, filename, format) 596 | { 597 | # check and correct the extension of our filename 598 | xtn = get_text_after(filename, '.', last=TRUE) 599 | xtn = tolower(xtn) 600 | format = tolower(format) 601 | 602 | if (xtn != format) 603 | { 604 | filename = get_text_before(filename, '.', last=TRUE) 605 | filename = filename %&% '.' %&% format 606 | } 607 | 608 | if (format == 'csv') 609 | { 610 | write_csv(data, filename, row.names=FALSE) 611 | return() 612 | } 613 | 614 | if (format == 'rds') 615 | { 616 | saveRDS(data, filename) 617 | return() 618 | } 619 | 620 | if (format == 'rdata') 621 | { 622 | save(data, file=filename) 623 | return() 624 | } 625 | 626 | stop('Error in write_data(', filename, ',', format, ')\n') 627 | } 628 | 629 | 630 | # write csv: 631 | # accepts col.names=FALSE as an option 632 | # you may also want row.names=FALSE 633 | # defaults to blank NA strings 634 | # accepts append=TRUE as an option 635 | # 636 | write_csv = function(data, filename, col.names=TRUE, na='', append=FALSE, ...) 637 | { 638 | if (append == TRUE) 639 | { 640 | write.table(data, filename, append=TRUE, sep=',', col.names=FALSE, na=na, ...) 641 | return() 642 | } 643 | 644 | if (col.names == TRUE) 645 | { 646 | write.csv(data, filename, na=na, ...) 647 | return() 648 | } 649 | 650 | if (col.names == FALSE) 651 | { 652 | write.table(data, filename, sep=',', col.names=FALSE, na=na, ...) 653 | return() 654 | } 655 | } 656 | 657 | 658 | 659 | # convert csv to rds and vice versa 660 | # pass the name of the file to convert 661 | # the format of the input file ('csv', 'rds', 'rdata') 662 | # and the format to convert it to 663 | # 664 | # optionally, say which columns contain dates (which should be converted to lubridate) 665 | # 666 | convert_data_format = function(filename_in, format_in, format_out, date_cols=NULL) 667 | { 668 | # figure out filenames 669 | filename_out = gsub( format_in, format_out, filename_in) 670 | filename_out = gsub(tolower(format_in), tolower(format_out), filename_out) 671 | filename_out = gsub(toupper(format_in), toupper(format_out), filename_out) 672 | 673 | if (filename_out == filename_in) 674 | stop("convert_data_format(", filename_in, ", ", format_in, ", ", format_out, "): I don't want to overwrite your shit!") 675 | 676 | # read data 677 | flush("Reading", filename_in) 678 | data = read_data(filename_in) 679 | 680 | # convert dates to POSIX 681 | if (!is.null(date_cols)) 682 | { 683 | clear_line('Converting date_cols...') 684 | library(lubridate) 685 | 686 | for (d in date_cols) 687 | { 688 | test = data[1, d] 689 | 690 | # test 2000-01-01 00:00:00 691 | t = suppressWarnings( ymd_hms(test) ) 692 | if (!is.na(t)) 693 | { 694 | data[ , d] = ymd_hms(data[ , d]) 695 | next 696 | } 697 | 698 | # test 01/01/2000 00:00 699 | t = suppressWarnings( dmy_hm(test) ) 700 | if (!is.na(t)) 701 | { 702 | data[ , d] = dmy_hm(data[ , d]) 703 | next 704 | } 705 | 706 | # test 2000-01-01 00:00 707 | t = suppressWarnings( ymd_hm(test) ) 708 | if (!is.na(t)) 709 | { 710 | data[ , d] = ymd_hm(data[ , d]) 711 | next 712 | } 713 | 714 | # test 01/01/2000 00:00:00 715 | t = suppressWarnings( dmy_hms(test) ) 716 | if (!is.na(t)) 717 | { 718 | data[ , d] = dmy_hms(data[ , d]) 719 | next 720 | } 721 | 722 | # if its not one of those formats... ugh, ignore it 723 | } 724 | } 725 | 726 | clear_line('Writing', filename_out) 727 | write_data(data, filename_out, format_out) 728 | 729 | clear_line() 730 | } 731 | 732 | csv_to_rds = function(filename_in, ...) 
{ convert_data_format(filename_in, 'csv', 'rds', ...) } 733 | rds_to_csv = function(filename_in, ...) { convert_data_format(filename_in, 'rds', 'csv', ...) } 734 | 735 | 736 | 737 | 738 | ## 739 | ## merge a set of csv files together, writing to disk 740 | ## this is done on-disk rather than in-memory, allowing you to create very big files 741 | ## 742 | merge_csv_files = function(inputFiles, outputFile) 743 | { 744 | # check all the input files exist 745 | if (FALSE %in% file.exists(inputFiles)) 746 | stop("merge_csv_files: some of your input files don't exist!!!\n\n") 747 | 748 | # write the first month with column headers 749 | data = read_big_csv(inputFiles[1]) 750 | 751 | cat('Merging file:', basename(inputFiles[1])) 752 | flush() 753 | 754 | write_csv(data, outputFile, row.names=FALSE) 755 | 756 | # write the rest of them, appending 757 | for (i in 2:length(inputFiles)) 758 | { 759 | data = read_big_csv(inputFiles[i]) 760 | 761 | clear_line() 762 | cat('Merging file:', basename(inputFiles[i])) 763 | flush() 764 | 765 | write_csv(data, outputFile, row.names=FALSE, append=TRUE) 766 | } 767 | 768 | clear_line() 769 | cat('Saved files to:', outputFile, '\n') 770 | flush() 771 | } 772 | 773 | 774 | 775 | 776 | ################################################################################################################### 777 | ################## GRAPHICS ############################################################################ 778 | ################################################################################################################### 779 | 780 | 781 | ### 782 | # # set up good graphics parameters.. 783 | # # pass your own values for mar (margins) or mgp (distance between axes and labels) 784 | # # options to keep the interior padding, or rotated y-axis labels 785 | ### 786 | good_graphics = function 787 | ( 788 | mar = c(3, 3, 1.5, 1.5), 789 | mgp = c(1.75, 0.5, 0), 790 | padding = FALSE, 791 | captions = TRUE 792 | ) 793 | { 794 | # maximise graphics space 795 | par(bg="white") # white background for drawing pngs 796 | par(mar=mar) # shrink the margins (bottom, left, top, right) 797 | par(tck=0.01) # tick marks go inwards 798 | par(mgp=mgp) # move axis captions and labels closer 799 | par(font.lab=2) # bold axis captions 800 | 801 | if (captions == TRUE) par(las=1) # make all tick-mark labels horizontal 802 | if (captions == FALSE) par(las=0) 803 | 804 | if (padding == FALSE) par(xaxs="i", yaxs="i") # remove padding between the graph and the axes) 805 | if (padding == 'y') par(xaxs="i", yaxs="r") # just remove padding from the x axis (so y axis is padded) 806 | if (padding == TRUE) par(xaxs="r", yaxs="r") 807 | } 808 | 809 | 810 | 811 | ### 812 | # # print a decent quality png of your graph 813 | # # pass the filename to save as 814 | # # pass your own height and width (in pixels) and resolution (in ppi) 815 | # # the defaults equate to ~8pt font at 8.5cm width 816 | ### 817 | good_png = function 818 | ( 819 | filename, 820 | height = 2500, 821 | width = 2500, 822 | res = 400 823 | ) 824 | { 825 | dev.print(png, file=filename, height=height, width=width, res=res) 826 | } 827 | 828 | 829 | 830 | ## 831 | ## simple plot functions for lazy fuckers 832 | ## 833 | 834 | plot_p = function(x, y=NULL, pch=16, cex=0.75, col=rgb(0,0,0,0.1), xmin=NULL, xmax=NULL, ymin=NULL, ymax=NULL, xlim=NULL, ylim=NULL, ...) 
835 | { 836 | # deal with passing a single variable 837 | if (is.null(y)) 838 | { 839 | if (is.vector(x)) 840 | { 841 | y = x 842 | x = seq_along(y) 843 | 844 | } else { 845 | 846 | y = x[ , 2] 847 | x = x[ , 1] 848 | } 849 | 850 | } 851 | 852 | # default axis limits 853 | if (is.null(xlim)) xlim = range(x, na.rm=TRUE) 854 | if (is.null(ylim)) ylim = range(y, na.rm=TRUE) 855 | 856 | # modified axis limits 857 | if (!is.null(xmin)) xlim[1] = xmin 858 | if (!is.null(xmax)) xlim[2] = xmax 859 | if (!is.null(ymin)) ylim[1] = ymin 860 | if (!is.null(ymax)) ylim[2] = ymax 861 | 862 | # plot 863 | plot(x, y, pch=pch, cex=cex, col=col, xlim=xlim, ylim=ylim, ...) 864 | } 865 | 866 | 867 | 868 | 869 | 870 | plot_l = function(x, y=NULL, xmin=NULL, xmax=NULL, ymin=NULL, ymax=NULL, xlim=NULL, ylim=NULL, ...) 871 | { 872 | # deal with passing a single variable 873 | if (is.null(y)) 874 | { 875 | if (is.vector(x)) 876 | { 877 | y = x 878 | x = seq_along(y) 879 | 880 | } else { 881 | 882 | y = x[ , 2] 883 | x = x[ , 1] 884 | } 885 | 886 | } 887 | 888 | # default axis limits 889 | if (is.null(xlim)) xlim = range(x, na.rm=TRUE) 890 | if (is.null(ylim)) ylim = range(y, na.rm=TRUE) 891 | 892 | # modified axis limits 893 | if (!is.null(xmin)) xlim[1] = xmin 894 | if (!is.null(xmax)) xlim[2] = xmax 895 | if (!is.null(ymin)) ylim[1] = ymin 896 | if (!is.null(ymax)) ylim[2] = ymax 897 | 898 | # plot 899 | plot(x, y, type='l', xlim=xlim, ylim=ylim, ...) 900 | } 901 | 902 | 903 | 904 | # plot_l = function(x, y=NULL, ...) 905 | # { 906 | # if (!is.null(y)) 907 | # plot(x, y, type='l', ...) 908 | # 909 | # if (is.null(y)) 910 | # plot(x, type='l', ...) 911 | # } 912 | 913 | 914 | 915 | ## 916 | ## add transparency to a given colour (e.g. named one) 917 | ## http://stackoverflow.com/questions/8047668/transparent-equivalent-of-given-color 918 | ## 919 | alphaColour = function(someColor, alpha=100) 920 | { 921 | newColor =col2rgb(someColor) 922 | apply(newColor, 2, function(curcoldata){rgb(red=curcoldata[1], green=curcoldata[2], blue=curcoldata[3], alpha=alpha, maxColorValue=255)}) 923 | } 924 | 925 | 926 | 927 | # a list of parameters to make boxplots nice 928 | # see http://stat.ethz.ch/R-manual/R-devel/library/graphics/html/bxp.html 929 | boxplot.pars = list(boxfill = 'grey90', whisklty=1, whiskcol='grey40', outpch=16, outcex=0.4, outcol='grey70') 930 | 931 | 932 | 933 | 934 | 935 | 936 | heat.colours = c('#3F168A', '#2049D0', '#3288BD', '#66C2A5', '#ABDDA4', '#E6F598', '#FFFFBF', '#FEE08B', '#FDAE61', '#F46D43', '#D53E4F', '#9E0142') 937 | blue.colours = c('#F7FBFF', '#DEEBF7', '#C6DBEF', '#9ECAE1', '#6BAED6', '#4292C6', '#2171B5', '#08519C', '#08306B') 938 | 939 | # go see the random hacks in E:\WORK\Code\R\colour ramps - perceptual hue colour wheel.R 940 | rainbow.colours.10 = c('#CC00FF', '#8F00FF', '#000AFF', '#00A3FF', '#00FFC2', '#8FFF00', '#FAFF00', '#FFB800', '#FF7A00', '#F00000', '#AA0000') 941 | rainbow.colours.8 = c('#9E00FF', '#000AFF', '#00A3FF', '#00FFC2', '#70FF00', '#FFF500', '#FF7A00', '#FF0000', '#AA0000') 942 | rainbow.colours = rainbow.colours.8 943 | 944 | 945 | 946 | 947 | 948 | # function to turn your data into a ramp 949 | # accepts a vector of numbers, returns a vector of hex RGB colours 950 | # optionally pass the palette to use (defaults to rainbow.colours) 951 | # optionally pass the min-max range to enforce on your values 952 | # 953 | colour_ramp_data = function(x, pal=NULL, range=NULL) 954 | { 955 | # normalise our input data 956 | normalise = function(x, range) 957 | { 958 | if 
(!is.null(range)) 959 | { 960 | min = min(range) 961 | max = max(range) 962 | 963 | } else { 964 | 965 | min = min(x, na.rm=TRUE) 966 | max = max(x, na.rm=TRUE) 967 | } 968 | 969 | x = (x - min) / (max - min) 970 | x[ x < 0 ] = 0 971 | x[ x > 1 ] = 1 972 | x 973 | } 974 | 975 | x = normalise(x, range) 976 | 977 | # create a palette if needs be 978 | if (length(pal) < 2) 979 | pal = rainbow.colours 980 | 981 | # convert the x data to rgb 982 | myRamp = colorRampPalette(pal)(length(pal)) 983 | cols = colorRamp(myRamp)(x) 984 | 985 | # convert to an rgb array to hex strings 986 | rgb_to_hex = function(x) { if (is.na(x[1])) return(NA); rgb(x[1], x[2], x[3], maxColorValue=255) } 987 | apply(cols, 1, rgb_to_hex) 988 | } 989 | 990 | 991 | 992 | # 993 | # turn standard x-y data series into a stepped x-y series 994 | # with vertical transitions at the mid-point between each data point 995 | # 996 | # x and y must be of equal length 997 | # 998 | # this lets you plot a line chart in excel that looks like line(type='s') in R 999 | # 1000 | # e.g. 1001 | # h = hist(data, plot=FALSE) 1002 | # h = stepped_line_graph(h$mids, h$counts) 1003 | # plot(h, type='l') 1004 | # 1005 | stepped_line_graph = function(x, y, mids=TRUE) 1006 | { 1007 | # check 1008 | n = length(x) 1009 | if (n != length(y)) 1010 | stop(paste('stepped_line_graph: x and y vectors must have equal length. x =', length(x), '- y =', length(y), '\n')) 1011 | 1012 | # find the x midpoints 1013 | if (mids) 1014 | { 1015 | dx = diff(x) / 2 1016 | push(dx, tail(dx, 1)) 1017 | x = c(x[1]-dx[1], x+dx) 1018 | } 1019 | 1020 | # or bolt something on the end 1021 | if (!mids) 1022 | { 1023 | dx = diff(x) 1024 | push(x, tail(x,1)+tail(dx,1)) 1025 | } 1026 | 1027 | # build a stepped x sequence 1028 | x = rep(x, each=2) 1029 | x = tail( head(x, -1), -1) 1030 | 1031 | # build a stepped y sequence 1032 | y = rep(y, each=2) 1033 | 1034 | data.frame(x=x, y=y) 1035 | } 1036 | 1037 | # run on multiple columns 1038 | # pass the data frame and column name 1039 | stepped_line_graphs = function(df, x, mids=TRUE) 1040 | { 1041 | # get the data columns 1042 | y = colnames(df) 1043 | y = y[y %notin% x] 1044 | 1045 | if (length(y) == 0) 1046 | stop('stepped_line_graphs(): no data columns found\n') 1047 | 1048 | # run through the first 1049 | out = stepped_line_graph(df[ , x], df[ , y[1]], mids) 1050 | colnames(out) = c(x, y[1]) 1051 | 1052 | if (length(y) == 1) 1053 | return(out) 1054 | 1055 | # run through the rest 1056 | for (i in 2:length(y)) 1057 | { 1058 | o = stepped_line_graph(df[ , x], df[ , y[i]], mids) 1059 | out = cbind(out, o[ , 2]) 1060 | } 1061 | 1062 | colnames(out) = c(x, y) 1063 | 1064 | return(out) 1065 | } 1066 | 1067 | 1068 | 1069 | 1070 | # return the value of grid lines on a log chart between min and max 1071 | log_gridlines = function(min, max) 1072 | { 1073 | x = list() 1074 | 1075 | log_min = floor(log10(min)) 1076 | log_max = ceiling(log10(max)) 1077 | 1078 | m = log_min:log_max 1079 | m = 10 ^ m 1080 | 1081 | x$major = m 1082 | x$minor = c(m*2, m*3, m*4, m*5, m*6, m*7, m*8, m*9) 1083 | x$minor = sort(x$minor) 1084 | 1085 | x$major = x$major[ x$major > min & x$major < max ] 1086 | x$minor = x$minor[ x$minor > min & x$minor < max ] 1087 | 1088 | x 1089 | } 1090 | 1091 | 1092 | 1093 | 1094 | 1095 | # plot a histogram with a line fit 1096 | # data = values to histogram 1097 | # x = sequence of breaks for the histogram 1098 | # fit = the pnorm(), pweibull(), etc. 
#       function
1099 | hist_and_fit = function(data, x, fit, title='')
1100 | {
1101 | 	# pre-calculate the histogram
1102 | 	h = hist(data, breaks=x, plot=FALSE)
1103 | 
1104 | 	# calculate the density of our fit
1105 | 	fit = diff(fit) * length(data)
1106 | 
1107 | 	# use that to determine the ylimits
1108 | 	ylim = c(0, max(h$counts, fit))
1109 | 
1110 | 	# and plot
1111 | 	plot(h, ylim=ylim, main=title, col='grey95')
1112 | 	lines(midPoints(x), fit, col='red', lwd=2)
1113 | }
1114 | 
1115 | 
1116 | 
1117 | # hist_and_fit for the lazy man
1118 | # automatically fit a normal distribution to your data
1119 | # pass the data and the number of breaks (if you want) - the title is generated automatically
1120 | # returns the mean and standard deviation
1121 | hist_and_normal_fit = function(data, breaks=NA)
1122 | {
1123 | 	if (is.na(breaks))  h = hist(data, plot=FALSE)
1124 | 	else                h = hist(data, breaks=breaks, plot=FALSE)
1125 | 
1126 | 	x = h$breaks
1127 | 	mu = mean(data, na.rm=TRUE)
1128 | 	sd = sd(data, na.rm=TRUE)
1129 | 	y = pnorm(x, mu, sd)
1130 | 	title = paste(signif(mu, 3), '\U00B1', signif(sd, 3))
1131 | 
1132 | 	hist_and_fit(data, x, y, title)
1133 | 
1134 | 	c(mu, sd)
1135 | }
1136 | 
1137 | 
1138 | 
1139 | # histogram with logged x axis
1140 | # isn't very good - it should have funny shaped bars perhaps?
1141 | #
1142 | hist_log_x = function(data, ...)
1143 | {
1144 | 	# plot your logged data without an x axis
1145 | 	data = log10(data)
1146 | 	hist(data, xaxt='n', ...)
1147 | 
1148 | 	# design a nice x axis
1149 | 	t = axTicks(1)
1150 | 	axis(1, at=t, labels=prettyNum(10**t))
1151 | }
1152 | 
1153 | 
1154 | 
1155 | # histogram with logged y axis
1156 | #
1157 | hist_log_y = function(data, ...)
1158 | {
1159 | 	# calculate the histogram
1160 | 	h = hist(data, plot=FALSE, ...)
1161 | 
1162 | 	# log the counts and add on a nubbin so that counts of 1 are visible
1163 | 	nubbin = 0.05
1164 | 	h$counts = log10(h$counts) + nubbin
1165 | 
1166 | 	# plot without a y-axis
1167 | 	plot(h, yaxt='n', ylim=c(0, max(h$counts, na.rm=TRUE)*1.02))
1168 | 
1169 | 	# get the tick locations - round them off so you get nice numbers - then add on our nubbin
1170 | 	t = axTicks(2)
1171 | 	t = signif(10**t, 1)
1172 | 	t = log10(t) + nubbin
1173 | 
1174 | 	# print the axis values (without our nubbin)
1175 | 	axis(2, at=t, labels=prettyNum(10**(t-nubbin)))
1176 | }
1177 | 
1178 | 
1179 | 
1180 | 
1181 | ##################################################################################################################
1182 | ##################   MAP TOOLS   ###########################################################################
1183 | ##################################################################################################################
1184 | 
1185 | 
1186 | ##
1187 | ##  Grab a world map from either:
1188 | ##    the shapefile of your choosing
1189 | ##    rworldmap's getMap with the resolution of your choosing
1190 | ##
1191 | ##  return the spatial data frame thing...
1192 | ##
1193 | ##  my usual maps are:
1194 | ##    E:/WORK/Z Data/Maps/Natural Earth/50m Countries/ne_50m_admin_0_countries.shp
1195 | ##    E:/WORK/Z Data/Maps/Natural Earth/Admin Map Subunits/ne_10m_admin_0_map_subunits.shp
1196 | ##    E:/WORK/Z Data/Maps/TM_WORLD_BORDERS-0.3/TM_WORLD_BORDERS-0.3.shp
1197 | ##
1198 | get_world_map = function(shapeFile=NULL, res='high', wgsProjection=TRUE)
1199 | {
1200 | 	# use a custom shapefile
1201 | 	if (!is.null(shapeFile))
1202 | 	{
1203 | 		# if we used this shapefile previously
1204 | 		if (exists('world.map.shape') & exists('world.map'))
1205 | 		{
1206 | 			# then don't load it again!
1207 | if (world.map.shape == shapeFile) 1208 | { 1209 | return(world.map) 1210 | } 1211 | } 1212 | 1213 | # load the shapefile 1214 | suppressPackageStartupMessages(library(maptools)) 1215 | world.map = readShapeSpatial(shapeFile) 1216 | 1217 | # not necessary, but yields nicer default axis labels and aspect ratio 1218 | if (wgsProjection == TRUE) 1219 | proj4string(world.map) = "+proj=longlat +datum=WGS84" 1220 | 1221 | # remember the shapefile we used 1222 | world.map.shape <<- shapeFile 1223 | } 1224 | 1225 | # otherwise use rWorldMap 1226 | if (!exists('world.map')) 1227 | { 1228 | suppressPackageStartupMessages(library(rworldmap)) 1229 | world.map = getMap(res) 1230 | } 1231 | 1232 | return(world.map) 1233 | } 1234 | 1235 | 1236 | 1237 | ## 1238 | ## Grab a map of Europe, with non-members cut out 1239 | ## and far-flung colonies cropped out using a bounding box 1240 | ## 1241 | ## optionally set disembodied=TRUE to remove CIS and Turkey, making EU look like an island of paradise.. 1242 | ## 1243 | ## only works with the rWorldMap maps for now... 1244 | ## 1245 | get_europe_map = function(resolution='high', disembodied=FALSE) 1246 | { 1247 | # load a world map 1248 | suppressPackageStartupMessages(library(rworldmap)) 1249 | world.map <<- getMap(resolution) 1250 | 1251 | 1252 | # figure out which countries are in europe 1253 | 1254 | # identify countries by their general region 1255 | eu = world.map@data$REGION == 'Europe' 1256 | eu[is.na(eu)] = FALSE 1257 | 1258 | # filter out ones that aren't there 1259 | if (disembodied) 1260 | { 1261 | not.really.europe = c('RUS', 'BLR', 'UKR', 'GRL', 'TUR', 'ARM', 'AZE', 'GEO') 1262 | eu[world.map$ISO3 %in% not.really.europe] = FALSE 1263 | } 1264 | 1265 | # filter down our world map 1266 | europe.map = world.map[eu, ] 1267 | 1268 | # filter out the colonies 1269 | suppressPackageStartupMessages(library(raster)) 1270 | 1271 | if (disembodied) 1272 | europe.map = crop(europe.map, extent(-25, 32, 34, 72)) 1273 | 1274 | if (!disembodied) 1275 | europe.map = crop(europe.map, extent(-25, 32+10, 34, 72)) 1276 | 1277 | 1278 | return(europe.map) 1279 | } 1280 | 1281 | get_europe_north_africa_map = function(resolution='high') 1282 | { 1283 | # load a world map 1284 | suppressPackageStartupMessages(library(rworldmap)) 1285 | world.map <<- getMap(resolution) 1286 | 1287 | 1288 | # figure out which countries are in europe 1289 | 1290 | # identify countries by their general region 1291 | eu = world.map@data$REGION == 'Europe' 1292 | eu[is.na(eu)] = FALSE 1293 | 1294 | # filter out ones that aren't there 1295 | not.really.europe = c('RUS', 'BLR', 'UKR', 'GRL', 'TUR', 'ARM', 'AZE', 'GEO') 1296 | eu[world.map$ISO3 %in% not.really.europe] = FALSE 1297 | 1298 | # also grab north africa 1299 | na = world.map@data$ISO3 %in% c('MAR', 'DZA', 'LBY', 'EGY') 1300 | 1301 | # filter down our world map 1302 | europe.map = world.map[eu | na, ] 1303 | 1304 | 1305 | # filter out the colonies 1306 | suppressPackageStartupMessages(library(raster)) 1307 | europe.map = crop(europe.map, extent(-25, 37, 18, 72)) 1308 | 1309 | 1310 | return(europe.map) 1311 | } 1312 | 1313 | # iso3 should come from world.map@data$ISO3 1314 | # extent should be c(left, right, bottom, top) 1315 | get_country_map = function(iso3, extent=NULL, resolution='high') 1316 | { 1317 | # load a world map 1318 | suppressPackageStartupMessages(library(rworldmap)) 1319 | world.map <<- getMap(resolution) 1320 | 1321 | # filter down to our countries 1322 | keep = world.map@data$ISO3 %in% iso3 1323 | my.map = 
# iso3 should come from world.map@data$ISO3
# extent should be c(left, right, bottom, top)
get_country_map = function(iso3, extent=NULL, resolution='high')
{
    # load a world map
    suppressPackageStartupMessages(library(rworldmap))
    world.map <<- getMap(resolution)

    # filter down to our countries
    keep = world.map@data$ISO3 %in% iso3
    my.map = world.map[keep, ]

    # crop to a specific extent
    # (the extent argument shadows raster::extent, but R still resolves the function call correctly)
    if (length(extent) == 4)
    {
        library(raster)
        my.map = crop(my.map, extent(extent))
    }

    return(my.map)
}




##
## Plot a map with points at your specified coordinates
##  - pass longitude and latitude as numeric vectors
##  - optionally set the boundaries of the map with lonBounds and latBounds = c(minimum, maximum)
##      otherwise these are calculated automatically to cover all points
##  - optionally set how many degrees of padding to add around auto-generated bounds
##  - optionally specify the border and fill colour for countries
##  - optionally specify the aspect ratio to use (1.5 default for UK)
##  - optionally specify your own shapefile to render
##  - optionally specify the height of the dev() window
##  - optionally send other parameters on to points()
##
##  - returns the ratio of width to height you should use if printing the file as a png
##
##  - will create and store the map shape in the world.map global variable to save time on future calls
##
##
## if you want to plot a blank map with no points (useful to get the right boundaries) then just set col=NULL
##
## note that this makes a new graphics window with new par(). this will let you continue editing the map,
## but if you start making new plots in this window they will look funny. just call dev.new() afterwards.
##
##
## TO DO:
##  - different projections, motherfucker!!!
##
##  - if i want to repeat this with colour fills for each country
##    see http://stackoverflow.com/questions/1260965/developing-geographic-thematic-maps-with-r
##
plot_map_points = function(lon, lat, xlim=range(lon, na.rm=TRUE), ylim=range(lat, na.rm=TRUE), lonBounds=NULL, latBounds=NULL, padding=1,
                           mapBorder='grey75', mapFill='grey95', mapLwd=1,
                           mapAspect=1.5, shapeFile=NULL, height=7, new.plot=TRUE, add.points=FALSE, ...)
{
    # get our world map and save globally for future calls
    world.map <<- get_world_map(shapeFile)

    # honour explicit bounds if they were given
    if (!is.null(lonBounds)) xlim = range(lonBounds)
    if (!is.null(latBounds)) ylim = range(latBounds)

    # add some padding around the map
    xlim = range(xlim-padding, xlim+padding)
    ylim = range(ylim-padding, ylim+padding)

    # work out the aspect ratio
    ar = diff(xlim) / diff(ylim) / mapAspect

    # build a new plot window with the correct ratio
    if (new.plot == TRUE)
    {
        dev.new(height=height, width=height*ar)
    }

    if (add.points == FALSE)
    {
        # remove our plot margins
        par(bg="white")
        par(mar = rep(0, 4))

        # plot the map
        plot(world.map, xlim=xlim, ylim=ylim, border=mapBorder, col=mapFill, asp=mapAspect, lwd=mapLwd)
    }

    # plot the points
    points(lon, lat, ...)

    # return our ideal width:height ratio
    # then call good_png(height=1000, width=1000*ar)
    ar
}
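
# a quick usage sketch (editor's addition, with made-up coordinates):
if (0)
{
    lon = c(-0.13, 2.35, 13.40)    # London, Paris, Berlin
    lat = c(51.51, 48.86, 52.52)
    ar = plot_map_points(lon, lat, padding=3, col='red', pch=19)

    # ar is the width:height ratio to use when saving, e.g. good_png(height=1000, width=1000*ar)
}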
###################################################################################################################
##################    TEXT TOOLS    ###############################################################################
###################################################################################################################


# reverse a string
strrev = function(x) { reverse_string(x) }
reverse_string = function(x)
{
    sapply(lapply(strsplit(x, NULL), rev), paste, collapse="")
}

# get the last n characters of a string (i.e. substr working backwards from the end)
substr_reverse = function(x, n)
{
    substr(x, nchar(x)-n+1, nchar(x))
}

# replace 'x' with 'y' in a string (fixed strings, no regular expressions)
str_replace = function(...)
{
    gsub(..., fixed=TRUE)
}


# get text before, after, or between tokens
# set last=TRUE to split at the last occurrence of the token rather than the first
get_text_before = function(string, token, last=FALSE)
{
    chunks = strsplit(string, token, fixed=TRUE)

    if (last == FALSE)
        fun = function(x) { x[1] }

    if (last == TRUE)
        fun = function(x) { paste(head(x, -1), collapse=token) }

    sapply(chunks, fun)
}


get_text_after = function(string, token, last=FALSE)
{
    chunks = strsplit(string, token, fixed=TRUE)

    if (last == TRUE)
        fun = function(x) { tail(x, 1) }

    if (last == FALSE)
        fun = function(x) { paste(tail(x, -1), collapse=token) }

    sapply(chunks, fun)
}


get_text_between = function(string, token1, token2)
{
    string = get_text_after(string, token1)
    string = get_text_before(string, token2)
    string
}




# convert an array of key-val pairs into a data frame
# e.g. keyval = c('name=Iain', 'hair=NA')
#      splitter = '='
keyval_to_df = function(keyval, splitter)
{
    # parse the strings into a two-row structure (keys on top, values below)
    df = as.data.frame(sapply(strsplit(keyval, splitter), rbind), stringsAsFactors=FALSE)

    # use the keys as the header
    names(df) = df[1,]
    df = df[-1,]

    return(df)
}

# convert an array of arrays of key-val pairs into a data frame
all_keyval_to_df = function(keyval_array, splitter)
{
    # initialise results storage
    DF = NULL

    # run through each keyval string
    for (keyval in keyval_array)
    {
        df = keyval_to_df(keyval, splitter)
        DF = merge(DF, df, all=TRUE, sort=TRUE)
    }

    return(DF)
}
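
# a quick usage sketch (editor's addition, with made-up strings):
if (0)
{
    get_text_before('wind_speed.2014.nc', '.')            # "wind_speed"
    get_text_after('wind_speed.2014.nc', '.', last=TRUE)  # "nc"
    get_text_between('wind_speed.2014.nc', '.', '.nc')    # "2014"

    keyval_to_df(c('name=Iain', 'hair=none'), '=')        # a one-row data frame with columns name, hair
}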
# convert one/some numbers into percentage format
percent = function(val, dp=2)
{
    paste0(round(100*val, dp), "%")
}




###################################################################################################################
##################    ARRAY TOOLS    ##############################################################################
###################################################################################################################


# push a value onto the end of a vector, modifying it in the caller's environment
# in case you're too lazy to type array[length(array)+1] = item...
push = function(array, item)
{
    new_array = c(array, item)
    eval.parent(substitute(array <- new_array))
}

# pop an item off the end of a vector (again modifying it in place)
# and return the item
pop = function(array)
{
    item = array[ length(array) ]
    new_array = array[ -length(array) ]
    eval.parent(substitute(array <- new_array))
    item
}


# http://stackoverflow.com/questions/11561856/add-new-row-to-dataframe
# rbind a new row into a specific place of a data frame
insert_row = function(DF, row, r)
{
    s = seq(r, nrow(DF))
    DF[s+1, ] = DF[s, ]
    DF[r, ] = row
    DF
}
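
# a quick usage sketch (editor's addition):
# note that push and pop modify the variable in the calling environment
if (0)
{
    x = c(4, 1)
    push(x, 7)     # x is now 4 1 7
    pop(x)         # returns 7; x is back to 4 1

    df = data.frame(a=1:3, b=4:6)
    insert_row(df, c(9, 9), 2)    # the new row becomes row 2
}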
#
# multi-dimensional rbind/cbind equivalent...
#
# binds two arrays along the xth dimension
# the arrays must have the same number of dimensions, and this can be any number
#
# for example:
#   xbind(a[x,y,z1], b[x,y,z2], 3) returns c[x,y,z1+z2]...
#
#   xbind(a[x,y], b[x,y], 1) == rbind(a[x,y], b[x,y])
#   xbind(a[x,y], b[x,y], 2) == cbind(a[x,y], b[x,y])
#
xbind = function(a, b, x)
{
    # check we are passing arrays
    if (!is.array(a) | !is.array(b) | !is.numeric(x))
        stop('serious? this ting deals with arrays...')

    # check the number of dimensions
    n = length(dim(a))

    if (length(dim(b)) != n)
        stop('xbind only does arrays with the same number of dimensions fam!\n')


    # check the sizes are correct in the other dimensions
    for (i in 1:n)
    {
        if (i == x)
            next

        if (dim(a)[i] != dim(b)[i])
            stop('blud are you mad? xbind needs your arrays to have the same size in the non-x dimensions!\n')
    }


    # work out the interleaving
    cuts = 1

    for (i in n:1)
    {
        if (i == x)
            break

        cuts = cuts * dim(a)[i]
    }


    # convert to a set of 1d vectors
    aa = matrix(a, ncol=cuts)
    bb = matrix(b, ncol=cuts)


    # join together and reshape
    cc = rbind(aa, bb)

    newdim = (1:n %in% x) * 1
    newdim = dim(a) + dim(b) * newdim

    c = array(cc, dim=newdim)


    # add on the dimension names
    for (i in 1:n)
    {
        if (i == x)
        {
            dimnames(c)[[i]] = c(dimnames(a)[[i]], dimnames(b)[[i]])
        }

        if (i != x)
        {
            dimnames(c)[[i]] = dimnames(a)[[i]]
        }
    }


    # return the bound array
    c
}

if (0)
{
    a = array(c(111, 211, 121, 221, 112, 212, 122, 222), dim=c(2,2,2))
    a[1, , ]
    a[ , 1, ]
    a[ , , 1]

    b = array(c(311, 411, 321, 421, 312, 412, 322, 422), dim=c(2,2,2))
    c = array(c(131, 231, 141, 241, 132, 232, 142, 242), dim=c(2,2,2))
    d = array(c(113, 213, 123, 223, 114, 214, 124, 224), dim=c(2,2,2))

    xbind(a, b, 1)
    xbind(a, c, 2)
    xbind(a, d, 3)
}




# return the array indices for the min/max element
# e.g. x = array(runif(10000), dim=c(10,10,100)); which.min.arr(x);
which.min.arr = function(x)
{
    w = which.min(x)
    w = which(x == x[w], arr.ind=TRUE)
    w
}

which.max.arr = function(x)
{
    w = which.max(x)
    w = which(x == x[w], arr.ind=TRUE)
    w
}

# return the index of the element that is closest in value to x
which.closest = function(data, x)
{
    which.min(abs(data - x))
}


# http://stackoverflow.com/questions/2453326/fastest-way-to-find-second-third-highest-lowest-value-in-vector-or-column
nth_largest = function(data, n)
{
    if (n < 1 | n > length(data)) return (NA)

    n = length(data) + 1 - n

    if (is.unsorted(data))
        return ( sort(data, partial=n)[n] )

    return (data[n])
}

nth_smallest = function(data, n)
{
    if (n < 1 | n > length(data)) return (NA)

    if (is.unsorted(data))
        return ( sort(data, partial=n)[n] )

    return (data[n])
}



# multiply each column of a matrix by each element of the vector (i.e. each row gets multiplied by the whole vector)
# http://stackoverflow.com/questions/3643555/multiply-rows-of-matrix-by-vector
multiply_each_column = function(matrix, vector)
{
    result = t( t(matrix) * vector )
    as.data.frame(result)
}

# alias of multiply_each_column
mult_cols_by_vector = multiply_each_column

# multiply each row of a matrix by each element of the vector (i.e. each column gets multiplied by the whole vector)
multiply_each_row = function(matrix, vector)
{
    result = matrix * vector
    as.data.frame(result)
}

# alias of multiply_each_row
mult_rows_by_vector = multiply_each_row


# calculate the arithmetic centre of adjacent values in an array
midPoints = function(x)
{
    (x[-length(x)] + x[-1]) / 2
}


# test if a string (or array of strings) holds valid numbers
is_string_numeric = function(data)
{
    suppressWarnings(!is.na(as.numeric(data)))
}


# return a frequency table in ascending frequency order
freq_table = function(data)
{
    # create the table
    tbl = as.data.frame(table(data))
    colnames(tbl) = c('data', 'freq')

    # convert data from factors to strings or numeric
    tbl$data = as.character(tbl$data)
    is_numeric = is_string_numeric(tbl$data)
    if (all(is_numeric))
        tbl$data = as.numeric(tbl$data)

    # return, sorted by frequency
    tbl[ order(tbl$freq), ]
}
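
# a quick usage sketch (editor's addition):
if (0)
{
    nth_largest(c(5, 9, 1, 7), 2)             # 7
    which.closest(c(5, 9, 1, 7), 6)           # 1 (the value 5 is closest to 6)
    freq_table(c('a', 'b', 'b', 'c', 'b'))    # a and c (freq 1) first, then b (freq 3)
}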
#
# move a column in a data frame to a different position
#
# data   = the data frame
# myCol  = the column(s) you wish to move
# where  = 'first', 'last', 'before', 'after'
# relCol = the relative column to move before or after
#
# e.g. move_column(data, c('b','c'), 'before', 'e')
#
# http://stackoverflow.com/questions/18339370/reordering-columns-in-a-large-dataframe
#
move_column = function(data, myCol, where = "last", relCol = NULL)
{
    otherCols = setdiff(names(data), myCol)

    x = switch(where,

        first = data[c(myCol, otherCols)],

        last = data[c(otherCols, myCol)],

        before =
        {
            if (is.null(relCol)) stop("must specify the relCol column")
            if (length(relCol) > 1) stop("relCol must be a single character string")
            data[append(otherCols, values = myCol, after = (match(relCol, otherCols)-1))]
        },

        after =
        {
            if (is.null(relCol)) stop("must specify the relCol column")
            if (length(relCol) > 1) stop("relCol must be a single character string")
            data[append(otherCols, values = myCol, after = (match(relCol, otherCols)))]
        }
    )

    x
}



# calculate the moving average of a vector of data
# n may be fractional, in which case we interpolate between the two nearest integer widths
moving_average = function(data, n=5)
{
    # integer filter width
    if (n%%1 == 0)
        return( as.numeric( filter(data, rep(1/n, n), sides=2) ) )

    # otherwise interpolate between the nearest integers
    nlo = floor(n)
    nhi = ceiling(n)

    a = filter(data, rep(1/nlo, nlo), sides=2)
    b = filter(data, rep(1/nhi, nhi), sides=2)

    return( as.numeric( a*(nhi-n) + b*(n-nlo) ) )
}

# as moving_average, but pad the ends of the data first so the result has no NAs at the edges
moving_average_full = function(data, n=5)
{
    data = c(head(data, n), data, tail(data, n))

    smooth = moving_average(data, n)

    smooth = head(smooth, -n)
    smooth = tail(smooth, -n)

    smooth
}




# do the same as diff(x, lag), except return the biggest swing
# seen from now across the next lag elements...
# i.e. x = c(3, 0, 3, 0); diff(x, 2) will return 0, 0
# but biggest_diff(x, 2) will return -3, 3
biggest_diff = function(x, lag)
{
    if (lag == 1)
        return( diff(x) )

    # initialise
    DX = x*0

    for (i in 1:lag)
    {
        # pop the last element off our previous array
        DX = DX[ -length(DX) ]

        # calculate this new array
        dx = diff(x, i)

        # figure out which is biggest
        w = (abs(dx) > abs(DX))
        DX[w] = dx[w]
    }

    DX
}



# censor all rows in a data.frame where any column is already censored
# i.e. mirror NAs across all of the chosen columns if any of them is NA
#
na.everyone = function(dataframe, cols=colnames(dataframe))
{
    # find NA in all rows/columns (drop=FALSE keeps this a matrix when only one column is chosen)
    filter = is.na(dataframe[ , cols, drop=FALSE])

    # find rows where any column is NA
    filter = apply(filter, 1, any)
    filter = as.vector(filter)

    # NA all columns in those rows
    for (c in cols)
    {
        dataframe[filter, c] = NA
    }

    # return
    dataframe
}
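
# a quick usage sketch (editor's addition):
if (0)
{
    df = data.frame(a=c(1, NA, 3), b=c(4, 5, NA))
    na.everyone(df)    # rows 2 and 3 become NA in both columns
}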
# convert a list into a data frame
# (rbind.data.frame needs stringsAsFactors to be FALSE, so we set and restore it around the call)
list_to_data_frame = function(l)
{
    osaf = options('stringsAsFactors')
    options(stringsAsFactors=FALSE)

    # convert the list to the data frame
    d = do.call(rbind.data.frame, l)

    # reset stringsAsFactors
    options(stringsAsFactors=osaf[[1]][[1]])

    d
}



# aggregate a vector or a data.frame into groups of a fixed size
# e.g. aggregate_n(my_data, 2) to average half-hourly data to hourly
#
aggregate_n = function(values, n, FUN='mean')
{
    # work out our counter
    if (!is.data.frame(values))
        y = seq_along(values)

    if (is.data.frame(values))
        y = seq_along(values[ , 1])

    # aggregate our counter every n
    y = floor( (y-1) / n )

    # now aggregate our values
    z = aggregate(values, by=list(y), FUN)

    # return without the Group.1 column
    z[ , -1]
}
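
# a quick usage sketch (editor's addition):
if (0)
{
    aggregate_n(c(1, 3, 5, 7, 9, 11), 2)         # 2 6 10
    aggregate_n(data.frame(a=1:4, b=5:8), 2)     # two rows of column means
}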
###################################################################################################################
##################    TIME-SERIES    ##############################################################################
###################################################################################################################

aggregate_yearly  = function(dates, values, FUN='mean') aggregate_ts(dates, values, 'yearly', FUN)
aggregate_monthly = function(dates, values, FUN='mean') aggregate_ts(dates, values, 'monthly', FUN)
aggregate_weekly  = function(dates, values, FUN='mean') aggregate_ts(dates, values, 'weekly', FUN)
aggregate_daily   = function(dates, values, FUN='mean') aggregate_ts(dates, values, 'daily', FUN)
aggregate_hourly  = function(dates, values, FUN='mean') aggregate_ts(dates, values, 'hourly', FUN)

aggregate_ts = function(dates, values, by='yearly', FUN='mean')
{
    # forbid failure
    if (by %notin% c('daily', 'weekly', 'monthly', 'yearly', 'hourly', 'seasonal diurnal', 'MUSE'))
        stop("aggregate_ts: don't know how you want to aggregate things dear boy...")


    # pre-process our dates into the relevant chunks
    process_dates = function(dates, by)
    {
        if (by == 'yearly')  dates = format(dates, "%Y-01-01")
        if (by == 'monthly') dates = format(dates, "%Y-%m-01")
        if (by == 'weekly')  dates = round_date(dates, 'week')
        if (by == 'daily')   dates = format(dates, "%Y-%m-%d")
        if (by == 'hourly')  dates = format(dates, "%Y-%m-%d %H:00")

        # 'MUSE' gives something appropriate for MUSE and other energy systems models: quarterly and three-hourly
        if (by == 'MUSE')
        {
            season = floor( ((month(dates) + 1) %% 12) / 3)
            hour = 3 * floor(hour(dates) / 3)
            dates = sprintf("Y%4d Q%i H%02i", year(dates), season, hour)
        }

        dates
    }


    # we want to process a single time series
    # dates = {.....}, values = {.....}
    if (!is.data.frame(values))
    {
        my_dates = process_dates(dates, by)
        return( aggregate_ts_once(my_dates, values, by, FUN) )
    }

    # we want to process a simple two-column data frame
    # dates = 'colname', values = data.frame(.....)
    if (is.character(dates) & is.data.frame(values) & ncol(values) == 2)
    {
        my_dates = values[ , colnames(values) == dates]
        my_dates = process_dates(my_dates, by)

        my_values = values[ , colnames(values) != dates]

        return( aggregate_ts_once(my_dates, my_values, by, FUN) )
    }

    # we want to process a whole data frame
    # dates = 'colname', values = data.frame(.....)
    if (is.character(dates) & is.data.frame(values))
    {
        # strip out dates and data
        my_dates = values[ , colnames(values) == dates]
        my_values = values[ , colnames(values) != dates]
    }

    # we want to process a data frame of pure data
    if (!is.character(dates) & is.data.frame(values))
    {
        my_dates = dates
        my_values = values
    }

    cols = colnames(my_values)
    my_dates = process_dates(my_dates, by)

    # process the first column, creating the output structure
    c = cols[1]
    results = aggregate_ts_once(my_dates, my_values[ , c], by, FUN)
    colnames(results)[2] = c

    # determine the frequency of progress updates (small data runs fast, so report less often)
    i_tick = 1
    if (nrow(values) < 10000) i_tick = 10
    if (nrow(values) < 100)   i_tick = 100



    # process subsequent columns
    for (i in 2:length(cols))
    {
        c = cols[i]
        res = aggregate_ts_once(my_dates, my_values[ , c], by, FUN, lube.dates=FALSE)
        results[ , c] = res$value

        if (i %% i_tick == 0) clear_line('Aggregating', i, 'of', length(cols))
    }
    clear_line()

    return( results )
}


aggregate_ts_once = function(dates, values, by='yearly', FUN='mean', lube.dates=TRUE)
{
    # do something different and special if we want a seasonal-diurnal aggregation
    if (by == 'seasonal diurnal')
    {
        hour = hour(dates)
        month = month(dates)

        season = floor( (month %% 12) / 3)

        # note: the seasonal-diurnal profile always uses the mean, ignoring FUN
        results = aggregate(values, by=list(hour, season), FUN=mean, na.rm=TRUE)
        results = data.frame(array(results$x, dim=c(24,4)))
        colnames(results) = c('winter', 'spring', 'summer', 'autumn')

        # this hands back a custom format, so skip the rest
        return( results )
    }

    # aggregate our data as desired
    results = aggregate(values, by=list(dates), FUN=FUN)

    # convert to a nice format
    colnames(results) = c('date', 'value')

    if (lube.dates)
    {
        if (by %in% c('daily', 'weekly', 'monthly', 'yearly')) results$date = ymd(results$date)
        if (by == 'hourly') results$date = ymd_hm(results$date)
        # (MUSE labels are left as plain strings)
    }

    return( results )
}
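
# a quick usage sketch (editor's addition, with made-up data, assuming lubridate is loaded):
if (0)
{
    dates = seq(ymd_hm('2014-01-01 00:00'), ymd_hm('2014-12-31 23:00'), by='hour')
    cf = runif(length(dates))

    monthly = aggregate_monthly(dates, cf)             # one mean value per month
    daily = aggregate_ts(dates, cf, 'daily', 'max')    # daily maxima
}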
# yea boy
seasonal_diurnal = function(dates, values, FUN='mean') aggregate_ts(dates, values, 'seasonal diurnal', FUN)

# a function to help with plotting diurnal data
plot_diurnal = function(diurnal, ...)
{
    good_graphics(padding=TRUE)

    range = range(diurnal*0.98, diurnal*1.02)

    plot(0:23, diurnal$summer, col='red', type='l', xlab="Hour", ylab="CF", ylim=range, xaxt='n', ...)
    lines(0:23, diurnal$spring, col='green')
    lines(0:23, diurnal$autumn, col='brown')
    lines(0:23, diurnal$winter, col='blue')

    axis(1, at=seq(0,24,6))
    axis(1, at=seq(0,24,3), labels=FALSE)

    axis(3, at=seq(0,24,3), labels=FALSE)

    axis(4, labels=FALSE)
}



# additional lubridate-style parsers for dates with hours and minutes but no seconds
# (because I am exceptionally lazy)
dmy_hs = function(string) { dmy_hms(paste(string, '00', sep=':')) }
ymd_hs = function(string) { ymd_hms(paste(string, '00', sep=':')) }






# lag a vector of data by a specified number of (possibly non-integer) steps
# i.e. lagging by one will turn [4,1,2] into [NA,4,1]
# you can lag by fractional amounts, which uses linear interpolation
lag_data = function(data, steps)
{
    while (steps >= 1)
    {
        # lag by 1
        data = c(NA, head(data, -1))
        steps = steps - 1
    }

    if (steps > 0)
    {
        # lag by a fraction: a lag of s gives data[i-s],
        # which sits (1-s) of the way from data[i-1] towards data[i]
        data = c(NA, head(data, -1) + diff(data)*(1-steps))
    }

    data
}

# the opposite of lagging data
lead_data = function(data, steps)
{
    data = rev(data)
    data = lag_data(data, steps)
    rev(data)
}
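
# a quick usage sketch (editor's addition):
if (0)
{
    lag_data(c(4, 1, 2), 1)      # NA 4.0 1.0
    lag_data(c(4, 1, 2), 0.5)    # NA 2.5 1.5
    lead_data(c(4, 1, 2), 1)     # 1 2 NA
}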
#
# align two data frames based on the values of the given columns
# e.g. you have two sets of time-series data, and you want them aligned in time
# so that sim$date == act$date
#
# res = align_data(sim, 'date', act, 'date')
# sim = res[[1]]; act = res[[2]];
#
align_data = function(df1, col1, df2, col2)
{
    # shrink to the shared values
    df1 = df1[ (df1[ , col1] %in% df2[ , col2]), ]
    df2 = df2[ (df2[ , col2] %in% df1[ , col1]), ]

    # align
    df1 = df1[ match(df2[ , col2], df1[ , col1]), ]
    df2 = df2[ match(df1[ , col1], df2[ , col2]), ]

    list(df1, df2)
}



# search through the list of time zone names
# e.g. tz_search('Paris')
# (OlsonNames() is base R - lubridate's old olson_time_zones() has been removed)
tz_search = function(str)
{
    tz = OlsonNames()
    tz[ grep(str, tz) ]
}


# calculate the shift in hours needed to move to that time zone
# this accounts for daylight savings time, and works on a series of lubridates
#
tz_shift = function(t, zone)
{
    z = force_tz(t, zone)
    (t - z) / dhours(1)
}

# e.g.
# t = seq(dmy('01/01/2013'), dmy('01/01/2015'), by='hour')
# shift = tz_shift(t, 'Europe/Paris')
# plot(shift)



# fill in the missing time steps of a data frame
# x = your data frame, with the dates in its first column
# datecol = the column where the dates live
# period = 'day', 'hour', or anything else that seq() accepts
# fill = optionally, what you'd like your missing timesteps to contain (e.g. 0)
fill_missing_dates = function(x, datecol, period='day', fill=NA)
{
    min = min(x[ , datecol])
    max = max(x[ , datecol])

    all_dates = seq(from=min, to=max, by=period)

    m = match(all_dates, x[ , datecol])
    f = is.na(m)

    all_dates = data.frame(all_dates)

    # note: this assumes datecol is the first column of x
    for (c in 2:ncol(x))
    {
        all_dates[ , c] = x[m, c]
        all_dates[f, c] = fill
    }

    colnames(all_dates) = colnames(x)
    all_dates
}




###################################################################################################################
##################    STATISTICS    ###############################################################################
###################################################################################################################


# calculate the mode (note: this masks base R's mode(), which reports storage types)
mode = function(x)
{
    ux = unique(x)
    ux[which.max(tabulate(match(x, ux)))]
}


# already exists in the base package
weighted_mean = function(x, w)
{
    weighted.mean(x, w)
}


# calculate the weighted standard deviation
weighted_sd = function(x, w)
{
    w = w / sum(w)
    mu = weighted.mean(x, w)
    sd = sqrt( sum(w * (x - mu)^2) )
    return(sd)
}


# calculate the standard error on the mean (note: this masks base R's stderr(), which returns a connection)
stderr = function(x) { sqrt(var(x, na.rm=TRUE) / length(na.omit(x))) }


# calculate the half-width of the 95% confidence interval on the mean,
# for a normally distributed variable with unknown mean and standard deviation
ci95 = function(x)
{
    x = na.omit(x)
    n = length(x)

    return( qt(0.975, df=(n-1)) * sd(x) / sqrt(n) )
}


# return the adjusted R² value from an lm() fit
r2 = function(fit)
{
    summary(fit)$adj.r.squared
}


# return the coefficients and errors of a regression as a single row
coef.err = function(fit)
{
    coef = summary(fit)$coefficients[ , 1]
    err = summary(fit)$coefficients[ , 2]

    cbind(t(coef), t(err))
}

lm.coef.err = function(formula)
{
    fit = lm(formula)
    coef.err(fit)
}


# calculate the root mean square
rms = function(x)
{
    sqrt( mean( x^2, na.rm=TRUE) )
}


# rescale a vector to lie in the range [min, max]
# (the min and max arguments shadow the base functions, but R still finds the functions for the calls below)
rescale = function(data, min=0, max=1)
{
    # rescale to [0, 1]
    data = (data - min(data)) / diff(range(data))

    # rescale to our limits
    data = min + (data * (max - min))

    # return
    data
}


## generate a sequence of random numbers following a given distribution
## but constrain the distribution to lie within set bounds (min to max)
## any numbers lying outside those bounds will be re-drawn
##
## e.g. constrained_distro('rnorm', 1000, min=1, max=2, mean=0, sd=1)
##
constrained_distro = function(distro, length, min, max, ...)
{
    # e.g. rnorm(length, parm1, parm2)
    x = do.call(distro, list(length, ...) )

    while (1)
    {
        filter = (x < min | x > max)
        if (sum(filter) == 0) break

        x[filter] = do.call(distro, list(sum(filter), ...) )
    }

    x
}
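
# a quick usage sketch (editor's addition):
if (0)
{
    x = constrained_distro('rnorm', 1000, min=1, max=2, mean=0, sd=1)
    range(x)    # always within [1, 2]
    hist(x)
}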
--------------------------------------------------------------------------------
/power_curves/LICENSE:
--------------------------------------------------------------------------------
Contents of this folder are excluded from the BSD 3-Clause license and remain the property of the respective copyright holders.
--------------------------------------------------------------------------------