├── 1.COMPILE.WIND.PROFILE (merra1).R ├── 1.COMPILE.WIND.PROFILE (merra2).R ├── 2.CALCULATE.R ├── 3.PROGRESS.MONITOR.R ├── CITATION ├── LICENSE ├── README.md ├── data ├── BASE.OPTIONS.R ├── INPUT_CH.CSV ├── INPUT_CH.PNG └── INPUT_CH.R ├── lib ├── VWF.EXTRAPOLATE.R ├── VWF.EXTRAS.R ├── VWF.FARMS.R ├── VWF.MAIN.PREP.R ├── VWF.MAIN.PROGRESS.R ├── VWF.MAIN.RESULTS.R ├── VWF.MAIN.WINDPOWER.R ├── VWF.MAIN.WINDSPEED.R ├── VWF.NCDF.R ├── VWF.PLOTS.R ├── VWF.R └── VWF.STDLIB.R └── power_curves ├── LICENSE ├── Wind Turbine Power Curves ~ 5 (0.01ms with 0.00 w smoother).csv ├── Wind Turbine Power Curves ~ 5 (0.01ms with 0.10 w smoother).csv ├── Wind Turbine Power Curves ~ 5 (0.01ms with 0.20 w smoother).csv ├── Wind Turbine Power Curves ~ 5 (0.01ms with 0.30 w smoother).csv └── Wind Turbine Power Curves ~ 5 (0.01ms with 0.40 w smoother).csv /1.COMPILE.WIND.PROFILE (merra1).R: -------------------------------------------------------------------------------- 1 | ## 2 | ##### 3 | ## ################################################################################################## 4 | ##### 5 | ## 6 | ## this requires us to have a template NetCDF file with the right variables available 7 | ## can't do that within R easily, so need to download NCO from http://nco.sourceforge.net/ 8 | ## save a single MERRA SLV file as MERRA_IN.nc and run the following commands: 9 | ## 10 | #### MERRA 1 11 | # ncks -x -v disph,t10m,t2m,ts,u10m,u50m,v10m,v50m MERRA_IN.nc MERRA_OUT.nc 12 | # ncrename -h -O -v u2m,A MERRA_OUT.nc 13 | # ncrename -h -O -v v2m,z MERRA_OUT.nc 14 | # ncatted -a long_name,A,o,c,"Scale factor for log-law extrapolation: W = A log(h / z)" MERRA_OUT.nc 15 | # ncatted -a long_name,z,o,c,"Ref height for log-law extrapolation: W = A log(h / z)" MERRA_OUT.nc 16 | # 17 | ## 18 | ## 19 | ##### 20 | ## ################################################################################################## 21 | ##### 22 | ## 23 | ## this runs through all merra files in the specified folders 24 | ## reads the wind speed data and calculates the extrapolation parameters 25 | ## then saves the A and z values to NetCDF files 26 | ## 27 | ## after running, you should move the resulting files into their own folder 28 | ## 29 | ## 30 | 31 | 32 | ##### 33 | ## ## SETUP 34 | ##### 35 | 36 | # the folders we wish to read merra data from 37 | merraBase = 'W:/MERRA_WIND/' 38 | 39 | # the folders we wish to write profiles to 40 | outputBase = 'W:/MERRA_PROFILES/' 41 | 42 | # our blank NetCDF template file to be filled 43 | templateFile = 'M:/WORK/Wind Modelling/VWF CODE/TOOLS/MERRA.log.law.template.A.z.nc' 44 | 45 | # path to the VWF model 46 | VWFMODEL = 'Q:/VWF/lib/VWF.R' 47 | 48 | 49 | 50 | 51 | 52 | ##### 53 | ## ## READ IN DATA 54 | ##### 55 | 56 | # load the VWF model 57 | source(VWFMODEL) 58 | 59 | # find and prepare all our merra files 60 | merra = prepare_merra_files(merraBase) 61 | 62 | # prepare a NetCDF file handler so our format is known 63 | f = merra$files[1] 64 | nc = NetCdfClass(f, 'MERRA1', TRUE) 65 | 66 | # subset if necessary 67 | if (exists('region')) 68 | nc$subset_coords(region) 69 | 70 | # close this input file 71 | nc$close_file() 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | ##### 80 | ## ## PREPARE OUR CLUSTER 81 | ##### 82 | 83 | # build a parallel cluster 84 | # note that it doesn't make sense going much beyond 8 cores 85 | library(doParallel) 86 | cl = makeCluster(8) 87 | registerDoParallel(cl) 88 | 89 | # provide the extrapolate function to each core 90 | clusterExport(cl, 
varlist=c('extrapolate_log_law')) 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | ##### 99 | ## ## RUN 100 | ##### 101 | 102 | # run through each input merra file 103 | for (f in merra$files) 104 | { 105 | # decide the filename for this output data 106 | o = gsub('prod.assim.tavg1_2d_slv', 'wind_profile', f) 107 | o = gsub('tavg1_2d_slv_Nx', 'wind_profile', f) 108 | o = gsub(merraBase, outputBase, o) 109 | 110 | # skip if this already exists 111 | clear_line() 112 | if (file.exists(o)) 113 | { 114 | clear_line("Skipping", o) 115 | next 116 | } 117 | 118 | 119 | 120 | # get this file's time attributes 121 | nc$open_file(f) 122 | myTime = ncatt_get(nc$ncdf, "time", "units")$value 123 | nc$close_file() 124 | 125 | 126 | 127 | # do the extrapolation, getting the A,z parameters 128 | profile = extrapolate_ncdf(f) 129 | 130 | # safety checks 131 | err = which(profile$z > 100) 132 | profile$z[err] = 100 133 | 134 | err = which(profile$z < 10^-10) 135 | profile$z[err] = 10^-10 136 | 137 | err = which(profile$A < 0) 138 | profile$A[err] = 0 139 | 140 | 141 | 142 | 143 | # create the NetCDF file for this month 144 | file.copy(templateFile, o) 145 | 146 | # open this file for writing 147 | ncout = nc_open(o, write=TRUE) 148 | 149 | # set its time attributes 150 | ncatt_put(ncout, "time", "units", myTime) 151 | 152 | # put our data in 153 | for (var in names(profile)) 154 | { 155 | ncvar_put(ncout, var, profile[[var]]) 156 | } 157 | 158 | # save and close 159 | nc_close(ncout) 160 | cat("Written", o, "\n") 161 | } 162 | 163 | 164 | 165 | cat("\n\n\nFLAWLESS!\n\n") 166 | -------------------------------------------------------------------------------- /1.COMPILE.WIND.PROFILE (merra2).R: -------------------------------------------------------------------------------- 1 | ## 2 | ##### 3 | ## ################################################################################################## 4 | ##### 5 | ## 6 | ## this requires us to have a template NetCDF file with the right variables available 7 | ## can't do that within R easily, so need to download NCO from http://nco.sourceforge.net/ 8 | ## save a single MERRA SLV file as MERRA_IN.nc4 and run the following commands: 9 | ## 10 | #### MERRA 2 11 | # ncks -x -v CLDPRS,CLDTMP,DISPH,H1000,H250,H500,H850,OMEGA500,PBLTOP,PS,Q250,Q500,Q850,QV10M,QV2M,SLP,T10M,T250,T2M,T2MDEW,T2MWET,T500,T850,TO3,TOX,TQI,TQL,TQV,TROPPB,TROPPT,TROPPV,TROPQ,TROPT,TS,U10M,U250,U500,U50M,U850,V10M,V250,V500,V50M,V850,ZLCL MERRA_IN.nc4 MERRA_OUT.nc4 12 | # ncrename -h -O -v U2M,A MERRA_OUT.nc4 13 | # ncrename -h -O -v V2M,z MERRA_OUT.nc4 14 | # ncatted -a long_name,A,o,c,"Scale factor for log-law extrapolation: W = A log(h / z)" MERRA_OUT.nc4 15 | # ncatted -a long_name,z,o,c,"Ref height for log-law extrapolation: W = A log(h / z)" MERRA_OUT.nc4 16 | # 17 | ## 18 | ## 19 | ##### 20 | ## ################################################################################################## 21 | ##### 22 | ## 23 | ## this runs through all merra files in the specified folders 24 | ## reads the wind speed data and calculates the extrapolation parameters 25 | ## then saves the A and z values to NetCDF files 26 | ## 27 | ## after running, you should move the resulting files into their own folder 28 | ## 29 | ## 30 | 31 | 32 | ##### 33 | ## ## SETUP 34 | ##### 35 | 36 | 37 | # the folders we wish to read data from 38 | merraFolder = 'X:/MERRA2_WIND/' 39 | 40 | # the folders we wish to write to 41 | outputFolder = 'X:/MERRA2_PROFILES/' 42 | 43 | # our blank NetCDF template file to be filled 44 | 
templateFile = 'M:/WORK/Wind Modelling/VWF CODE/TOOLS/MERRA2.log.law.template.A.z.nc4' 45 | 46 | # path to the VWF model 47 | VWFMODEL = 'Q:/VWF/lib/VWF.R' 48 | 49 | 50 | 51 | ##### 52 | ## ## READ IN DATA 53 | ##### 54 | 55 | # load the VWF model 56 | source(VWFMODEL) 57 | 58 | # find and prepare all our merra files 59 | merra = prepare_merra_files(merraFolder) 60 | 61 | # prepare a NetCDF file handler so our format is known 62 | f = merra$files[1] 63 | nc = NetCdfClass(f, 'MERRA2', TRUE) 64 | 65 | # subset if necessary 66 | if (exists('region')) 67 | nc$subset_coords(region) 68 | 69 | # close this input file 70 | nc$close_file() 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | ##### 79 | ## ## PREPARE OUR CLUSTER 80 | ##### 81 | 82 | # build a parallel cluster 83 | # note that it doesn't make sense going much beyond 8 cores 84 | library(doParallel) 85 | cl = makeCluster(8) 86 | registerDoParallel(cl) 87 | 88 | # provide the extrapolate function to each core 89 | clusterExport(cl, varlist=c('extrapolate_log_law')) 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | ##### 98 | ## ## RUN 99 | ##### 100 | 101 | # run through each input merra file 102 | for (f in merra$files) 103 | { 104 | 105 | # get this file's time attributes 106 | nc$open_file(f) 107 | myTime = ncatt_get(nc$ncdf, "time", "units")$value 108 | nc$close_file() 109 | 110 | 111 | 112 | # do the extrapolation, getting the A,z parameters 113 | profile = extrapolate_ncdf(f) 114 | 115 | # safety checks 116 | err = which(profile$z > 100) 117 | profile$z[err] = 100 118 | 119 | err = which(profile$z < 10^-10) 120 | profile$z[err] = 10^-10 121 | 122 | err = which(profile$A < 0) 123 | profile$A[err] = 0 124 | 125 | 126 | 127 | 128 | # build a filename for this output data 129 | o = gsub('prod.assim.tavg1_2d_slv', 'wind_profile', f) 130 | o = gsub('tavg1_2d_slv_Nx', 'wind_profile', f) 131 | o = gsub(merraFolder, outputFolder, o) 132 | 133 | # create the NetCDF file for this month 134 | file.copy(templateFile, o) 135 | 136 | # open this file for writing 137 | ncout = nc_open(o, write=TRUE) 138 | 139 | # set its time attributes 140 | ncatt_put(ncout, "time", "units", myTime) 141 | 142 | # put our data in 143 | for (var in names(profile)) 144 | { 145 | ncvar_put(ncout, var, profile[[var]]) 146 | } 147 | 148 | # save and close 149 | nc_close(ncout) 150 | cat("Written", o, "\n") 151 | } 152 | 153 | 154 | 155 | cat("\n\n\nFLAWLESS!\n\n") 156 | -------------------------------------------------------------------------------- /2.CALCULATE.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | ########################## 4 | ## ## 5 | ## GENERAL OPTIONS ## 6 | ## ## 7 | ########################## 8 | 9 | 10 | # our wind farms file with coordinates, turbine models, etc. 11 | farmOptionsFile = 'Q:/VWF/data/INPUT_CH.R' 12 | 13 | # our base options file with model parameters, etc. 
14 | baseOptionsFile = 'Q:/VWF/data/BASE.OPTIONS.R' 15 | 16 | 17 | 18 | 19 | ########################## 20 | ## ## 21 | ## PREPARE ## 22 | ## ## 23 | ########################## 24 | 25 | 26 | # load the VWF model and required libraries 27 | # 28 | source('Q:/VWF/lib/VWF.R') 29 | 30 | # prepare the VWF model with your parameters 31 | # check ability to save output files 32 | # read in wind farms and power curves 33 | # prep the merra data for reading in 34 | # launch multi-core cluster 35 | # establish spatial interpolation and bias correction code 36 | # 37 | source(baseOptionsFile) 38 | source('Q:/VWF/lib/VWF.MAIN.PREP.R') 39 | 40 | # key things you now have: 41 | # windFarms - dataframe containing our wind farms 42 | # farmCurve - dataframe containing our power curves 43 | # merra_wind - object holding our merra filenames 44 | # nc_wind - object for handling netcdf files 45 | # cl - multicore cluster object 46 | 47 | 48 | 49 | 50 | ########################## 51 | ## ## 52 | ## GET WIND SPEED ## 53 | ## ## 54 | ########################## 55 | 56 | # read in pre-calculated speeds if we have them, otherwise go and interpolate & extrapolate them 57 | # this is a very slow stage - go fire up 3.PROGRESS.MONITOR.R to see how it's going.. 58 | source('Q:/VWF/lib/VWF.MAIN.WINDSPEED.R') 59 | 60 | # key things you now have: 61 | # windSpeed - dataframe containing hourly wind speeds for each farm 62 | # this is also saved into baseSaveFolder 63 | 64 | 65 | 66 | ########################## 67 | ## ## 68 | ## GET WIND POWER ## 69 | ## ## 70 | ########################## 71 | 72 | source('Q:/VWF/lib/VWF.MAIN.WINDPOWER.R') 73 | 74 | # key things you now have: 75 | # windSpeed - now modified for bias correction 76 | # loadFactor - dataframe containing the hourly capacity factors for each farm 77 | # powerMW - dataframe containing the hourly power output for each farm 78 | # parms - dataframe containing the parameters used for each farm 79 | 80 | 81 | 82 | ########################## 83 | ## ## 84 | ## SAVE RESULTS ## 85 | ## ## 86 | ########################## 87 | 88 | source('Q:/VWF/lib/VWF.MAIN.RESULTS.R') 89 | flush('\n\nFLAWLESS\n\n') 90 | -------------------------------------------------------------------------------- /3.PROGRESS.MONITOR.R: -------------------------------------------------------------------------------- 1 | 2 | # the options files being used in your VWF calculation 3 | farmOptionsFile = 'Q:/VWF/data/INPUT_CH.R' 4 | baseOptionsFile = 'Q:/VWF/data/BASE.OPTIONS.R' 5 | 6 | 7 | # update frequency in seconds 8 | update_time = 4 9 | 10 | # establish the progress monitor 11 | # this will continually update until you exit this R instance 12 | source(baseOptionsFile) 13 | source('Q:/VWF/lib/VWF.MAIN.PROGRESS.R') 14 | -------------------------------------------------------------------------------- /CITATION: -------------------------------------------------------------------------------- 1 | To reference VWF in publications, please cite the following paper: 2 | 3 | I Staffell and S Pfenninger, 2016. Using bias-corrected reanalysis to simulate current and future wind power output. Energy, 114, 1224–1239. https://dx.doi.org/10.1016/j.energy.2016.08.068 4 | 5 | 6 | 7 | To reference the Renewables.ninja model (i.e. if you use the renewables.ninja website) please cite both of the following papers: 8 | 9 | S Pfenninger and I Staffell, 2016. Long-term patterns of European PV output using 30 years of validated hourly reanalysis and satellite data. Energy, 114, 1251-1265. 
https://dx.doi.org/10.1016/j.energy.2016.08.060 10 | 11 | I Staffell and S Pfenninger, 2016. Using bias-corrected reanalysis to simulate current and future wind power output. Energy, 114, 1224–1239. https://dx.doi.org/10.1016/j.energy.2016.08.068 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | Copyright (c) 2012-2017, Iain Staffell 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | * Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The Virtual Wind Farm (VWF) model 2 | 3 | `vwf` is an R project to simulate the power output from wind farms based on NASA weather data. The wind energy simulations on [Renewables.ninja](https://www.renewables.ninja/) are based on the VWF model. 4 | 5 | 6 | 7 | ## REQUIREMENTS 8 | 9 | An unsound mind and plenty of alcohol. 10 | 11 | [R](https://www.r-project.org/) or [MRO](https://mran.revolutionanalytics.com/open/) version 3+ 12 | 13 | Required libraries: 14 | * ncdf4 15 | * lubridate 16 | * rworldmap 17 | * doParallel 18 | * data.table 19 | * akima 20 | * MASS 21 | * fields 22 | 23 | 24 | 25 | ## SETUP 26 | 27 | ### Download wind speed data 28 | First, download the necessary input data from NASA's [MERRA-2 reanalysis](https://gmao.gsfc.nasa.gov/reanalysis/MERRA-2/), specifically the [SLV tables](http://dx.doi.org/10.5067/VJAFPLI1CSIV) (M2T1NXSLV). 29 | 30 | The easiest (and most wasteful) option is to use the 'Online archive' link and download the complete files (approx 400 MB per day). Alternatively you could use the various subsetting tools available, selecting the DISPH, U2M, V2M, U10M, V10M, U50M and V50M variables, and whatever region and time period you are interested in. Good luck, they rarely function correctly! 
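To sanity-check a downloaded file before committing to the full pipeline, a minimal sketch along these lines should work (it uses the `ncdf4` library from the requirements list; the filename is a placeholder for whichever file you downloaded):

```r
# quick sanity check of a single MERRA-2 SLV file (hypothetical filename)
library(ncdf4)

nc = nc_open('MERRA2_SLV_example.nc4')  # any M2T1NXSLV file you downloaded
u50 = ncvar_get(nc, 'U50M')             # eastward wind component at 50 m [lon, lat, time]
v50 = ncvar_get(nc, 'V50M')             # northward wind component at 50 m
nc_close(nc)

# scalar wind speed at 50 m - values should look physically sensible (roughly 0-40 m/s)
speed50 = sqrt(u50^2 + v50^2)
summary(as.vector(speed50))
```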
31 | 32 | ### Set up the VWF model 33 | With the required libraries installed, the VWF code should hopefully just require you to edit the paths to the include files and your data files. These are dotted all over the place, so you're best to search for ":/" in all the files to check you have found all paths. 34 | 35 | ### Process wind speed data 36 | Performing the wind speed extrapolation (working out wind speed at the specific height of your turbine) is very computationally expensive, so the VWF calculation engine uses pre-compiled wind speed profile data, in the form of the `A` and `z` parameters of the logarithmic profile law (wind speed at height *h* is then recovered as `v = A * log(h / z)`). 37 | 38 | First, you need to run one of the `1.COMPILE.WIND.PROFILE.R` files, editing the paths to your input, template and output files. On a standard desktop, this takes around a week per year of input data. I said it was computationally expensive. Alternatively, perhaps we could collaborate on some fun research together? 39 | 40 | This process should yield a set of NetCDF files, one per day, containing the wind profile parameters for each location and hour of the day. These are about 30 MB each, so around 10 GB per year. 41 | 42 | 43 | 44 | ## USAGE INSTRUCTIONS 45 | 46 | First, run either `1.COMPILE.WIND.PROFILE (merra1).R` or `1.COMPILE.WIND.PROFILE (merra2).R` to generate your wind speed profiles, as detailed above. 47 | 48 | Second, create a simulation input file describing the wind farms you wish to simulate. An example covering 39 wind farms in Switzerland is provided in `data\INPUT_CH.CSV` and `data\INPUT_CH.R`. 49 | 50 | Third, decide on the model parameters you wish to use. There are various knobs and switches to tweak, with a set of 'safe' defaults listed in `data\BASE.OPTIONS.R`. Some of these alter the calculation engine (such as how much smoothing to apply to power curves, or how to do the bias correction of wind speeds). Some control what output you receive (do you want to save corrected wind speeds, monthly average farm output, output aggregated by turbine model, by country, etc.). Hopefully there are enough comments in that file to help you along. 51 | 52 | Finally, set up the model to run. Change the path names to your specific options files in `2.CALCULATE.R`, and off you go. You may find this is also quite slow to run (a few hours to simulate 100 farms over 30 years perhaps). As it runs on multiple cores in parallel it isn't easy to provide a progress indicator in the main code (at least, I don't know how to). So, you can optionally run `3.PROGRESS.MONITOR.R` in another R session; it will paint pretty pictures showing how much you have simulated... 53 | 54 | 55 | ## LICENSE 56 | BSD 3-Clause License 57 | Copyright (C) 2012-2017 Iain Staffell 58 | All rights reserved. 59 | 60 | This excludes all contents of the `power_curves` folder which remains the property of the respective copyright holders. 61 | 62 | See `LICENSE` for more detail 63 | 64 | 65 | ## CREDITS & CONTACT 66 | 67 | The VWF code is developed by Iain Staffell. You can try emailing me at i.staffell@imperial.ac.uk 68 | 69 | VWF is part of the [Renewables.ninja](https://renewables.ninja) project, developed by Stefan Pfenninger and Iain Staffell. Use the [contacts page](https://www.renewables.ninja/about) there. 70 | 71 | ## Citation 72 | 73 | I Staffell and S Pfenninger, 2016. Using bias-corrected reanalysis to simulate current and future wind power output. *Energy*, 114, 1224–1239. 
[doi: 10.1016/j.energy.2016.08.068](https://dx.doi.org/10.1016/j.energy.2016.08.068) 74 | -------------------------------------------------------------------------------- /data/BASE.OPTIONS.R: -------------------------------------------------------------------------------- 1 | 2 | # some tips.. 3 | # save your results to a different physical drive to the one you read merra data from 4 | # don't assume more cores = more speed... 16 cores seems ok, beyond that split your job up 5 | 6 | 7 | ## MOST COMMON OPTIONS 8 | 9 | # where to save everything - filenames are automatically generated 10 | baseSaveFolder = 'L:/NINJA_GLOBAL/US_STATES/' 11 | baseSaveFile = get_text_before(basename(farmOptionsFile), '.', last=TRUE) %&% '.' 12 | 13 | # what format to save as: csv rds or rdata 14 | xtn = 'rds' 15 | 16 | # which years to calculate 17 | yearRange = 1980:2016 18 | 19 | # set how many cores you wish to use 20 | n.cores = 10 21 | 22 | 23 | 24 | 25 | 26 | ## MERRA EXTRAPOLATED WIND DATA 27 | 28 | # do you want to use custom pre-compiled windspeeds? 29 | # set this to NULL to use the default speeds from a previous model run, or to calculate new ones 30 | windSpeedFile = NULL 31 | 32 | # the folders we wish to read data from 33 | merraFolders = 'X:/MERRA2_PROFILES/' %&% yearRange %&% '/' 34 | 35 | # process most recent data first? 36 | processNewestFirst = FALSE 37 | 38 | # which model are we using 'MERRA1' or 'MERRA2' 39 | reanalysis = 'MERRA2' 40 | 41 | # how to group files ('monthly' or 'quarterly') 42 | merra_grouping = 'quarterly' 43 | 44 | 45 | 46 | 47 | 48 | 49 | ## POWER CONVERSION PARAMETERS 50 | 51 | # the parameters we use to determine the scalar and offset for each farm 52 | # scalar = (scalar_alpha * PR) + scalar_beta 53 | scalar_alpha = 1/2 54 | scalar_beta = 1/3 55 | 56 | # specify the parameters for multi-turbine farm curve 57 | convolverStdDev = 0.20 # must be > 0 58 | convolverSpeedSmooth = 0.20 # must be 0.00, 0.10, 0.20, 0.30, 0.40 59 | 60 | # the files our power curves live in (raw turbine curves and smoothed farm curves) 61 | turbCurveFile = 'Q:/VWF/power_curves/Wind Turbine Power Curves ~ 5 (0.01ms with 0.00 w smoother).csv' 62 | farmCurveFile = 'Q:/VWF/power_curves/Wind Turbine Power Curves ~ 5 (0.01ms with ' %&% sprintf("%.2f", convolverSpeedSmooth) %&% ' w smoother).csv' 63 | 64 | # should we inflate offshore farm PR by 1.16 (i.e. you haven't done it in your farms file, or you have separately calibrated onshore/offshore) 65 | inflate_offshore_pr = TRUE 66 | 67 | 68 | 69 | 70 | 71 | ## HOW TO TRANSFORM WIND SPEEDS 72 | 73 | # do we require speeds interpolated to half-hourly? 74 | halfHourly = FALSE 75 | 76 | # if so, inject noise into the half-hourly wind speeds to represent the distribution of power swings more closely 77 | # stdev of normal distribution that things are multiplied by 78 | noise_stdev = 0.04 79 | 80 | # do you want to factor air density into the calculations? 81 | # if so airDensityFile must be specified 82 | doAirDensity = FALSE 83 | airDensityFile = NULL 84 | 85 | # should we transform unfathomably low wind speeds? 86 | # i.e. sites with a long-run average < 4 m/s ... just wouldn't have a wind farm! 87 | doTransformLowestSpeeds = TRUE 88 | 89 | 90 | 91 | ## RESULTS FILES 92 | 93 | # should the original wind speeds for each farm be saved? 94 | # this will let you skip the first (slow) part of processing if you want to rerun 95 | save_original_wind_speeds = TRUE 96 | 97 | # should the modified wind speeds for each farm be saved? 
98 | # these are the 'bias corrected' speeds which give the desired energy output 99 | save_modified_wind_speeds = FALSE 100 | 101 | # should the hourly power output / capacity factor for each farm be saved? 102 | # note, this file will be similar in size to the wind speeds (i.e. big) 103 | save_hourly_farm_mw = FALSE 104 | save_hourly_farm_cf = FALSE 105 | 106 | 107 | # should MW/CF be aggregated across farms? 108 | # use '*' to aggregate all farms together (MW is the sum of all, CF is the capacity-weighted average) 109 | # use other column names from windFarms to split into groups 110 | # use '' to do no aggregation at all 111 | # 112 | # e.g. c('iso', 'offshore') will give one file with columns for each country, one file with all onshore and offshore separated 113 | # if you'd like onshore/offshore separated *in* each country, create a new column in windFarms (windFarms$iso_off = windFarms$iso %&% ifelse(tolower(windFarms$offshore) == 'no', 'ON', 'OFF')) 114 | # 115 | # '*' is quick... others are slow as they also create the 'evolving' total (where farms don't produce before they were born) for validation purposes 116 | # 117 | save_files_split_by = c('*') 118 | 119 | 120 | 121 | 122 | ## GLORIOUS GUI TYPE STUFF 123 | 124 | # do you want a graphical / interactive experience (at the expense of speed) 125 | lots_of_plots = FALSE 126 | 127 | # do you want any plots at all (at the expense of R claiming the foreground when redrawing) 128 | a_few_plots = FALSE 129 | -------------------------------------------------------------------------------- /data/INPUT_CH.CSV: -------------------------------------------------------------------------------- 1 | id,country,name,lon,lat,offshore,height,capacity,power_curve,PR 2 | CH1,CH,Feldmoos,8.06,46.99,no,60,0.9,NEG.Micon.NM52.900,0.983 3 | CH2,CH,Gatsch,8.594,46.647,no,45,0.6,Enercon.E40.600,0.983 4 | CH3,CH,Collonges,7.03,46.17,no,100,2,Enercon.E70.2000,0.983 5 | CH4,CH,Gatsch,8.594,46.647,no,55,0.9,Enercon.E44.900,0.983 6 | CH5,CH,Oberer Grenchenberg,7.4,47.23,no,30,0.15,Bonus.B23.150,0.983 7 | CH6,CH,Martigny,7.03,46.17,no,100,2,Enercon.E82.2000,0.983 8 | CH7,CH,Saint-Brais,7.113,47.305,no,80,4,Enercon.E82.2000,0.983 9 | CH8,CH,Calandawind,9.526,46.879,no,120,3,Vestas.V112.3000,0.983 10 | CH9,CH,Feldmoos,8.06,46.99,no,50,0.95,NEG.Micon.NM52.900,0.983 11 | CH10,CH,Mount Crosin,7.017,47.18,no,95,16,Vestas.V90.2000,0.983 12 | CH11,CH,Peuchapatte,6.978,47.246,no,110,6.9,Enercon.E82.2300,0.983 13 | CH12,CH,Gatsch,8.594,46.647,no,55,1.8,Enercon.E44.900,0.983 14 | CH13,CH,Gries,8.374,46.463,no,85,2.3,Enercon.E70.2300,0.983 15 | CH14,CH,Lutersarni,8.108,46.992,no,80,2.3,Enercon.E82.2300,0.983 16 | CH15,CH,Charrat,7.145,46.131,no,100,3,Enercon.E101.3000,0.983 17 | CH16,CH,Mount Crosin,7.013,47.177,no,95,8,Vestas.V90.2000,0.983 18 | CH17,CH,Mount Crosin,7.06,47.198,no,100,6.6,Vestas.V112.3000,0.983 19 | CH18,CH,Mount Crosin,6.983,47.161,no,110,6.6,Vestas.V112.3000,0.983 20 | CH19,CH,Gries,8.374,46.463,no,115,7.05,Enercon.E92.2300,0.983 21 | CH20,CH,Extra 1,7.954166667,47.20416667,no,100,0.419,Enercon.E101.3000,0.983 22 | CH21,CH,Extra 2,10.3375,46.72916667,no,80,0.419,Enercon.E66.2000,0.983 23 | CH22,CH,Extra 3,10.2625,46.90416667,no,105,0.419,Enercon.E101.3000,0.983 24 | CH23,CH,Extra 4,9.354166667,47.0375,no,60,0.419,Enercon.E82.2000,0.983 25 | CH24,CH,Extra 5,8.770833333,46.90416667,no,70,0.419,REPower.MD77.1500,0.983 26 | CH25,CH,Extra 6,8.9125,47.44583333,no,30,0.419,Vestas.V112.3000,0.983 27 | CH26,CH,Extra 
7,9.1875,46.37083333,no,55,0.419,Enercon.E101.3000,0.983 28 | CH27,CH,Extra 8,9.279166667,47.57083333,no,80,0.419,Enercon.E82.2000,0.983 29 | CH28,CH,Extra 9,6.120833333,46.30416667,no,60,0.419,Vestas.V112.3000,0.983 30 | CH29,CH,Extra 10,9.520833333,46.67916667,no,30,0.419,Enercon.E44.900,0.983 31 | CH30,CH,Extra 11,8.354166667,47.5375,no,80,0.419,Nordtank.NTK500,0.983 32 | CH31,CH,Extra 12,8.3625,46.8125,no,100,0.419,Enercon.E101.3000,0.983 33 | CH32,CH,Extra 13,8.1875,46.87083333,no,100,0.419,Enercon.E82.2000,0.983 34 | CH33,CH,Extra 14,8.345833333,46.72916667,no,100,0.419,Enercon.E82.2300,0.983 35 | CH34,CH,Extra 15,8.304166667,46.7375,no,80,0.419,Gamesa.G47.660,0.983 36 | CH35,CH,Extra 16,8.929166667,46.1625,no,120,0.419,Enercon.E70.2300,0.983 37 | CH36,CH,Extra 17,9.720833333,46.57916667,no,55,0.419,Enercon.E70.2300,0.983 38 | CH37,CH,Extra 18,8.754166667,47.29583333,no,75,0.419,Enercon.E40.600,0.983 39 | CH38,CH,Extra 19,8.970833333,47.4625,no,85,0.419,Siemens.SWT.3.0.101,0.983 40 | CH39,CH,Extra 20,7.345833333,46.1625,no,120,0.419,Enercon.E92.2300,0.983 41 | -------------------------------------------------------------------------------- /data/INPUT_CH.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/renewables-ninja/vwf/5ce182d09f9d50c10be03cb7ebdc0bf41e18d86e/data/INPUT_CH.PNG -------------------------------------------------------------------------------- /data/INPUT_CH.R: -------------------------------------------------------------------------------- 1 | 2 | # our wind farms file with coordinates, turbine models, etc. 3 | windFarmFile = 'Q:/VWF/data/INPUT_CH.CSV' 4 | 5 | # the simplified column names used in these files (must be the same across all files, and must match the number of columns in the CSV) 6 | windFarmCols = c('id', 'iso', 'name', 'lon', 'lat', 'offshore', 'height', 'capacity', 'power_curve', 'PR') 7 | -------------------------------------------------------------------------------- /lib/VWF.EXTRAPOLATE.R: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # # 3 | # BSD 3-Clause License # 4 | # Copyright (C) 2012-2017 Iain Staffell # 5 | # All rights reserved. 
# 6 | # # 7 | ################################################################## 8 | 9 | 10 | 11 | 12 | ######################################################################################################################## 13 | ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## 14 | ######################################################################################################################## 15 | 16 | ##### 17 | ## ## WIND PROFILE EXTRAPOLATION ==== VERSION FOR PRE-COMPILING 18 | ##### 19 | 20 | 21 | # calculate the wind extrapolation parameters for a given data point (one location and height) 22 | # takes four parameters: wind speed at 2, 10, 50 metres, and the displacement height 23 | # returns two parameters: the scale factor (A) and reference height (z) 24 | # wind speed at any height can be calculated from: v = A * log(h / z) 25 | extrapolate_log_law = function(w2m, w10m, w50m, dh) 26 | { 27 | # assemble our three heights and wind speeds 28 | h = c(2+dh, 10+dh, 50) 29 | v = c(w2m, w10m, w50m) 30 | 31 | # linearise and perform a ls fit 32 | # weight the data at 50m more strongly 33 | logh = log(h) 34 | fit = lm(v ~ logh, weights=c(1,1,2)) 35 | 36 | # extract our coefficients 37 | # v = A log(h) - A log(z) therefore slope = A, exp(-intercept / A) = z 38 | A = coef(fit)[[2]] 39 | z = exp(-coef(fit)[[1]] / A) 40 | 41 | return( c(A, z) ) 42 | } 43 | 44 | 45 | 46 | # calculate the wind extrapolation parameters for a given data point (one location and height) 47 | # takes four parameters: wind speed at 2, 10, 50 metres, and the displacement height 48 | # returns two parameters: the scale factor (epsilon) and shear coefficient (alpha) 49 | # wind speed at any height can be calculated from: v = epsilon * h ^ alpha 50 | extrapolate_power_law = function(w2m, w10m, w50m, dh) 51 | { 52 | # assemble our three heights and wind speeds 53 | h = c(2+dh, 10+dh, 50) 54 | v = c(w2m, w10m, w50m) 55 | 56 | # linearise and perform a ls fit 57 | # weight the data at 50m more strongly 58 | logh = log(h) 59 | logv = log(v) 60 | 61 | fit = lm(logv ~ logh, weights=c(1,1,2)) 62 | 63 | # extract our coefficients 64 | # v2 / v1 = (h2 / h1) ^ alpha, therefore v2 = epsilon * h ^ alpha 65 | epsilon = exp(coef(fit)[[1]]) 66 | alpha = coef(fit)[[2]] 67 | 68 | return( c(epsilon, alpha) ) 69 | } 70 | 71 | 72 | 73 | # wrapper function to calculate the extrapolation parameters for all data points 74 | # takes the filename to be processed 75 | # returns a list of arrays, one containing each parameter returned by the chosen extrapolation function 76 | # 77 | # note - if you want to change which extrapolation function is used, change the mapply() statement, and the last segment of code (extracting & returning parameters) 78 | # 79 | extrapolate_ncdf = function(nc.filename) 80 | { 81 | 82 | ##### 83 | ## ## READ THE NECESSARY NETCDF DATA 84 | ##### 85 | 86 | # open the file 87 | nc$open_file(nc.filename) 88 | flush("Calculating wind shear using the log law -", format(nc$date)) 89 | 90 | # extract wind speed at 2, 10 and 50 metres - format is speed2m[lon, lat, time] 91 | wind2m = nc$get_speed02m() 92 | wind10m = nc$get_speed10m() 93 | wind50m = nc$get_speed50m() 94 | 95 | # extract displacement height 96 | if (nc$model == 'MERRA1') disph = nc$get_var('disph') 97 | if (nc$model == 'MERRA2') disph = nc$get_var('DISPH') 98 | 99 | # close this input file 100 | nc$close_file() 101 | 102 | 103 | 104 | 105 | 106 | ##### 107 | ## ## CALCULATE ALL 
EXTRAPOLATION PARAMETERS 108 | ##### 109 | 110 | # create our results storage 111 | profile = NULL 112 | 113 | # run through each hour in the data set 114 | for (h in 1:length(nc$hour)) 115 | { 116 | flush(sprintf(" %02d:00", h-1)) 117 | 118 | # process all locations in parallel 119 | wp = foreach (y = 1:length(nc$lat), .combine=cbind) %dopar% 120 | { 121 | mapply(FUN=extrapolate_log_law, wind2m[ , y, h], wind10m[ , y, h], wind50m[ , y, h], disph[ , y, h]) 122 | } 123 | 124 | # bind the results together 125 | profile = cbind(profile, wp) 126 | 127 | flush("\b\b\b\b\b\b") 128 | } 129 | 130 | clear_line() 131 | 132 | 133 | # extract the parameters from extrapolate_log_law(), and reshape to match our input arrays 134 | A = array(profile[1, ], dim=dim(wind2m), dimnames=dimnames(wind2m)) 135 | z = array(profile[2, ], dim=dim(wind2m), dimnames=dimnames(wind2m)) 136 | 137 | # return as a list 138 | list(A=A, z=z) 139 | } 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | ######################################################################################################################## 149 | ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## 150 | ######################################################################################################################## 151 | 152 | 153 | ##### 154 | ## ## WIND PROFILE EXTRAPOLATION ==== OLD VERSION 155 | ##### 156 | 157 | # function that takes our speed and height data and extrapolates the wind speed up to hub height 158 | # uses the log law, deriving the roughness length from the set of observations 159 | extrapolateToHubHeight = function(row) 160 | { 161 | # assemble our data 162 | h = as.numeric(c(row["height2m"], row["height10m"], row["height50m"])) 163 | v = as.numeric(c(row["speed2m"], row["speed10m"], row["speed50m"])) 164 | 165 | hx = as.numeric(row["hubHeight"]) 166 | 167 | 168 | ### FIX ME ### 169 | # 170 | # ## this might be faster, and cleaner 171 | # h = c(row$height2m, row$height10m, row$height50m) 172 | # v = c(row$speed2m, row$speed10m, row$speed50m) 173 | # hx = row$hubHeight 174 | # 175 | # 176 | ### FIX ME ### 177 | 178 | 179 | # extrapolate to our hub height 180 | vx = extrapolateUsingLogLaw(h, v, hx) 181 | 182 | #plot(h, v, xlim=c(0,100), ylim=c(min(v)/1.3, max(v)*1.3)) 183 | #lines(1:100, A*log(1:100 / z), col="red3") 184 | #lines(1:100, epsilon*(1:100)^alpha, col="blue") 185 | #points(hx, vx, pch=16, col="purple") 186 | 187 | if (is.infinite(vx) | is.na(vx)) 188 | vx = 0 189 | 190 | return (vx) 191 | } 192 | 193 | 194 | # h = list of heights for which we have measurements 195 | # v = list of speeds at those heights 196 | # hx = hub height at which you want to know the speed 197 | extrapolateUsingLogLaw = function(h, v, hx) 198 | { 199 | # linearise and perform a ls fit 200 | # weight the data at 50m more strongly 201 | log_h = log(h) 202 | 203 | fit = lm(v ~ log_h, weights=c(1,1,2)) 204 | 205 | # extract our coefficients 206 | # v = A log(h) - A log(z) therefore slope = A, exp(-intercept / A) = z 207 | A = coef(fit)[[2]] 208 | z = exp(-coef(fit)[[1]] / A) 209 | 210 | # extrapolate to the desired height 211 | vx = A * log(hx / z) 212 | 213 | return (vx) 214 | } 215 | 216 | 217 | 218 | # h = list of heights for which we have measurements 219 | # v = list of speeds at those heights 220 | # hx = hub height at which you want to know the speed 221 | extrapolateUsingPowerLaw = function(h, v, hx) 222 | { 223 | # linearise and perform a ls fit 224 | # weight the 
data at 50m more strongly 225 | log_h = log(h) 226 | log_v = log(v) 227 | 228 | fit = lm(log_v ~ log_h, weights=c(1,1,2)) 229 | 230 | # extract our coefficients 231 | # v2 / v1 = (h2 / h1) ^ alpha, therefore v2 = epsilon * h ^ alpha 232 | epsilon = exp(coef(fit)[[1]]) 233 | alpha = coef(fit)[[2]] 234 | 235 | # extrapolate to the desired height 236 | vx = epsilon * hx ^ alpha 237 | 238 | return (vx) 239 | } 240 | 241 | 242 | 243 | -------------------------------------------------------------------------------- /lib/VWF.EXTRAS.R: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # # 3 | # BSD 3-Clause License # 4 | # Copyright (C) 2012-2017 Iain Staffell # 5 | # All rights reserved. # 6 | # # 7 | ################################################################## 8 | 9 | 10 | 11 | 12 | # 13 | # correct wind speeds based on air density 14 | # 15 | # w = wind speed at hub height (m/s) 16 | # rho_0 = air density at ground level (kg/m^3) 17 | # z = hub height (m) 18 | # 19 | air_density_correction = function(w, rho_0, z) 20 | { 21 | # estimate air density at hub height using the international standard atmosphere 22 | rho = rho_0 - 0.00012*z 23 | 24 | # decide the exponent based on the wind speed 25 | # 1/3 up to 8 m/s, then a smooth-step function up to 2/3 above 13 m/s 26 | # this was fitted in matlab from the form x = [0, 1]; y = -2x^3 + 3x^2; 27 | m = -2/375 * w^3 + 21/125 * w^2 - 208/125 * w + 703/125 28 | m[ w < 8 ] = 1/3 29 | m[ w > 13 ] = 2/3 30 | 31 | # modify the wind speed 32 | w = w * (1.225 / rho)^m 33 | w 34 | } 35 | 36 | 37 | 38 | ######################################################################################################################## 39 | ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## 40 | ######################################################################################################################## 41 | 42 | 43 | ##### 44 | ## ## SPEED TO POWER CONVERSION THINGS 45 | ##### 46 | 47 | 48 | 49 | # 50 | # take a time series of wind speeds and convert to load factors 51 | # 52 | # pass: 53 | # a vector of wind speeds 54 | # the power curve of the wind turbine / farm 55 | # the scale factor and offset for adjusting wind speeds 56 | # 57 | wind_speed_to_power_output = function(windSpeed, powerCurve, myScalar=1.00, myOffset=0.00) 58 | { 59 | # transform the wind speeds 60 | windSpeed = (windSpeed + myOffset) * myScalar 61 | #windSpeed = (windSpeed * myScalar) + myOffset 62 | 63 | # convert to a power curve index 64 | # (i.e. 
which element of powerCurve to access) 65 | index = (100 * windSpeed) + 1 66 | 67 | # constrain this to within acceptable limits 68 | index[ index < 1 ] = 1 69 | index[ index > 4000 ] = 4000 70 | 71 | return(powerCurve[index]) 72 | } 73 | 74 | 75 | # 76 | # find the fixed offset to add to wind speeds such that the resulting load factor equals energyTarget 77 | # 78 | # pass: 79 | # speed = the wind speed time series 80 | # farmCurve = the power curve you want to use for this farm 81 | # myScalar = the scalar you want to use for this farm 82 | # energyInitial = the initial load factor coming from unmodified MERRA 83 | # energyTarget = the desired load factor that we are fitting to 84 | find_farm_offset = function(speed, farmCurve, myScalar, energyInitial, energyTarget, verbose=FALSE) 85 | { 86 | myOffset = 0 87 | 88 | # decide our initial search step size 89 | stepSize = -0.64 90 | if (energyTarget > energyInitial) 91 | stepSize = 0.64 92 | 93 | repeat 94 | { 95 | # change our offset 96 | myOffset = myOffset + stepSize 97 | 98 | # calculate the yield with our farm curve 99 | mylf = wind_speed_to_power_output(speed, farmCurve, myScalar, myOffset) 100 | energyGuess = mean(mylf) 101 | 102 | if (verbose) cat("target =", sprintf("%2.2f%%", 100*energyTarget), "~ guess =", sprintf("%2.2f%%", 100*energyGuess), "~ error =", sprintf("%+2.3f%%", 100 * (energyGuess - energyTarget) / energyTarget), "% with offset =", myOffset, "\n") 103 | 104 | # if we have overshot our target, then repeat, searching the other direction 105 | # if ((guess < target & sign(step) < 0) | (guess > target & sign(step) > 0)) 106 | if (sign(energyGuess - energyTarget) == sign(stepSize)) 107 | stepSize = -stepSize / 2 108 | 109 | # if we have done enough loops then step-size is smaller than our power curve's resolution 110 | if (abs(stepSize) < 0.002) 111 | break 112 | 113 | # if we have ended up in a very strange place, quit, and spit an error later on 114 | if (myOffset < -20 | myOffset > 20) 115 | break 116 | } 117 | 118 | return( myOffset ) 119 | } 120 | 121 | 122 | # 123 | # find the scalar to multiply wind speeds by such that the resulting load factor equals energyTarget 124 | # 125 | # pass: 126 | # speed = the wind speed time series 127 | # farmCurve = the power curve you want to use for this farm 128 | # myOffset = the offset you want to use for this farm 129 | # energyInitial = the initial load factor coming from unmodified MERRA 130 | # energyTarget = the desired load factor that we are fitting to 131 | find_farm_scalar = function(speed, farmCurve, myOffset, energyInitial, energyTarget, verbose=FALSE) 132 | { 133 | myScalar = 1.00 134 | 135 | # decide our initial search step size 136 | stepSize = -0.128 137 | if (energyTarget > energyInitial) 138 | stepSize = 0.128 139 | 140 | repeat 141 | { 142 | # change our scalar 143 | myScalar = myScalar + stepSize 144 | 145 | # calculate the yield with our farm curve 146 | mylf = wind_speed_to_power_output(speed, farmCurve, myScalar, myOffset) 147 | energyGuess = mean(mylf) 148 | 149 | if (verbose) cat("target =", sprintf("%2.2f%%", 100*energyTarget), "~ guess =", sprintf("%2.2f%%", 100*energyGuess), "~ error =", sprintf("%+2.3f%%", 100 * (energyGuess - energyTarget) / energyTarget), "% with scalar =", myScalar, "\n") 150 | 151 | # if we have overshot our target, then repeat, searching the other direction 152 | # if ((guess < target & sign(step) < 0) | (guess > target & sign(step) > 0)) 153 | if (sign(energyGuess - energyTarget) == sign(stepSize)) 154 | stepSize = -stepSize / 2 
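# (halving the step each time we overshoot makes this a simple bisection search on the scalar)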
155 | 156 | # if we have done enough loops then step-size is smaller than our power curve's resolution 157 | if (abs(stepSize) < 0.0002) 158 | break 159 | 160 | # if we have ended up in a very strange place, quit, and spit an error later on 161 | if (myScalar <= 0 | myScalar > 5) 162 | break 163 | } 164 | 165 | return( myScalar ) 166 | } 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | # 176 | # do what it says on the tin 177 | # 178 | plot_power_curve_and_wind_speed = function(speed, myCurve, myOffset, mySpread) 179 | { 180 | x = seq(0, 40, 0.01) 181 | y = myCurve 182 | y[y==0] = NA 183 | 184 | # plot the density of wind speeds 185 | ds = density(speed) 186 | plot(ds$x, 4*ds$y, type='h', col='grey85', xlab="MERRA wind speed (m/s)", ylab="Load factor", lwd=2, main=paste(myName, "=", myModel), xlim=c(0, 30), ylim=c(0, 1.01)) 187 | 188 | # the original power curve 189 | lines(x, y, lwd=2) 190 | 191 | # our convoluted power curve 192 | farmCurve = convoluteFarmCurve(myCurve, myOffset, mySpread) 193 | x = x / myScalar 194 | lines(x, farmCurve, col="red3") 195 | } 196 | 197 | 198 | # plot_power_curve_and_wind_speed_alt = function(speed, myCurve, myOffset, mySpread) 199 | # { 200 | # x = seq(0, 40, 0.01) 201 | # y = myCurve 202 | # y[y==0] = NA 203 | # 204 | # # plot the density of wind speeds 205 | # ds = density(speed) 206 | # plot(ds$x, 4*ds$y, type='h', col='grey85', xlab="MERRA wind speed (m/s)", ylab="Load factor", lwd=2, main=paste(myName, "=", myModel), xlim=c(0, 30), ylim=c(0, 1.01)) 207 | # 208 | # modspeed = (speed + myOffset) * myScalar 209 | # ds = density(modspeed) 210 | # lines(ds$x, 4*ds$y, col='grey45', lwd=3) 211 | # 212 | # # the original power curve 213 | # lines(x, y, lwd=2) 214 | # 215 | # # our convoluted power curve 216 | # farmCurve = convoluteFarmCurve(myCurve, 0, mySpread) 217 | # lines(x, farmCurve, col="red3") 218 | # } 219 | 220 | 221 | 222 | 223 | 224 | 225 | ######################################################################################################################## 226 | ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## 227 | ######################################################################################################################## 228 | 229 | ##### 230 | ## ## RESHAPE DATA 231 | ##### 232 | 233 | # 234 | # function to reshape a wide format matrix (with only row and column numbers as descriptors) 235 | # to long format, optionally specifying the labels to assign to each row and column number. 
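# (for illustration: reshapeData(matrix(1:6, 2, 3), c('a','b'), c('x','y','z')) returns six (row, col, value) triples in column-major order: (a,x,1), (b,x,2), (a,y,3), ...)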
236 | # 237 | reshapeData = function(myMatrix, myRowLabels = FALSE, myColLabels = FALSE) 238 | { 239 | # check how many variables we have 240 | nRows = nrow(myMatrix) 241 | nCols = ncol(myMatrix) 242 | 243 | if (!identical(myRowLabels, FALSE) & nRows != length(myRowLabels)) 244 | { 245 | cat("reshapeData -- warning: number of rows in myMatrix does not equal the number of myRowVars labels\n"); 246 | myRowVars = FALSE 247 | } 248 | 249 | if (!identical(myColLabels, FALSE) & nCols != length(myColLabels)) 250 | { 251 | cat("reshapeData -- warning: number of columns in myMatrix does not equal the number of myColVars labels\n"); 252 | myColVars = FALSE 253 | } 254 | 255 | # invent row and column labels if you didn't 256 | if (identical(myRowLabels, FALSE)) 257 | myRowLabels = 1:nRows 258 | if (identical(myColLabels, FALSE)) 259 | myColLabels = 1:nCols 260 | 261 | 262 | # create a long list of all variable combinations 263 | allRowsList = rep(1:nRows, times=nCols) # 1 2 3 1 2 3 1 2 3 1 2 3 264 | allColsList = rep(1:nCols, each=nRows) # A A A B B B C C C D D D 265 | 266 | # create matrices of row and column vars 267 | matrixRows = matrix(myRowLabels[allRowsList], nRows, nCols) 268 | matrixCols = matrix(myColLabels[allColsList], nRows, nCols) 269 | 270 | 271 | # create our final long format list 272 | return ( data.frame( row=as.vector(matrixRows), col=as.vector(matrixCols), value=as.vector(myMatrix) ) ) 273 | } 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | ######################################################################################################################## 286 | ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## 287 | ######################################################################################################################## 288 | 289 | ##### 290 | ## ## INTERPOLATE AND CONVOLUTE POWER CURVES 291 | ##### 292 | 293 | # 294 | # function to turn a low resolution power curve file (e.g. 0..40 m/s in 1 m/s steps) 295 | # into the kind we need, with 0.01 m/s resolution, using cubic spline interpolation 296 | # 297 | interpolateTable = function(oldData, newX) 298 | { 299 | # remember our old x index 300 | oldX = oldData[ , 1]; 301 | 302 | # create a matrix to hold our new data 303 | newData = mat.or.vec(length(newX), ncol(oldData)); 304 | 305 | # fill our new x axis 306 | newData[ , 1] = newX; 307 | 308 | 309 | # run through each column.. 310 | for (i in 2:ncol(oldData)) 311 | { 312 | # extract the power curve 313 | oldY = oldData[ , i]; 314 | yMax = max(oldY); 315 | 316 | # create a spline.. but ignore any zeros past the upper cut-off speed 317 | # because the spline goes nuts when there's a step change.. 
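# (we therefore fit only the points below 10 m/s or with non-zero output, dropping the flat zero tail above the cut-out speed)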
318 | s = spline(oldX[oldX<10 | oldY>0], oldY[oldX<10 | oldY>0], method="fmm", xout=newX); 319 | 320 | # recover some dignity 321 | # clip numbers at zero and the original max (with 0.1% leeway) 322 | s$y[s$y < yMax * 0.001] = 0; 323 | s$y[s$y > yMax * 0.999] = yMax; 324 | 325 | # reinstate the lower cutoff limit 326 | cutoff = min(which(oldY > 0)); # finds which array index has the first non-zero speed 327 | cutoff = oldX[cutoff] - 0.01; # finds the speed corresponding to this 328 | if (cutoff > 3.5) cutoff = 3.5; # none too high 329 | if (cutoff < 2.5) cutoff = 2.5; # none too low 330 | s$y[s$x < cutoff] = 0; 331 | 332 | # reinstate the upper cutoff limit 333 | cutoff = max(which(oldY > 0)); 334 | cutoff = oldX[cutoff] + 0.01; 335 | s$y[s$x > cutoff] = 0; 336 | 337 | # and save 338 | newData[ , i] = s$y; 339 | } 340 | 341 | # convert to a data frame 342 | newData = data.frame(newData) 343 | colnames(newData) = colnames(oldData) 344 | 345 | return (newData); 346 | 347 | } 348 | 349 | 350 | 351 | # 352 | # function to turn a 0..40 m/s turbine power curve into an aggregate farm curve convoluted by N(mean, sd) 353 | # 354 | convoluteFarmCurve = function(myCurve, myMean, mySD) 355 | { 356 | # define our output resolution 357 | resolution = 0.01 358 | real_x = seq(0, 40, 0.01) 359 | 360 | # pad our curve sufficiently for the filtering 361 | x = seq(-10, 50, 0.01) 362 | y = c(rep(0, 1000), myCurve, rep(0, 1000)) 363 | 364 | # build a gaussian filter 365 | w = 10 366 | convolver.x = seq(-w, w, 0.01) 367 | convolver.y = dnorm(convolver.x, myMean, mySD) 368 | convolver.y = convolver.y / sum(convolver.y) 369 | 370 | smooth_y = filter(y, convolver.y, sides=2) 371 | 372 | x_range = which(x == min(real_x)) : which(x == max(real_x)) 373 | 374 | return(smooth_y[x_range]) 375 | } 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | ######################################################################################################################## 384 | ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## 385 | ######################################################################################################################## 386 | 387 | 388 | ##### 389 | ## ## WIND SPEED TEMPORAL INTERPOLATION AND NOISE INJECTION 390 | ##### 391 | 392 | interpolate_wind_speed = function(windSpeed, resolution=30) 393 | { 394 | ## get rid of fools 395 | if (resolution > 30) 396 | { 397 | cat("interpolate_wind_speed: doing nothing because resolution is greater than 30 minutes...\n") 398 | return(windSpeed) 399 | } 400 | 401 | 402 | ## 403 | ## build our new storage 404 | steps = (60 / resolution) 405 | if (steps %% 1 != 0) 406 | stop('interpolate_wind_speed: must have an integer number of steps within each hour -- resolution =' %&% resolution %&% 'minutes just nah work!\n') 407 | 408 | ws = windSpeed 409 | for (i in 2:steps) 410 | ws = rbind(ws, windSpeed) 411 | 412 | 413 | 414 | ## 415 | ## generate the new date sequence 416 | 417 | # create originals 418 | dates = ymd_hm(windSpeed$Timestamp) 419 | index = seq_along(dates) * steps 420 | index = index - (steps - 1) 421 | 422 | # start building big versions 423 | datesAll = dates 424 | indexAll = index 425 | 426 | for (i in 1:(steps-1)) 427 | { 428 | shift = i * resolution 429 | datesAll = c(datesAll, dates + minutes(shift)) 430 | indexAll = c(indexAll, index + i) 431 | } 432 | 433 | # interleave them 434 | datesAll = datesAll[ order(indexAll) ] 435 | 436 | # generate the sequence 437 | ws[ , 1] = 
datesAll 438 | 439 | 440 | 441 | ## 442 | ## do the interpolation 443 | 444 | # original data is on the half-hour 445 | x1 = seq(1, nrow(windSpeed)) - 0.5 446 | 447 | # new data will be the average of each half-hour - i.e. on the quarter hour 448 | x2 = (seq(1, nrow(ws)) - 0.5) / steps 449 | 450 | # shower down 451 | for (i in 2:ncol(windSpeed)) 452 | { 453 | # ignore if all NA 454 | if (sum(!is.na(ws[ , i])) == 0) next 455 | # spline otherwise 456 | ws[ , i] = spline(x1, windSpeed[ , i], xout=x2)$y 457 | } 458 | 459 | return(ws) 460 | } 461 | 462 | 463 | inject_noise_into_wind_speeds = function(windSpeed, stdev) 464 | { 465 | len = dim(windSpeed)[1] * dim(windSpeed)[2] 466 | noise = rnorm(len, 1.000, stdev) 467 | 468 | windSpeed * noise 469 | } 470 | 471 | -------------------------------------------------------------------------------- /lib/VWF.FARMS.R: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # # 3 | # BSD 3-Clause License # 4 | # Copyright (C) 2012-2017 Iain Staffell # 5 | # All rights reserved. # 6 | # # 7 | ################################################################## 8 | 9 | 10 | 11 | 12 | ####################################################################################################################### 13 | ## 14 | ## global storage: 15 | ## 16 | ## farms = data frame of wind farms 17 | ## essential columns are name, lon, lat, height, model 18 | ## 19 | ## 20 | ## reading in wind farm csv data: 21 | ## 22 | ## prepare_windfarms() - check all your wind farms are valid and prepares them for use 23 | ## prepare_farms_files() 24 | ## 25 | ## plotting and locating farms: 26 | ## 27 | ## plot_farms(wf, cap.weight=FALSE, add.points=FALSE, ...) 
28 | ## 29 | ## define_farm_region(wf, margin=10) 30 | ## 31 | ## find_nearest_farm(myfarm, wf, cols) 32 | ## find_farm_outliers(wf) 33 | ## locate_farm_countries(wf) 34 | ## 35 | 36 | 37 | 38 | # THIS IS NEXT LEVEL VERSION 5 SHIT 39 | 40 | # 41 | # READS IN THE WIND FARMS 42 | # CHECKS THEY ARE VALID 43 | # RETURNS THEM 44 | # 45 | prepare_windfarms = function(windFarmInfo, powerCurveFile = 'E:/WORK/Wind Modelling/~ Wind Turbine Power Curves/R/Wind Turbine Power Curves ~ 5 (0.01ms with 0.00 w smoother).csv') 46 | { 47 | 48 | # read in the .r file that gives the csv file path and the column names 49 | if (!file.exists(windFarmInfo)) 50 | stop("prepare_windfarms: Cannot find your wind farms - please specify a different windFarmInfo:\n", windFarmInfo, "\n\n") 51 | 52 | source(windFarmInfo) 53 | 54 | 55 | 56 | # read in details on our wind farms 57 | wf = read_csv(fnchk(windFarmFile)) 58 | colnames(wf) = windFarmCols 59 | 60 | 61 | # check our wind farm files have all the necessary columns 62 | essentialWindFarmCols = c('name', 'lon', 'lat', 'height', 'power_curve') 63 | 64 | check = essentialWindFarmCols %in% windFarmCols 65 | if (sum(check) < length(check)) stop("prepare_windfarms: You must have the following columns in your wind farms file:\n", paste(essentialWindFarmCols, collapse=', '), "\n\n") 66 | 67 | # and those columns have valid input 68 | for (e in essentialWindFarmCols) 69 | { 70 | bad = is.na(wf[ , e]) 71 | if (sum(bad) > 0) stop(" prepare_windfarms: You must have the complete data for " %&% e %&% " in your wind farms file:\n\n") 72 | } 73 | 74 | 75 | # assign default capacity if none is specified 76 | if (is.null(wf$capacity)) 77 | { 78 | cat(" prepare_windfarms: Farm capacity is not specified - setting all farms to 1 MW...\n") 79 | wf$capacity = 1 80 | } 81 | 82 | # assign default performance ratio if none is specified 83 | if (is.null(wf$PR)) 84 | { 85 | cat(" prepare_windfarms: Farm performance ratio is not specified - setting all farms to 100% (i.e. 
no bias correction)...\n") 86 | wf$PR = 1 87 | } 88 | 89 | # assign default start date if none is specified 90 | if (is.null(wf$date)) 91 | { 92 | cat(" prepare_windfarms: Farm start date is not specified - setting all farms to be newborns...\n") 93 | wf$date = today() 94 | } 95 | 96 | 97 | # check our date formats -- dmy_hm -> dmy 98 | fix = grep(':', wf$date) 99 | if (length(fix) > 0) 100 | { 101 | wf$date[fix] = get_text_before(wf$date[fix], ' ') 102 | } 103 | 104 | # check our date formats -- ymd -> dmy 105 | fix = grep('-', wf$date) 106 | if (length(fix) > 0) 107 | { 108 | d = ymd(wf$date[fix]) 109 | wf$date[fix] = format(d, '%d/%m/%Y') 110 | } 111 | 112 | 113 | 114 | 115 | # the file our power curves live in 116 | powerCurveFile = fnchk(powerCurveFile, stopOnError=FALSE) 117 | if (!file.exists(powerCurveFile)) stop(" prepare_windfarms: Cannot find your power curves - please specify a different powerCurveFile:\n", powerCurveFile, "\n\n") 118 | curveNames = read.csv(powerCurveFile, nrows=1) 119 | curveNames = colnames(curveNames) 120 | 121 | ## TODO: HACK 122 | #repower_hack = (wf$power_curve == 'REpower 3.4M 104') 123 | #wf$power_curve[repower_hack] = 'REpower 3.4M104' 124 | 125 | 126 | err0r = 0 127 | for (i in 1:nrow(wf)) 128 | { 129 | myModel = wf$power_curve[i] 130 | myModel = make.names(myModel) 131 | 132 | if ( !(myModel %in% curveNames) ) 133 | { 134 | cat (" prepare_windfarms: Couldn't find a power curve for", myModel, "\n") 135 | err0r = 1 136 | } 137 | } 138 | if (err0r) 139 | { 140 | cat ("prepare_windfarms: valid names are:\n") 141 | print(curveNames) 142 | stop("\n") 143 | } 144 | 145 | 146 | # convert wind farm names into valid column names (to match those in the wind speed file) 147 | wf$name = make.unique( make.names(wf$name) ) 148 | 149 | 150 | # simplify the farm heights to speed up the extrapolation stage 151 | # have these sort-of equally spaced in log terms (~0.015 resolution in log10) 152 | h = wf$height 153 | 154 | h[ h >= 0 ] = 0.5 * round(h[ h >= 0 ] / 0.5) 155 | h[ h >= 30 ] = 1 * round(h[ h >= 30 ] / 1) 156 | h[ h >= 60 ] = 2 * round(h[ h >= 60 ] / 2) 157 | h[ h >= 90 ] = 3 * round(h[ h >= 90 ] / 3) 158 | h[ h >= 120 ] = 4 * round(h[ h >= 120 ] / 4) 159 | 160 | wf$height = h 161 | 162 | 163 | # and return 164 | wf 165 | 166 | } 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | ####################################################################################################################### 175 | ## 176 | ## read a list of farms and coordinates 177 | ## optionally specify the column names, if they are not the default 178 | ## 179 | ## return the data 180 | ## 181 | read_farms_file = function(filename, colnames=NA) 182 | { 183 | # read the data 184 | data = read_csv(filename) 185 | 186 | # if colnames=NA, define default column names 187 | if (sum(is.na(colnames)) > 0) 188 | colnames = c('id', 'region', 'name', 'capacity', 'lat', 'lon', 'model', 'height', 'nTurbines') 189 | 190 | # check that the names match the data 191 | if (ncol(data) != length(colnames)) 192 | stop(" read_farms_file -- data has ", ncol(data), " columns, but you want to name ", length(colnames), " of them!\n\n") 193 | 194 | # warn people if we don't have what I consider to be the necessary columns 195 | essentialCols = c('lon', 'lat', 'height', 'name'); 196 | check = essentialCols %in% colnames; 197 | 198 | if (sum(check) < length(check)) 199 | cat(" read_farms_file -- Woah, you ought to have the following columns in your coordinates files:\n", essentialCols, "\n\n"); 200 | 201 | # 
apply 202 | colnames(data) = colnames 203 | return(data) 204 | } 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | ####################################################################################################################### 213 | ## 214 | ## define the outer boundary box that contains a set of wind farm coordinates 215 | ## box will contain all lat and lon coordinates within wf, plus a buffer of 'margin' degrees 216 | ## the default margin of 10 degrees around all locations gives a minimal error when loessing 217 | ## 218 | ## 219 | ## returns a list: extent$lon[1:2] and extent$lat[1:2] 220 | ## 221 | define_farm_region = function(wf, margin=10) 222 | { 223 | lonBounds = range(wf$lon); 224 | lonBounds = range(c(lonBounds-margin, lonBounds+margin)); 225 | if (lonBounds[1] < -180) lonBounds[1] = -180; 226 | if (lonBounds[1] > 180) lonBounds[1] = 180; 227 | 228 | latBounds = range(wf$lat); 229 | latBounds = range(c(latBounds-margin, latBounds+margin)); 230 | if (latBounds[1] < -90) latBounds[1] = -90; 231 | if (latBounds[1] > 90) latBounds[1] = 90; 232 | 233 | return( list(lon=lonBounds, lat=latBounds) ); 234 | } 235 | 236 | 237 | 238 | 239 | ####################################################################################################################### 240 | ## 241 | ## plot a set of wind farms on a map 242 | ## optionally set cap.weight to a number to scale points by farm capacity (bigger number = bigger points) 243 | ## optionally set other image properties (inherited from __inc.r:plot_map_points) 244 | ## optionally add points to an existing plot 245 | ## 246 | ## return the aspect ratio for saving an image 247 | ## 248 | plot_farms = function(wf, cap.weight=1, add.points=FALSE, ...) 249 | { 250 | # imported from __inc.r 251 | # plot_map_points = function(lon, lat, lonBounds=NA, latBounds=NA, padding=1, 252 | # mapBorder="darkgrey", mapFill="lightgrey", 253 | # mapAspect=1.5, shapeFile=NA, 254 | # resetPar=FALSE, blank=FALSE, ...) 255 | 256 | if (add.points == FALSE) 257 | { 258 | # plot all points the same size 259 | if (cap.weight == FALSE) 260 | ar = plot_map_points(wf$lon, wf$lat, ...) 261 | 262 | # size points proportional to capacity 263 | if (cap.weight != FALSE) 264 | { 265 | cap = wf$capacity 266 | cap = sqrt(cap * cap.weight / mean(wf$capacity)) 267 | 268 | ar = plot_map_points(wf$lon, wf$lat, cex=cap, lwd=cap*2, lend=1, ...) 269 | } 270 | 271 | # return the ideal height:width ratio for saving a PNG 272 | return(ar) 273 | 274 | } 275 | 276 | 277 | if (add.points == TRUE) 278 | { 279 | # plot all points the same size 280 | if (cap.weight == FALSE) 281 | points(wf$lon, wf$lat, ...) 282 | 283 | # size points proportional to capacity 284 | if (cap.weight != FALSE) 285 | { 286 | cap = wf$capacity 287 | cap = sqrt(cap * cap.weight / mean(wf$capacity)) 288 | 289 | points(wf$lon, wf$lat, cex=cap, lwd=cap*2, lend=1, ...) 
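# (the sqrt above keeps point *area*, rather than radius, proportional to
#  farm capacity, so the biggest farms don't totally dominate the map)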
290 | } 291 | 292 | return() 293 | 294 | } 295 | 296 | } 297 | 298 | 299 | 300 | ####################################################################################################################### 301 | ## 302 | ## find the farm in wf that is physically closest to myfarm 303 | ## return a data frame showing the supplied cols for both farms 304 | ## 305 | find_nearest_farm = function(myfarm, wf, cols = c('name', 'capacity', 'height', 'lat', 'lon')) 306 | { 307 | w = which.min( (myfarm$lat - wf$lat)^2 + (myfarm$lon - wf$lon)^2 ) 308 | rbind(myfarm[ , cols], wf[w, cols]) 309 | } 310 | 311 | 312 | 313 | ####################################################################################################################### 314 | ## 315 | ## analyse a set of farm coordinates 316 | ## draw a map, highlighting any outliers 317 | ## 318 | find_farm_outliers = function(wf) 319 | { 320 | padding = 1 321 | 322 | # what are potentially sensible boundaries? 323 | latInner = quantile(wf$lat, c(0.01, 0.99)) 324 | latInner = range(latInner - padding, latInner + padding) 325 | 326 | lonInner = quantile(wf$lon, c(0.01, 0.99)) 327 | lonInner = range(lonInner - padding, lonInner + padding) 328 | 329 | 330 | # how many farms lie outside these boundaries? 331 | f1 = (wf$lat < latInner[1] | wf$lat > latInner[2]) 332 | f2 = (wf$lon < lonInner[1] | wf$lon > lonInner[2]) 333 | filter = f1 | f2 334 | 335 | if (sum(filter) == 0) 336 | { 337 | cat("\nDon't think I found any silly outliers..\n") 338 | return() 339 | } 340 | 341 | 342 | # plot all the farms, so the sensible ones show in green.. 343 | # don't reset par so that we can add more points later 344 | plot_farms(wf, pointCol='darkgreen', resetPar=FALSE) 345 | 346 | # highlight the dodgy points 347 | points(wf[filter, "lon"], wf[filter, "lat"], pch=21, cex=1.2, col="red3", bg="pink") 348 | 349 | # identify them 350 | cat("\nFound", sum(filter), "potential outliers..") 351 | print( wf[filter, ] ) 352 | junk = readline(prompt = "Press ENTER to continue...") 353 | } 354 | 355 | 356 | 357 | ####################################################################################################################### 358 | ## 359 | ## locate which country each farm resides in - separating onshore and offshore 360 | ## 361 | ## return a data frame of coordinates with country and region 362 | ## 363 | locate_farm_countries = function(wf) 364 | { 365 | source("E:/WORK/Code/R/Coordinates to Countries.r"); 366 | 367 | # get our map from a custom shapefile which contains EN, WA, SC, NI 368 | shapeFile = 'E:/WORK/Z Data/Maps/Natural Earth/Admin Map Subunits/ne_10m_admin_0_map_subunits.shp' 369 | 370 | # set up our coordinates 371 | points = data.frame(lon = wf$lon, lat = wf$lat) 372 | 373 | # get a list of country names 374 | points = coords2country(points, offshore=TRUE, shapeFile) 375 | 376 | return(points) 377 | } 378 | 379 | 380 | 381 | -------------------------------------------------------------------------------- /lib/VWF.MAIN.PREP.R: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # # 3 | # BSD 3-Clause License # 4 | # Copyright (C) 2012-2017 Iain Staffell # 5 | # All rights reserved.
# 6 | # # 7 | ################################################################## 8 | 9 | 10 | 11 | ##### 12 | ## ## BIAS CORRECTION FUNCTIONS 13 | ##### 14 | 15 | 16 | # decide where to get performance ratio data for each farm 17 | # either set PR_column to be a column name in WindFarmFile to use the csv data 18 | # or set to NULL to use hard-coded defaults... 19 | PR_column = 'PR' 20 | 21 | 22 | 23 | # determine the scalar for each farm (if we are using fixed scalars) 24 | # this gives the number that we multiply all wind speeds by, before finding the offset which matches our desired capacity factor 25 | determine_farm_scalar = function(PR, iso='') 26 | { 27 | # if we are using the same scalar for every farm 28 | if (match_method == 1) 29 | scalar = 0.65 30 | 31 | # if we are basing the scalar on the farm's PR 32 | if (match_method == 2) 33 | { 34 | scalar = (scalar_alpha * PR) + scalar_beta 35 | 36 | # some crazy post-hacking from the Christian Grams era... 37 | # updated for Axpo 2017... 38 | # this relies on having 0.50 x 0.333 as your parameters 39 | if (iso == 'DE') scalar = scalar + 0.10 40 | if (iso == 'DK') scalar = scalar + 0.05 41 | if (iso == 'ES') scalar = scalar - 0.05 42 | if (iso == 'FR') scalar = scalar - 0.03 43 | if (iso == 'GB') scalar = scalar - 0.06 44 | if (iso == 'GR') scalar = scalar + 0.12 45 | if (iso == 'IE') scalar = scalar + 0.05 46 | if (iso == 'IT') scalar = scalar - 0.08 47 | if (iso == 'SE') scalar = scalar + 0.46 48 | 49 | # WTF with Sweden ey? 50 | } 51 | 52 | return(scalar) 53 | } 54 | 55 | 56 | 57 | # determine the offset for each farm (if we are using fixed offsets) 58 | # this gives the number we subtract from all wind speeds (in m/s), before finding the scalar which matches our desired capacity factor 59 | determine_farm_offset = function(PR) 60 | { 61 | # if we are using the same scalar for every farm 62 | if (match_method == 3) 63 | offset = 1.00 64 | 65 | # if we are basing the scalar on the farm's PR 66 | if (match_method == 4) 67 | offset = 3.00 * PR^0.333 / 0.85^0.333 68 | 69 | return(offset) 70 | } 71 | 72 | 73 | 74 | # which matching method should we use? 75 | # 76 | # 1 = fixed scalar same for every farm, find offset <--- this is the first method i used for the decline paper 77 | # 2 = fixed scalar from each farm's PR, find offset 78 | # 3 = fixed offset same for every farm, find scalar 79 | # 4 = fixed offset from each farm's PR, find scalar <--- this is the method being used in the most recent code 80 | # 81 | # offsets will add or subtract a fixed number to all wind speeds (in m/s) 82 | # scalars will multiply all speeds by a constant (dimensionless) 83 | # 84 | # here we set the default value :) 85 | # 86 | if (!exists('match_method')) match_method = 2 87 | 88 | 89 | 90 | # specify the standard deviation of the convolution 91 | # used to create the multi-turbine farm curve 92 | # (this should be improved!) 
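# one possible improvement, sketched here but NOT wired in -- it assumes each
# farm row carries the nTurbines column from the input csv, and the base value
# is illustrative: widen the spread for farms with many turbines, since more
# machines smear the aggregate curve further.
#
# get_convolution_spread_by_turbines = function(farm, base=0.6)
# {
#     n = farm$nTurbines
#     if (is.null(n) || is.na(n)) return(base)   # fall back when turbine count is unknown
#     base * (1 + log10(max(n, 1)) / 4)          # e.g. 100 turbines -> 1.5 x base
# }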
93 | get_power_curve_convolution_spread = function(farm) 94 | { 95 | return(convolverStdDev) 96 | 97 | # check either tCount or nTurbines 98 | # and adjust based on that 99 | } 100 | 101 | 102 | 103 | farms_inflate_offshore_pr = function(farms, offshore_factor=1.16, column='offshore') 104 | { 105 | if (column %notin% colnames(farms)) 106 | stop("farms_inflate_offshore_pr: cannot find the " %&% column %&% " column...\n") 107 | 108 | onshore = farms[ , column] %in% c('No', 'no') 109 | offshore = !onshore 110 | 111 | farms$PR[offshore] = farms$PR[offshore] * offshore_factor 112 | 113 | farms 114 | } 115 | 116 | 117 | 118 | 119 | ##### 120 | ## ## CHECK OUR INPUTS AND OUTPUTS 121 | ##### 122 | 123 | # check we can save files 124 | if (substr_reverse(baseSaveFolder, 1) != '/') 125 | baseSaveFolder = baseSaveFolder %&% '/' 126 | 127 | if (!dir.exists(baseSaveFolder)) 128 | { 129 | ok = dir.create(baseSaveFolder) 130 | 131 | if (!ok) 132 | stop("Could not create folder to save files in:", baseSaveFolder, "\n") 133 | } 134 | 135 | 136 | 137 | 138 | ##### 139 | ## ## SORT OUT OUR WIND FARM DATA 140 | ##### 141 | 142 | ## 143 | ## read in our wind farms data 144 | ## 145 | 146 | flush("> Preparing your wind farms...\n") 147 | 148 | windFarms = prepare_windfarms(farmOptionsFile) 149 | 150 | # inflate offshore PR 151 | if (inflate_offshore_pr) 152 | { 153 | windFarms = farms_inflate_offshore_pr(windFarms, 1.16) 154 | } 155 | 156 | 157 | # calculate the boundaries for lat and long that we're interested in 158 | # a margin of 10 degrees around all farms should eliminate interpolation errors 159 | region = define_farm_region(windFarms, margin=6.666) 160 | 161 | 162 | ## TODO: FIX -- SHOULD CHECK IF THIS REALLY NEEDS TO BE 6.666 DEGREES... 163 | 164 | 165 | # plot on a map to check we're being sensible 166 | if (a_few_plots) 167 | farm_plot_aspect = plot_farms(windFarms, pch='+', col='green3', cap.weight=TRUE) 168 | 169 | 170 | 171 | # make a list of all wind farm heights 172 | all_heights = sort(unique(windFarms$height)) 173 | 174 | 175 | 176 | 177 | ## 178 | ## read in power curve data 179 | ## 180 | 181 | # read in power curves for each turbine model 182 | # this must run from 0 to 40 m/s - and will be interpolated to 0.01 m/s steps 183 | turbCurve = read_csv(turbCurveFile) 184 | turbCurveNames = colnames(turbCurve)[-1] 185 | 186 | farmCurve = read_csv(farmCurveFile) 187 | farmCurveNames = colnames(farmCurve)[-1] 188 | 189 | 190 | # high-res interpolation 191 | turbCurve = interpolateTable(turbCurve, seq(0,40,0.01)) 192 | farmCurve = interpolateTable(farmCurve, seq(0,40,0.01)) 193 | 194 | 195 | 196 | 197 | # determine whether we need special measures for crossing the international date line... 
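# (for illustration: farms straddling 180 E might give a region of lon = c(170, 190);
#  the block below then remembers region_lon = c(170, -170), reads the full -180..180
#  grid instead, and the wind speed stage later stitches the two sides back together)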
198 | dateLineMadness = FALSE 199 | 200 | if (min(region$lon) < -180 | max(region$lon) > 180) 201 | { 202 | dateLineMadness = TRUE 203 | 204 | # remember our original lon range 205 | region_lon = region$lon 206 | if (region_lon[1] < -180) region_lon[1] = region_lon[1] + 360 207 | if (region_lon[2] > 180) region_lon[2] = region_lon[2] - 360 208 | 209 | # grab the entire range of lon from our NetCDF files 210 | region$lon = c(-180, 180) 211 | } 212 | 213 | 214 | 215 | 216 | 217 | 218 | ##### 219 | ## ## SORT OUT OUR MERRA WIND DATA 220 | ##### 221 | 222 | if (is.null(windSpeedFile)) 223 | { 224 | # find and prepare all our extrapolation data files 225 | flush("> Locating MERRA data") 226 | merra_wind = prepare_merra_files(merraFolders, merra_grouping, processNewestFirst) 227 | flush(" - Found", length(merra_wind$files), "days...\n") 228 | 229 | 230 | # create a NetCDF file handler for the wind data 231 | fn = merra_wind$files[1] 232 | nc_wind = NetCdfClass(fn, reanalysis) 233 | 234 | 235 | # set up the region we want to read (so that future reads are faster) 236 | nc_wind$subset_coords(region) 237 | 238 | 239 | flush("> Filtering NetCDF dataset down to", round(100 * diff(region$lon) * diff(region$lat) / 360 / 180, 1), "% of the earth's surface..\n") 240 | } 241 | 242 | 243 | 244 | 245 | ##### 246 | ## ## SORT OUT OUR DATA OBJECTS FOR INTERPOLATION AND EXTRAPOLATION 247 | ##### 248 | 249 | ## INTERPOLATION METHOD 250 | 251 | # either 'akima' or 'loess' 252 | spatial.method = 'akima' 253 | 254 | 255 | if (is.null(windSpeedFile)) 256 | { 257 | 258 | if (spatial.method == 'loess') 259 | { 260 | ### loess and tps require data to be passed as a 1d vector, whereas the merra data comes as a 2d array [lon, lat] 261 | ### so we pre-allocate some storage in 'long format' as a keyed vector 262 | 263 | # read in a merra variable 264 | loess_w = nc_wind$get_var('A') 265 | loess_w = loess_w[ , , 14] 266 | nc_wind$close_file() 267 | 268 | 269 | # create long-format storage for each of our input variables (holding all grid points for a single hour) 270 | # FIXME i should be using melt for this, i now know ;) 271 | loess_w = reshapeData(loess_w, nc_wind$lon, nc_wind$lat) 272 | colnames(loess_w) = c("lon", "lat", "value") 273 | 274 | # required by thin plate splines (fastTps) 275 | tps_xy_in = as.matrix(loess_w[ , 1:2]) 276 | tps_xy_out = as.matrix(windFarms[ , c('lon', 'lat')]) 277 | 278 | 279 | ### loess requires a fitting paramter - how many nearest neighbours to include in the fit 280 | 281 | # determine our loess span based on the geographic area we cover 282 | # 12 merra grid points gives almost the minimum RMS error (10 or 11 is better, but slower) 283 | loessSpan = 12 / nrow(loess_w) 284 | 285 | ### FIXME ### IMPROVE THIS ### FIXME ### 286 | loessSpan = sqrt(0.1 * loessSpan) 287 | } 288 | 289 | 290 | 291 | 292 | # if we are crossing the date line, set up the way we will process our data 293 | if (dateLineMadness) 294 | { 295 | # figure out where we want both sides of the date-line 296 | dateLineWest = which(nc_wind$lon >= region_lon[1]) 297 | dateLineEast = which(nc_wind$lon <= region_lon[2]) 298 | dateLineDims = c(nc_wind$lon[dateLineWest], nc_wind$lon[dateLineEast] + 360) 299 | originalDims = nc_wind$lon 300 | } 301 | 302 | } 303 | 304 | 305 | 306 | 307 | 308 | 309 | ##### 310 | ## ## LAUNCH THE MULTI-CORE CLUSTER 311 | ##### 312 | 313 | # build a multi-core parallel cluster 314 | if (exists('cl')) 315 | { 316 | flush("> Reusing existing", length(cl), "core cluster...\n") 317 | 318 | } else { 319 | 
320 | flush("> Building", n.cores, "core cluster...\n") 321 | cl = makeCluster(n.cores) 322 | registerDoParallel(cl) 323 | } 324 | 325 | if (spatial.method == 'akima') 326 | { 327 | library(akima) 328 | clusterExport(cl, varlist='bicubic') 329 | } 330 | 331 | if (spatial.method == 'loess') 332 | { 333 | library(MASS) 334 | } 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | # the VWF model should now be ready to run... 343 | -------------------------------------------------------------------------------- /lib/VWF.MAIN.PROGRESS.R: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # # 3 | # BSD 3-Clause License # 4 | # Copyright (C) 2012-2017 Iain Staffell # 5 | # All rights reserved. # 6 | # # 7 | ################################################################## 8 | 9 | 10 | 11 | # initialise 12 | library(lubridate) 13 | all_days = NULL 14 | big_loop = 0 15 | 16 | # chart functions 17 | chart_coords = function(dates) 18 | { 19 | if (merra_grouping == 'monthly') 20 | { 21 | all_groups = ymd(floor_date(dates, 'month')) 22 | } 23 | 24 | if (merra_grouping == 'quarterly') 25 | { 26 | y = year(dates) 27 | m = month(dates) 28 | m = 1 + (floor((m-1)/ 3) * 3) 29 | all_groups = ymd( paste(y, m, '01') ) 30 | } 31 | 32 | all_d = (dates - all_groups + 1) / ddays(1) 33 | 34 | data.frame(x=all_groups, y=all_d) 35 | } 36 | 37 | 38 | 39 | while (1) 40 | { 41 | # search for progress files... 42 | fn = list.files(baseSaveFolder, pattern='~progress~', full.names=TRUE) 43 | 44 | # sit and wait... 45 | if (length(fn) == 0) 46 | { 47 | clear_line('No progress files found...') 48 | Sys.sleep(update_time) 49 | next 50 | } 51 | 52 | 53 | # count how many good files we have 54 | all_progress = NULL 55 | 56 | # read and save progress in each 57 | for (i in 1:length(fn)) 58 | { 59 | progress = readLines(fn[i]) 60 | progress = suppressWarnings(ymd(progress)) 61 | progress = progress[ !is.na(progress) ] 62 | all_progress = c(all_progress, format(progress)) 63 | } 64 | 65 | 66 | # GUI 67 | if (big_loop == 0) 68 | { 69 | # build the list of all dates 70 | all_days = readLines(fn[i])[2] 71 | date_1 = get_text_before(all_days, ' to ') 72 | date_2 = get_text_after(all_days, ' to ') 73 | all_days = seq(ymd(date_1), ymd(date_2), 'day') 74 | 75 | # establish a clean plot 76 | all_coords = chart_coords(all_days) 77 | plot(all_coords, cex=1.5, pch=15, col='grey80', xlab='Month', ylab='Day') 78 | 79 | all_years = seq(ymd(date_1), ymd(date_2), 'year') 80 | axis(1, labels=FALSE, at=all_years) 81 | axis(3, labels=FALSE, at=all_years) 82 | } 83 | 84 | # add on our progress 85 | good_coords = chart_coords(ymd(all_progress)) 86 | points(good_coords, cex=1.5, pch=15) 87 | 88 | 89 | 90 | # TUI 91 | n = length(all_progress) 92 | N = nrow(all_coords) 93 | p = sprintf("%1.2f%%", 100*n/N) 94 | msg = paste0('[', n, ' of ', N, '] ', p) 95 | 96 | clear_line(msg) 97 | 98 | 99 | # wait... 100 | big_loop = big_loop + 1 101 | Sys.sleep(update_time) 102 | 103 | } 104 | 105 | -------------------------------------------------------------------------------- /lib/VWF.MAIN.RESULTS.R: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # # 3 | # BSD 3-Clause License # 4 | # Copyright (C) 2012-2017 Iain Staffell # 5 | # All rights reserved. 
# 6 | # # 7 | ################################################################## 8 | 9 | 10 | 11 | 12 | ##### 13 | ## ## SAVE HOURLY FILES 14 | ##### 15 | 16 | flush('> Saving results...\n') 17 | 18 | 19 | 20 | # save the bias corrected wind speeds for each farm 21 | if (save_modified_wind_speeds) 22 | { 23 | fn = baseSaveFolder %&% baseSaveFile %&% 'windspeed.corrected.' %&% xtn 24 | write_data(windSpeed, fn, xtn) 25 | flush(' ~ Written', fn, '\n') 26 | } 27 | 28 | 29 | 30 | # save the hourly power output in MW for every farm 31 | if (save_hourly_farm_cf) 32 | { 33 | fn = baseSaveFolder %&% baseSaveFile %&% '(a).farm.MW.' %&% xtn 34 | write_data(powerMW, fn, xtn) 35 | flush(' ~ Written', fn, '\n') 36 | } 37 | 38 | # save the hourly capacity factors for every farm 39 | if (save_hourly_farm_cf) 40 | { 41 | fn = baseSaveFolder %&% baseSaveFile %&% '(a).farm.CF.' %&% xtn 42 | write_data(loadFactor, fn, xtn) 43 | flush(' ~ Written', fn, '\n') 44 | } 45 | 46 | 47 | 48 | 49 | 50 | # save the total aggregate across all farms 51 | if ('*' %in% save_files_split_by) 52 | { 53 | # hourly 54 | if (ncol(powerMW) > 1) 55 | { 56 | total = rowSums(powerMW, na.rm=TRUE) 57 | 58 | } else { 59 | 60 | total = powerMW 61 | } 62 | 63 | total = data.frame(GMT=ymd_wtf(datecol), MW=total, CF=total/sum(windFarms$capacity)) 64 | 65 | fn = baseSaveFolder %&% baseSaveFile %&% '(b).hourly.MW.CF.csv' 66 | write_csv(total, fn, row.names=FALSE) 67 | flush(' ~ Written', fn) 68 | 69 | # monthly 70 | total_m = aggregate_monthly('GMT', total, mean) 71 | fn = baseSaveFolder %&% baseSaveFile %&% '(b).monthly.MW.CF.csv' 72 | write_csv(total_m, fn, row.names=FALSE) 73 | clear_line(' ~ Written', fn) 74 | 75 | # yearly 76 | total_y = aggregate_yearly('GMT', total, mean) 77 | fn = baseSaveFolder %&% baseSaveFile %&% '(b).yearly.MW.CF.csv' 78 | write_csv(total_y, fn, row.names=FALSE) 79 | clear_line(' ~ Written', fn, '\n') 80 | 81 | } 82 | 83 | 84 | 85 | 86 | # save the total MW and average CF grouped by various things (e.g. by country) 87 | for (split_col in save_files_split_by) 88 | { 89 | # skip doing nothing or everything or something silly 90 | if (split_col == '') next 91 | if (split_col == '*') next 92 | if (split_col %notin% colnames(windFarms)) 93 | { 94 | flush(' !! you want to split results by ' %&% split_col %&% ' which isnt a column in your wind farms file...\n') 95 | flush(' !!
you need to look at `save_files_split_by` and `windFarmCols`...\n') 96 | next 97 | } 98 | 99 | 100 | # establish results storage 101 | n = length(datecol) 102 | m = nrow(windFarms) 103 | snapshot_capacity = snapshot_output = data.frame(GMT=ymd_wtf(datecol)) 104 | evolving_capacity = evolving_output = data.frame(GMT=ymd_wtf(datecol)) 105 | 106 | # understand what time period this is 107 | min_date = min(evolving_output$GMT) 108 | max_date = max(evolving_output$GMT) 109 | total_duration = (max_date - min_date) / dyears(1) 110 | 111 | iii = 0 112 | 113 | # go through each value in that column 114 | for (split_val in sort(unique(windFarms[ , split_col]))) 115 | { 116 | s_c = s_o = rep(0, n) 117 | e_c = e_o = rep(0, n) 118 | 119 | # filter our farms - filter gives the farm row / speed column 120 | filter = which(windFarms[ , split_col] == split_val) 121 | if (length(filter) == 0) 122 | next 123 | 124 | # calculate the total output and capacity 125 | for (f in filter) 126 | { 127 | my_output = powerMW[ , f] 128 | my_capacity = rep(windFarms$capacity[f], n) 129 | my_start = dmy(windFarms$date[f]) 130 | 131 | # simply add these on to create the snapshot 132 | s_o = s_o + my_output 133 | s_c = s_c + my_capacity 134 | 135 | # if we have a start date, then wipe out the period before birth 136 | if (!is.na(my_start)) 137 | { 138 | unborn = (evolving_capacity$GMT < my_start) 139 | 140 | # what percentage of the whole period have you lived for? 141 | # take that to be your capacity - so that the oldest farms have 142 | # the greatest weight when we go so far back that we know nothing 143 | my_duration = (max_date - my_start) / dyears(1) 144 | my_duration = my_duration / total_duration 145 | my_scalar = (0.1 * my_duration) / windFarms$capacity[f] 146 | 147 | # now scale output and capacity by this 148 | my_output[unborn] = my_output[unborn] * my_scalar 149 | my_capacity[unborn] = my_capacity[unborn] * my_scalar 150 | } 151 | 152 | # now add these to create the evolving fleet 153 | e_o = e_o + my_output 154 | e_c = e_c + my_capacity 155 | 156 | 157 | # gui 158 | iii = iii + 1 159 | if (iii %% 10 == 0) 160 | clear_line('Processing', iii, '/', m, '-', split_val) 161 | } 162 | 163 | # push these into our results storage 164 | snapshot_output[ , split_val] = s_o 165 | snapshot_capacity[ , split_val] = s_c 166 | 167 | # push these into our results storage 168 | evolving_output[ , split_val] = e_o 169 | evolving_capacity[ , split_val] = e_c 170 | } 171 | 172 | clear_line() 173 | 174 | # save power output to file 175 | fn = baseSaveFolder %&% baseSaveFile %&% '(c).snapshot.MW.' %&% split_col %&% '.csv' 176 | write_csv(snapshot_output, fn, row.names=FALSE) 177 | flush(' ~ Written', fn) 178 | 179 | # now normalise to CF 180 | for (i in 2:ncol(snapshot_output)) 181 | { 182 | snapshot_output[ , i] = snapshot_output[ , i] / snapshot_capacity[ , i] 183 | } 184 | 185 | fn = baseSaveFolder %&% baseSaveFile %&% '(c).snapshot.CF.' %&% split_col %&% '.csv' 186 | write_csv(snapshot_output, fn, row.names=FALSE) 187 | clear_line(' ~ Written', fn) 188 | 189 | 190 | 191 | 192 | # save power output to file 193 | fn = baseSaveFolder %&% baseSaveFile %&% '(d).evolving.MW.' %&% split_col %&% '.csv' 194 | write_csv(evolving_output, fn, row.names=FALSE) 195 | clear_line(' ~ Written', fn) 196 | 197 | # now normalise to CF 198 | for (i in 2:ncol(evolving_output)) 199 | { 200 | evolving_output[ , i] = evolving_output[ , i] / evolving_capacity[ , i] 201 | } 202 | 203 | fn = baseSaveFolder %&% baseSaveFile %&% '(d).evolving.CF.' 
%&% split_col %&% '.csv' 204 | write_csv(evolving_output, fn, row.names=FALSE) 205 | clear_line(' ~ Written', fn) 206 | 207 | fn = baseSaveFolder %&% baseSaveFile %&% '(d).evolving.capacity.' %&% split_col %&% '.csv' 208 | write_csv(evolving_capacity, fn, row.names=FALSE) 209 | clear_line(' ~ Written', fn) 210 | 211 | 212 | 213 | # oh, oh - finally, aggregate to months! 214 | snapshot_cf = aggregate_monthly('GMT', snapshot_output, mean, silent=TRUE) 215 | evolving_cf = aggregate_monthly('GMT', evolving_output, mean, silent=TRUE) 216 | 217 | #snapshot_cf = aggregate(snapshot_output[ , -1], by=list(years), mean, na.rm=TRUE) 218 | #evolving_cf = aggregate(evolving_output[ , -1], by=list(years), mean, na.rm=TRUE) 219 | 220 | fn = baseSaveFolder %&% baseSaveFile %&% '(c).snapshot.CF.monthly.' %&% split_col %&% '.csv' 221 | write_csv(snapshot_cf, fn, row.names=FALSE) 222 | 223 | fn = baseSaveFolder %&% baseSaveFile %&% '(d).evolving.CF.monthly.' %&% split_col %&% '.csv' 224 | write_csv(evolving_cf, fn, row.names=FALSE) 225 | clear_line(' ~ Written', fn) 226 | 227 | 228 | 229 | # oh, oh - finally, aggregate to years! 230 | snapshot_cf = aggregate_yearly('GMT', snapshot_output, mean, silent=TRUE) 231 | evolving_cf = aggregate_yearly('GMT', evolving_output, mean, silent=TRUE) 232 | 233 | #snapshot_cf = aggregate(snapshot_output[ , -1], by=list(years), mean, na.rm=TRUE) 234 | #evolving_cf = aggregate(evolving_output[ , -1], by=list(years), mean, na.rm=TRUE) 235 | 236 | fn = baseSaveFolder %&% baseSaveFile %&% '(c).snapshot.CF.yearly.' %&% split_col %&% '.csv' 237 | write_csv(snapshot_cf, fn, row.names=FALSE) 238 | 239 | fn = baseSaveFolder %&% baseSaveFile %&% '(d).evolving.CF.yearly.' %&% split_col %&% '.csv' 240 | write_csv(evolving_cf, fn, row.names=FALSE) 241 | clear_line(' ~ Written', fn, '\n') 242 | 243 | } 244 | 245 | 246 | 247 | # # save the date column 248 | # fn = baseSaveFolder %&% baseSaveFile %&% '(h).dates.csv' 249 | # write_csv(datecol, fn) 250 | # flush(' ~ Written', fn, '\n') 251 | # 252 | 253 | 254 | # save the model parameters 255 | fn = baseSaveFolder %&% baseSaveFile %&% '(e).parameters.csv' 256 | write_csv(parms, fn) 257 | flush(' ~ Written', fn, '\n') 258 | -------------------------------------------------------------------------------- /lib/VWF.MAIN.WINDPOWER.R: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # # 3 | # BSD 3-Clause License # 4 | # Copyright (C) 2012-2017 Iain Staffell # 5 | # All rights reserved. # 6 | # # 7 | ################################################################## 8 | 9 | 10 | 11 | # establish two plots 12 | if (a_few_plots) 13 | { 14 | dev.new(height=5.4, width=12, xpos=0, ypos=450) 15 | good_graphics() 16 | layout(t(1:3)) 17 | 18 | plot_farms(windFarms, pch='+', cap.weight=1) 19 | dev.set() 20 | } 21 | 22 | 23 | 24 | 25 | 26 | #################################### 27 | 28 | ## 29 | ## plot the mean wind speeds at each site 30 | ## 31 | 32 | if (ncol(windSpeed) > 2) { 33 | site_means = colMeans(windSpeed[ , -1]) 34 | } else { 35 | site_means = windSpeed[ , 2] 36 | } 37 | 38 | ylim = range(site_means/1.1, site_means*1.1) 39 | 40 | if (a_few_plots) 41 | plot(sort(site_means), type='l', ylim=ylim, main='Average Speeds by Site') 42 | 43 | 44 | 45 | ## 46 | ## read in air density data 47 | ## 48 | 49 | if (doAirDensity) ##### could parallelise this with cbind? 
iterators over each column 50 | { 51 | airDensity = read_data(airDensityFile) 52 | 53 | cat("\n") 54 | 55 | for (i in 1:nrow(windFarms)) 56 | { 57 | clear_line() 58 | flush( paste("Applying air density correction: ", i, "/", nrow(windFarms), sep='') ) 59 | 60 | # valid variable version of farm name (column name in csv) 61 | myName = windFarms[i, 'name'] 62 | myCol = make.names(myName) 63 | if (!is.na(as.numeric(myName))) { myCol = as.character(myName) } ###### 64 | 65 | # get the relevant info 66 | speed = windSpeed[ , myCol] 67 | density = airDensity[ , myCol] 68 | height = windFarms[i, 'height'] 69 | 70 | windSpeed[ , myCol] = air_density_correction(speed, density, height) 71 | } 72 | 73 | clear_line() 74 | flush("Applying air density correction...\n") 75 | 76 | if (ncol(windSpeed) > 2) { 77 | site_means = colMeans(windSpeed[ , -1]) 78 | } else { 79 | site_means = windSpeed[ , 2] 80 | } 81 | 82 | if (a_few_plots) 83 | lines(sort(site_means), col='red') 84 | } 85 | 86 | 87 | ## 88 | ## transform unfathomably low speeds 89 | ## use a cubic hermite to enforce a lower-bound floor 90 | ## 91 | 92 | if (doTransformLowestSpeeds) 93 | { 94 | # our transformation parameters 95 | # floor is the lowest we can go (m/s) 96 | # width is how smooth the transition is.. 97 | # e.g. 4 and 1.5 means speeds of 2.5 go up to 4, speeds of 5.5 are unchanged 98 | trnsfrm_floor = 4 99 | trnsfrm_width = 1.5 100 | 101 | trnsfrm_min = trnsfrm_floor - trnsfrm_width 102 | trnsfrm_max = trnsfrm_floor + trnsfrm_width 103 | 104 | w = which(site_means < trnsfrm_min) 105 | 106 | if (length(w) > 0) 107 | { 108 | scalars = 4.5 / site_means[w] 109 | 110 | speed = windSpeed[ , w+1] 111 | speed = mult_cols_by_vector(speed, scalars) 112 | windSpeed[ , w+1] = speed 113 | } 114 | 115 | w = which(site_means >= trnsfrm_min & site_means < trnsfrm_max) 116 | 117 | if (length(w) > 0) 118 | { 119 | scalars = trnsfrm_max - site_means[w] 120 | scalars = scalars ^ 2 / (4 * trnsfrm_width) 121 | scalars = (site_means[w] + scalars) / site_means[w] 122 | 123 | speed = windSpeed[ , w+1] 124 | speed = mult_cols_by_vector(speed, scalars) 125 | windSpeed[ , w+1] = speed 126 | } 127 | 128 | if (ncol(windSpeed) > 2) { 129 | site_means_x = colMeans(windSpeed[ , -1]) 130 | } else { 131 | site_means_x = windSpeed[ , 2] 132 | } 133 | 134 | if (a_few_plots) 135 | { 136 | lines(sort(site_means_x), col='red3') 137 | lines(sort(site_means), col='red') 138 | } 139 | } 140 | 141 | ################################# 142 | 143 | 144 | 145 | 146 | 147 | ## 148 | ## process the wind speed data 149 | ## 150 | 151 | # interpolate to half hourly 152 | if (halfHourly) 153 | { 154 | cat(" Interpolating to half hourly...\n") 155 | windSpeed = interpolate_wind_speed(windSpeed, resolution=30) 156 | } 157 | 158 | # separate the date column from the numerics 159 | if (!exists('datecol')) 160 | { 161 | datecol = windSpeed[ , 1] 162 | windSpeed = windSpeed[ , -1] 163 | } 164 | 165 | # inject noise into the new profile 166 | if (halfHourly) 167 | { 168 | cat(" Injecting gaussian noise into profile...\n") 169 | windSpeed = inject_noise_into_wind_speeds(windSpeed, noise_stdev) 170 | } 171 | 172 | # make sure wind farm names line up in the two files 173 | windFarms$name = colnames(windSpeed) 174 | 175 | 176 | 177 | 178 | 179 | 180 | ##### 181 | ## ## ESTABLISH THE PERFORMANCE RATIO OF ALL FARMS 182 | ##### 183 | 184 | ## 185 | ## transform wind speeds from the nasa ideal to the on-the-ground reality 186 | 187 | # set the base performance for all farms 188 | performance = 
rep(1.000, ncol(windSpeed)) 189 | 190 | # modify these with the input file values if desired 191 | if (!is.null(PR_column)) 192 | { 193 | performance = performance * windFarms[ , PR_column] 194 | } 195 | 196 | # show a histogram of performance ratios... 197 | if (a_few_plots) 198 | plot(performance) 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | ##### 212 | ## ## CONVERT FROM SPEED TO POWER -- GENERATE LOAD FACTORS 213 | ##### 214 | 215 | flush('> Generating load factors with ') 216 | if (match_method == 1) flush(" fixed scalar same for every farm, finding offset\n") 217 | if (match_method == 2) flush(" fixed scalar from each farm's PR, finding offset\n") 218 | if (match_method == 3) flush(" fixed offset same for every farm, finding scalar\n") 219 | if (match_method == 4) flush(" fixed offset from each farm's PR, finding scalar\n") 220 | 221 | 222 | # to avoid passing the whole data.frame... 223 | colnames_windSpeed = colnames(windSpeed) 224 | 225 | 226 | ##WEIBULL_HAXX 227 | #wxx = seq(0, 1, length.out=1e6) 228 | #w20 = qweibull(wxx, 2.0, 10) 229 | #w17 = qweibull(wxx, 1.7, 10) 230 | #hist(w18, xlim=c(0,40), breaks=100) 231 | #hist(w20, xlim=c(0,40), breaks=100) 232 | #plot_l(w20, w20/w18) 233 | #weibull_spline = spline(w20, w20/w18, xout=seq(0,40,0.001)) 234 | ##WEIBULL_HAXX 235 | 236 | 237 | # run through each farm in parallel building a huge mega-list of results 238 | results = foreach (speed=iter(windSpeed, by='col'), i=icount(), .combine='rbind') %dopar% 239 | { 240 | # farm name and turbine model 241 | myName = windFarms[i, 'name'] 242 | myModel = windFarms[i, 'power_curve'] 243 | 244 | # valid variable version of farm name (column name in csv) 245 | # numeric farm names don't need an X on the front 246 | #myCol = make.names(myName) 247 | myCol = myName 248 | if (is.numeric(myCol)) 249 | myCol = as.character(myCol) 250 | 251 | myModel = make.names(myModel) 252 | 253 | # my individual power curve - unmodified 254 | myCurve = turbCurve[ , myModel] 255 | 256 | # check our name is ok 257 | if (myCol %notin% colnames_windSpeed) 258 | stop("Cannot find the farm:", myName, "\n\n") 259 | 260 | 261 | 262 | 263 | # interpolate any NAs (which i believe are all zeroish) 264 | missing = which(is.na(speed)) 265 | if (length(missing) > 0) 266 | speed[missing] = spline(speed, xout=missing)$y 267 | 268 | 269 | # calculate load factors using the unmodified turbine power curve 270 | mylf = wind_speed_to_power_output(speed, myCurve) 271 | energyInitial = mean(mylf) 272 | 273 | # calculate our target energy yield 274 | energyTarget = energyInitial * performance[i] 275 | 276 | 277 | 278 | 279 | 280 | ## 281 | ## calculate modified yield using convoluted farm curve 282 | 283 | # determine how to convolute this to a curve for this whole farm 284 | mySpread = get_power_curve_convolution_spread(windFarms[i, ]) 285 | 286 | # convolute to produce the power curve for this farm 287 | myCurve = farmCurve[ , myModel] 288 | myCurve = convoluteFarmCurve(myCurve, 0, mySpread) 289 | 290 | 291 | ##WEIBULL_HAXX 292 | #weibull_adjuster = spline(w20, w20/w17, xout=speed) 293 | #speed = speed * weibull_adjuster$y 294 | ##WEIBULL_HAXX 295 | 296 | 297 | # we choose to match using offsets, with fixed scalars 298 | if (match_method == 1 | match_method == 2) 299 | { 300 | # get the scalar for this farm 301 | myScalar = determine_farm_scalar(performance[i], windFarms$iso[i]) 302 | 303 | # find the additive convolution offset that gives us the correct yield 304 | myOffset = find_farm_offset(speed, 
myCurve, myScalar, energyInitial, energyTarget) 305 | } 306 | 307 | # we choose to match using scalars, with fixed offsets 308 | if (match_method == 3 | match_method == 4) 309 | { 310 | # determine the offset we should use for this farm 311 | myOffset = determine_farm_offset(performance[i]) 312 | 313 | # find the multiplicative scalar for wind speeds that gives us the correct yield 314 | myScalar = find_farm_scalar(speed, myCurve, myOffset, energyInitial, energyTarget) 315 | } 316 | 317 | # calculate the final load factors of this farm 318 | mylf = wind_speed_to_power_output(speed, myCurve, myScalar, myOffset) 319 | 320 | # make it clear if we got stuck 321 | if (myOffset < -20 | myOffset > 20 | myScalar < 0 | myScalar > 5) 322 | mylf = mylf * NA 323 | 324 | # calculate the power output in MW 325 | mymw = mylf * windFarms[i, 'capacity'] 326 | 327 | 328 | 329 | # save the the corrected wind speeds if wanted 330 | if (save_modified_wind_speeds) 331 | speed = (speed + myOffset) * myScalar 332 | 333 | 334 | 335 | # print our VWF convolution parameters 336 | mo = sprintf('%1.2f', myOffset) 337 | cs = sprintf('%1.2f', convolverStdDev) 338 | ms = sprintf('%3.1f%%', myScalar * 100) 339 | flush('-> [ws + N(', mo, ', ', cs, ')] x ', ms, '\n', sep='') 340 | 341 | 342 | # save the VWF parameters for this farm 343 | parms = c(myName, energyInitial, performance[i], energyTarget, myOffset, myScalar, mySpread) 344 | 345 | results = list( 346 | speed=speed, 347 | mylf=mylf, 348 | mymw=mymw, 349 | parms=parms 350 | ) 351 | 352 | return(results) 353 | } 354 | 355 | 356 | 357 | 358 | ##### 359 | ## ## EXTRACT RESULTS 360 | ##### 361 | 362 | # now process the mega-results table 363 | 364 | # modified wind speeds 365 | windSpeed = as.data.frame(results[ , 1]) 366 | colnames(windSpeed) = colnames_windSpeed 367 | 368 | # load factors 369 | loadFactor = as.data.frame(results[ , 2]) 370 | colnames(loadFactor) = colnames_windSpeed 371 | 372 | # power outputs 373 | powerMW = as.data.frame(results[ , 3]) 374 | colnames(powerMW) = colnames_windSpeed 375 | 376 | # VWF model parameters 377 | parms = as.data.frame(results[ , 4], stringsAsFactors=FALSE) 378 | parms = as.data.frame(t(parms), stringsAsFactors=FALSE) 379 | for (i in colnames(parms)[-1]) 380 | parms[ , i] = as.numeric(parms[ , i]) 381 | colnames(parms) = c('name', 'original_LF', 'PR', 'desired_LF', 'offset', 'scalar', 'stdev') 382 | 383 | 384 | # conserve memory 385 | rm(results) 386 | gc() 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | ##### 395 | ## ## POST PROCESSING 396 | ##### 397 | 398 | if (a_few_plots) 399 | { 400 | # histogram of site load factors 401 | cf = parms$desired_LF * 100 402 | hist(cf, breaks=0:ceiling(max(cf)), col='grey90') 403 | abline(v=mean(cf), lwd=2, col='red3') 404 | 405 | # map of capacity factors 406 | dev.set() 407 | cfc = colour_ramp_data(cf) 408 | plot_farms(windFarms, cap.weight=1, add.points=TRUE, pch='+', col=cfc) 409 | dev.set() 410 | } 411 | 412 | 413 | # run garbage collection to keep memory in check 414 | junk = capture.output(gc()) 415 | -------------------------------------------------------------------------------- /lib/VWF.MAIN.WINDSPEED.R: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # # 3 | # BSD 3-Clause License # 4 | # Copyright (C) 2012-2017 Iain Staffell # 5 | # All rights reserved. 
# 6 | # # 7 | ################################################################## 8 | 9 | 10 | 11 | 12 | ##### 13 | ## ## READ IN EXISTING SPEEDS IF WE HAVE THEM 14 | ##### 15 | 16 | # we have specified a custom wind speed file to use... 17 | if (!is.null(windSpeedFile)) 18 | { 19 | # check this file exists... 20 | if (!file.exists(windSpeedFile)) 21 | { 22 | flush(" OH NO! You specified a custom windSpeedFile, but it doesn't exist!\n") 23 | stop(" ", windSpeedFile, "\n") 24 | } 25 | } 26 | 27 | # consider the default filename for wind speeds 28 | if (is.null(windSpeedFile)) 29 | { 30 | windSpeedFile = baseSaveFolder %&% baseSaveFile %&% 'windspeed.' %&% xtn 31 | } 32 | 33 | 34 | # if that file exists - read it in! 35 | use_cached_speeds = file.exists(windSpeedFile) 36 | 37 | if (use_cached_speeds) 38 | { 39 | flush('> Reading pre-compiled wind speeds...\n') 40 | windSpeed = read_data(windSpeedFile) 41 | } 42 | 43 | 44 | 45 | 46 | 47 | ##### 48 | ## ## RUN THROUGH EACH GROUP (MONTH / QUARTER) OF MERRA FILES, READ IN THE WIND SPEED FOR EACH WINDFARM & SAVE TO A TEMP FILE 49 | ##### 50 | 51 | if (!use_cached_speeds) 52 | { 53 | flush('> Generating wind speeds... go and use the [progress meter] file to monitor how things are going...\n') 54 | flush(' Progress files being written to', baseSaveFolder, '\n') 55 | 56 | windSpeed = foreach (g = merra_wind$groups, .combine=rbind, .packages=c('ncdf4', 'lubridate')) %dopar% 57 | { 58 | 59 | # establish our progress reporting 60 | progress_file = paste0(baseSaveFolder, '~progress~', baseSaveFile, '.', g, '.txt') 61 | write('Wind speed processing started', progress_file) 62 | date_1 = head(merra_wind$dates, 1) 63 | date_2 = tail(merra_wind$dates, 1) 64 | date_range = paste(format(date_1), 'to', format(date_2)) 65 | write(date_range, progress_file, append=TRUE) 66 | write('\n', progress_file, append=TRUE) 67 | 68 | 69 | 70 | # prepare this month's input files 71 | nFiles = length(merra_wind$index[[g]]) 72 | cat('Processing', g, '=', nFiles, 'files...\n') 73 | 74 | # prepare our results storage 75 | # this holds the estimated wind speeds, rows=hours, columns=farms 76 | outputCols = c("Timestamp", windFarms$name) 77 | speed = data.frame( matrix(NA, nrow=nFiles*24, ncol=length(outputCols) ) ) 78 | colnames(speed) = outputCols 79 | 80 | # initialise our hour counter 81 | hour = 0 82 | 83 | 84 | ### 85 | ### READ EACH FILE (DAY) IN TURN 86 | ### 87 | for (f in merra_wind$index[[g]]) 88 | { 89 | 90 | ## 91 | ## READ IN THE NETCDF DATA 92 | ## 93 | 94 | # read in the wind profile data 95 | fn = merra_wind$files[f] 96 | nc_wind$open_file(fn) 97 | 98 | # make a vector of our 24 timestamps 99 | nc_wind_date = ymd(merra_wind$dates[f]) 100 | timeVector = nc_wind_date %&% ' ' %&% sprintf("%02d:00", nc_wind$hour) #### horror hack 101 | 102 | # apply this to our storage 103 | todays_hours = (hour+1):(hour+24) 104 | speed[todays_hours, 1] = timeVector 105 | 106 | # update 107 | #flush('Reading', format(nc_wind$date)) 108 | write(format(nc_wind$date), progress_file, append=TRUE) 109 | 110 | 111 | 112 | # extract our extrapolation parameters - format is A[lon, lat, time] 113 | A = nc_wind$get_var('A') 114 | z = nc_wind$get_var('z') 115 | 116 | # close this input file 117 | nc_wind$close_file() 118 | 119 | 120 | 121 | 122 | # if we are crossing the date line, reshape our data to contain what we need 123 | if (dateLineMadness) 124 | { 125 | A = A[ c(dateLineWest, dateLineEast), , ] 126 | z = z[ c(dateLineWest, dateLineEast), , ] 127 | dimnames(A)[[1]] = dateLineDims 128 | 
dimnames(z)[[1]] = dateLineDims 129 | nc_wind$lon = dateLineDims 130 | } 131 | 132 | 133 | 134 | 135 | ## 136 | ## SPATIAL INTERPOLATION AND HEIGHT EXTRAPOLATION 137 | ## 138 | 139 | # interpolate each hour of the day 140 | for (h in 1:24) 141 | { 142 | # initialise the wind speeds for all farms 143 | s = rep(NA, nrow(windFarms)) 144 | 145 | # run through each height in turn 146 | for (height in all_heights) 147 | { 148 | # get wind speeds at this height 149 | w = A[ , , h] * log(height / z[ , , h]) 150 | 151 | # locate the farms we care about 152 | my_farms = (windFarms$height == height) 153 | 154 | # loess interpolation 155 | if (spatial.method == 'loess') 156 | { 157 | # create a loess fit 158 | loess_w$value = as.vector(w) 159 | loess_fit = loess(value ~ lon*lat, data=loess_w, degree=2, span=loessSpan) 160 | 161 | # interpolate speeds to these farms 162 | s[my_farms] = predict(loess_fit, newdata=windFarms[my_farms, ]) 163 | } 164 | 165 | # spline interpolation 166 | if (spatial.method == 'akima') 167 | { 168 | # cubic spline our speeds 169 | s[my_farms] = bicubic(nc_wind$lon, nc_wind$lat, w, windFarms$lon[my_farms], windFarms$lat[my_farms])$z 170 | } 171 | 172 | } 173 | 174 | # append these extrapolated speeds - forbidding silliness like negative speeds 175 | hour = hour + 1 176 | speed[hour, -1] = pmax(0, s) 177 | } 178 | 179 | 180 | # if we are crossing the date line, repair the damage we did 181 | if (dateLineMadness) 182 | { 183 | nc_wind$lon = originalDims 184 | } 185 | 186 | } 187 | 188 | 189 | # save the wind speeds if requested 190 | if (save_original_wind_speeds) 191 | { 192 | fn = baseSaveFolder %&% baseSaveFile %&% 'windspeed.' %&% g %&% '.' %&% xtn 193 | write_data(speed, fn, xtn) 194 | flush('\n ~ Written', fn) 195 | } 196 | 197 | 198 | # could return the wind speeds for joining in memory? 199 | # speed 200 | 201 | } # finished processing all files - end of parallel loop 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | ### 210 | ### CLEAR OUT TEMPORARY FILES 211 | ### AND BIND ALL WIND SPEEDS TOGETHER 212 | ### 213 | 214 | # clear out the progress files 215 | progress_file = paste0(baseSaveFolder, '~progress~', baseSaveFile, '.', merra_wind$groups, '.txt') 216 | progress_file = progress_file[ file.exists(progress_file) ] 217 | if (length(progress_file) > 0) file.remove(progress_file) 218 | 219 | # concatenate all the results files 220 | fn = baseSaveFolder %&% baseSaveFile %&% 'windspeed.' %&% merra_wind$groups %&% '.' %&% xtn 221 | windSpeed = read_data_bind_rows(fn, verbose=FALSE) 222 | 223 | # and save 224 | big_fn = baseSaveFolder %&% baseSaveFile %&% 'windspeed.' %&% xtn 225 | flush('~ Writing', big_fn, '\n') 226 | write_data(windSpeed, big_fn, xtn) 227 | 228 | # and remove the intermediates 229 | file.remove(fn) 230 | 231 | } 232 | 233 | 234 | 235 | 236 | ##### 237 | ## ## FINISH OFF 238 | ##### 239 | 240 | # kill the plot of wind farm locations 241 | if (a_few_plots) try( dev.off(), silent=TRUE ) 242 | flush('>', nrow(windSpeed), 'hours of wind speeds calculated for', ncol(windSpeed), 'farms\n') 243 | -------------------------------------------------------------------------------- /lib/VWF.NCDF.R: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # # 3 | # BSD 3-Clause License # 4 | # Copyright (C) 2012-2017 Iain Staffell # 5 | # All rights reserved. 
# 6 | # # 7 | ################################################################## 8 | 9 | 10 | 11 | suppressPackageStartupMessages(library(ncdf4)) 12 | suppressPackageStartupMessages(library(lubridate)) 13 | 14 | 15 | 16 | 17 | ####################################################################################################################### 18 | ## 19 | ## locate all the MERRA files in the given folder(s) - note that all files are found, regardless of extension! 20 | ## extract all their dates and create an index that groups them by 'monthly' or 'daily' 21 | ## return a list with $files $dates $groups $index 22 | ## 23 | ## if grouping='daily' then you get faux-groups which allows the group-loop code to still work 24 | ## 25 | prepare_merra_files = function(folders, grouping='monthly', processNewestFirst=FALSE) 26 | { 27 | merra = list() 28 | 29 | # find NetCDF files within our folders 30 | merra$files = list.files(folders, full.names=TRUE) 31 | 32 | if (length(merra$files) == 0) 33 | stop("prepare_merra_files -- no files found in the given folder:", folders, "\n\n") 34 | 35 | # extract the dates from files 36 | if (grepl('Nx', merra$files[1])) { 37 | merra$dates = get_text_between(merra$files, 'Nx.', '.nc') 38 | } else { 39 | merra$dates = get_text_between(merra$files, 'profile.', '.nc') 40 | } 41 | merra$dates = get_text_before(merra$dates, '.SUB') 42 | merra$dates = ymd(merra$dates) 43 | 44 | # create an index for grouping files 45 | merra$index = list() 46 | 47 | # group files by quarter 48 | if (grouping == 'quarterly') 49 | { 50 | all_quarters = paste0( year(merra$dates), 'Q', ceiling(month(merra$dates)/3)) 51 | merra$groups = unique(all_quarters) 52 | 53 | for (g in merra$groups) 54 | { 55 | filter = which( all_quarters == g ) 56 | merra$index[[g]] = filter 57 | } 58 | } 59 | 60 | # group files by months 61 | if (grouping == 'monthly') 62 | { 63 | merra$groups = floor_date(merra$dates, 'month') 64 | merra$groups = format(unique(merra$groups)) 65 | 66 | for (g in merra$groups) 67 | { 68 | filter = which( year(merra$dates) == year(g) & month(merra$dates) == month(g) ) 69 | merra$index[[g]] = filter 70 | } 71 | } 72 | 73 | # create a dummy $index which goes through each file individually 74 | if (grouping == 'daily') 75 | { 76 | merra$groups = format(merra$dates) 77 | 78 | for (g in merra$groups) 79 | { 80 | filter = which(merra$dates == g) 81 | merra$index[[g]] = filter 82 | } 83 | } 84 | 85 | 86 | # run in reverse order if desired 87 | if (processNewestFirst == TRUE) 88 | merra$groups = rev(merra$groups) 89 | 90 | 91 | # return the list 92 | merra 93 | } 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | ####################################################################################################################### 105 | ## 106 | ## a class for opening and reading NetCDF files 107 | ## 108 | NetCdfClass = function(filename, model, printInfo=FALSE) 109 | { 110 | 111 | ##### 112 | ## ## MEMBERS 113 | ##### 114 | 115 | NetCdf = list( 116 | 117 | ncdf = NULL, 118 | 119 | all_vars = NULL, 120 | all_desc = NULL, 121 | all_dims = NULL, 122 | 123 | date = NULL, 124 | hour = NULL, 125 | levels = NULL, 126 | 127 | lon = NULL, 128 | lat = NULL, 129 | subset = NULL, 130 | 131 | model = NULL, 132 | 133 | calendar = NULL 134 | ) 135 | 136 | 137 | 138 | ##### 139 | ## ## METHODS 140 | ##### 141 | 142 | # 143 | # constructor 144 | # opens file 145 | # defines the latitude and longitudes contained within it 146 | NetCdf$constructor = function(filename, model, printInfo=FALSE) 147 
| { 148 | # set our reanalysis model 149 | NetCdf$model = model 150 | 151 | # open - getting our date and time 152 | NetCdf$open_file(filename, printInfo) 153 | 154 | # get our coordinate system 155 | if (NetCdf$model == 'MERRA2') 156 | { 157 | NetCdf$lon = ncvar_get(NetCdf$ncdf, 'lon') 158 | NetCdf$lat = ncvar_get(NetCdf$ncdf, 'lat') 159 | } 160 | 161 | if (NetCdf$model == 'MERRA1') 162 | { 163 | NetCdf$lon = ncvar_get(NetCdf$ncdf, 'longitude') 164 | NetCdf$lat = ncvar_get(NetCdf$ncdf, 'latitude') 165 | } 166 | 167 | if (NetCdf$model == 'MERRA1NEW') 168 | { 169 | NetCdf$lon = ncvar_get(NetCdf$ncdf, 'XDim') 170 | NetCdf$lat = ncvar_get(NetCdf$ncdf, 'YDim') 171 | } 172 | 173 | if (NetCdf$model == 'CMIP5') 174 | { 175 | NetCdf$lon = ncvar_get(NetCdf$ncdf, 'lon') 176 | NetCdf$lat = ncvar_get(NetCdf$ncdf, 'lat') 177 | } 178 | 179 | 180 | 181 | # get our pressure levels too, if present 182 | if ('levels' %in% names(NetCdf$ncdf$dim)) 183 | NetCdf$levels = ncvar_get(NetCdf$ncdf, 'levels') 184 | 185 | # default to no subsetting 186 | NetCdf$subset = FALSE 187 | 188 | # save 189 | assign('NetCdf', NetCdf, envir=NetCdf) 190 | } 191 | 192 | 193 | # 194 | # open a NetCDF file 195 | # set ncdf (the file handle), date and hour 196 | # 197 | # state which reanalysis this is: 'MERRA1' or 'MERRA2' 198 | # 199 | # optionally describes the file contents 200 | # 201 | NetCdf$open_file = function(filename, printInfo=FALSE) 202 | { 203 | # open 204 | NetCdf$ncdf = nc_open(filename) 205 | nc = NetCdf$ncdf 206 | 207 | # list all the variables inside it 208 | for (i in 1:nc$nvar) 209 | { 210 | var = nc$var[[i]] 211 | push(NetCdf$all_vars, var$name) 212 | 213 | desc = var$longname %&% ' [' %&% paste(var$size[1], var$size[2], var$size[3], sep=', ') %&% ']' 214 | push(NetCdf$all_desc, desc) 215 | } 216 | 217 | # list all the dimensions inside it 218 | for (i in 1:nc$ndim) 219 | { 220 | dim = nc$dim[[i]] 221 | push(NetCdf$all_dims, dim$name) 222 | } 223 | 224 | 225 | 226 | # get the date and time for this file 227 | if (NetCdf$model == 'MERRA2') 228 | { 229 | NetCdf$date = ncatt_get(nc, 0, 'RangeBeginningDate')$value 230 | NetCdf$date = ymd(NetCdf$date) 231 | 232 | NetCdf$hour = ncvar_get(nc, 'time') / 60 233 | } 234 | 235 | if (NetCdf$model == 'MERRA1') 236 | { 237 | NetCdf$date = ncatt_get(nc, 'time', 'units')$value 238 | NetCdf$date = strsplit(NetCdf$date, ' ')[[1]][3] 239 | NetCdf$date = ymd(NetCdf$date) 240 | 241 | NetCdf$hour = ncvar_get(nc, 'time') 242 | } 243 | 244 | if (NetCdf$model == 'MERRA1NEW') 245 | { 246 | NetCdf$date = ncatt_get(nc, 'TIME', 'units')$value 247 | NetCdf$date = strsplit(NetCdf$date, ' ')[[1]][3] 248 | NetCdf$date = ymd(NetCdf$date) 249 | 250 | NetCdf$hour = ncvar_get(nc, 'TIME') / 60 251 | } 252 | 253 | # get the date and time for this file 254 | if (NetCdf$model == 'CMIP5') 255 | { 256 | # note that CMIP is just completely stupid and has 360 day years... 257 | # we embed a strange string into the data dimnames just for the record... 
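# (a 360-day calendar gives 12 months of 30 days; at 3-hourly resolution that is
#  8 timesteps a day, so the five years 2026-2030 fill exactly 5 x 360 x 8 = 14400 rows)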
258 | # and we set up the NetCdf$calendar variable to allow for easy access to all the date components 259 | calendar = data.frame(index=1:14400, year=NA, season=NA, hour=NA) 260 | 261 | # build a calendar to know the years,seasons,hours 262 | calendar$year = rep(2026:2030, each=360*8) 263 | 264 | calendar$month = rep(1:12, each=30*8) 265 | 266 | calendar$season = calendar$month 267 | f = (calendar$season == 12) 268 | calendar$season[f] = 0 269 | calendar$season = floor(calendar$season / 3) # winter = 0, spring = 1, summer = 2, autumn = 3 270 | 271 | calendar$hour = rep(c(3, 6, 9, 12, 15, 18, 21, 0), times=360*5) 272 | 273 | # assign 274 | NetCdf$hour = paste('Y', calendar$year, 'M', calendar$month, 'H', calendar$hour, sep='') 275 | 276 | NetCdf$calendar = calendar 277 | } 278 | 279 | 280 | # HACK - i didn't update the dates correctly in the MERRA2 profile files... 281 | # HACK - they are identifiable because they are called .profile. 282 | if (grepl('profile', filename)) 283 | { 284 | # so... extract the date from the filename 285 | d = get_text_between(filename, 'profile.', '.nc') 286 | d = get_text_before(d, '.SUB') 287 | NetCdf$date = ymd(d) 288 | } 289 | 290 | 291 | 292 | # describe contents 293 | if (printInfo) 294 | { 295 | # annoyingly, this is now hyper-detailed... 296 | # print(NetCdf$ncdf) 297 | 298 | cat(nc$ndim, 'Dimensions:\n') 299 | for (i in 1:nc$ndim) 300 | { 301 | d = NetCdf$all_dims[i] 302 | 303 | cat('\t') 304 | 305 | if (d == 'time') 306 | { 307 | cat(sprintf('%10s', 'time'), '\t') 308 | 309 | t_min = suppressWarnings(min(NetCdf$hour)) 310 | t_max = suppressWarnings(max(NetCdf$hour)) 311 | 312 | cat(format(NetCdf$date, "%Y-%m-%d"), ' - ', t_min, ':', t_min) 313 | 314 | } else if (d == 'longitude' | d == 'lon') 315 | { 316 | lon = ncvar_get(NetCdf$ncdf, d) 317 | cat(sprintf('%10s', 'longitude'), '\t') 318 | cat(min(lon), ':', max(lon)) 319 | 320 | } else if (d == 'latitude' | d == 'lat') 321 | { 322 | lat = ncvar_get(NetCdf$ncdf, d) 323 | cat(sprintf('%10s', 'latitude'), '\t') 324 | cat(min(lat), ':', max(lat)) 325 | 326 | } else 327 | { 328 | cat(d) 329 | } 330 | 331 | cat('\n') 332 | } 333 | 334 | cat(nc$nvar, 'Variables:\n') 335 | for (i in 1:nc$nvar) 336 | cat('\t', sprintf('%10s', NetCdf$all_vars[i]), '\t', NetCdf$all_desc[i], '\n') 337 | } 338 | 339 | # save 340 | assign('NetCdf', NetCdf, envir=NetCdf) 341 | } 342 | 343 | 344 | # 345 | # close a NetCDF file 346 | # 347 | NetCdf$close_file = function() 348 | { 349 | nc_close(NetCdf$ncdf) 350 | assign('NetCdf', NetCdf, envir=NetCdf) 351 | } 352 | 353 | 354 | 355 | 356 | # 357 | # set up the internals so that future reads from NetCDF files are subsetted to a particular region 358 | # 359 | # region is a list containing $lon[min,max] and $lat[min,max] 360 | # 361 | NetCdf$subset_coords = function(region) 362 | { 363 | # remove fools 364 | if (length(region$lon) != 2 | length(region$lat) != 2) 365 | { 366 | cat("NetCdf::subset_coords() -- must pass a list of $lon[min,max] $lat[min,max]\n") 367 | return() 368 | } 369 | 370 | # get our spatial resolution 371 | dx = diff(NetCdf$lon)[1] 372 | dy = diff(NetCdf$lat)[1] 373 | 374 | # expand the region to make sure we include the points adjacent to our range 375 | region$lon[1] = region$lon[1] - dx - 0.01 376 | region$lon[2] = region$lon[2] + dx + 0.01 377 | region$lat[1] = region$lat[1] - dy - 0.01 378 | region$lat[2] = region$lat[2] + dy + 0.01 379 | 380 | # define which parts of the file live within our region 381 | myLon = which(NetCdf$lon >= region$lon[1] & NetCdf$lon <= 
region$lon[2]) 382 | myLat = which(NetCdf$lat >= region$lat[1] & NetCdf$lat <= region$lat[2]) 383 | 384 | # store the start and count parameters to use when reading variables 385 | NetCdf$start = c(min(myLon), min(myLat), 1) 386 | NetCdf$count = c(length(myLon), length(myLat), -1) 387 | 388 | # chop down our lon and lat vectors to this region 389 | NetCdf$lon = NetCdf$lon[myLon] 390 | NetCdf$lat = NetCdf$lat[myLat] 391 | 392 | # and note that we have done this chopping 393 | NetCdf$subset = TRUE 394 | 395 | # save 396 | assign('NetCdf', NetCdf, envir=NetCdf) 397 | } 398 | 399 | 400 | 401 | 402 | # 403 | # read a variable from the open NetCDF file 404 | # if you have defined a region with subset_coords(), employ subsetted reading to save time 405 | # 406 | NetCdf$get_var = function(var) 407 | { 408 | # check the var we are reading exists 409 | var_ok = (var %in% NetCdf$all_vars) 410 | if (!var_ok) 411 | { 412 | # check if it's a stupid case error in MERRA 413 | var_ok = (toupper(var) %in% toupper(NetCdf$all_vars)) 414 | if (!var_ok) 415 | { 416 | flush("NetCdf$get_var -- your var [", var, "] doesn't exist in this reanalysis...\n") 417 | flush("Available variables are:", NetCdf$all_vars, "\n\n") 418 | } 419 | 420 | if (var_ok) 421 | { 422 | # figure out the actual name 423 | old_var = var 424 | var = match(toupper(var), toupper(NetCdf$all_vars)) 425 | var = NetCdf$all_vars[var] 426 | 427 | flush("NetCdf$get_var -- you requested [", old_var, "], giving you [", var, "]\n") 428 | } 429 | } 430 | 431 | # if we want to read from the entire file... 432 | if (NetCdf$subset == FALSE) 433 | data = ncvar_get(NetCdf$ncdf, var) 434 | 435 | # if we want to read from a specific region... 436 | if (NetCdf$subset == TRUE) 437 | data = ncvar_get(NetCdf$ncdf, var, NetCdf$start, NetCdf$count) 438 | 439 | # encode our longitude, latitude and time into the variable's dimensions 440 | dimnames(data) = list(NetCdf$lon, NetCdf$lat, NetCdf$hour) 441 | return(data) 442 | } 443 | 444 | # 445 | # same as above, but works for 3d variables (with various levels) 446 | # 447 | NetCdf$get_var_3d = function(var) 448 | { 449 | # if we want to read from the entire file... 450 | if (NetCdf$subset == FALSE) 451 | data = ncvar_get(NetCdf$ncdf, var) 452 | 453 | # if we want to read from a specific region... 
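# (e.g. a subset of start = c(5, 12, 1) and count = c(10, 8, -1) -- values
#  illustrative -- becomes start = c(5, 12, 1, 1) and count = c(10, 8, -1, -1),
#  reading a 10 x 8 lon-lat window with every level and every hour in full)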
454 | if (NetCdf$subset == TRUE) 455 | { 456 | # modify start and count to include the fourth dimension (pressure levels) 457 | start = c(NetCdf$start, 1) 458 | count = c(NetCdf$count, -1) 459 | 460 | data = ncvar_get(NetCdf$ncdf, var, start, count) 461 | } 462 | 463 | # encode our longitude, latitude and time into the variable's dimensions 464 | dimnames(data) = list(NetCdf$lon, NetCdf$lat, NetCdf$levels, NetCdf$hour) 465 | 466 | return(data) 467 | } 468 | 469 | 470 | 471 | 472 | # 473 | # read the wind speed at 50 metres 474 | # 475 | NetCdf$get_speed50m = function(min=0, max=NA) 476 | { 477 | # format is u50m[lon, lat, time] 478 | if (NetCdf$model == 'MERRA2') 479 | { 480 | u50m = NetCdf$get_var('U50M') 481 | v50m = NetCdf$get_var('V50M') 482 | } 483 | 484 | if (NetCdf$model == 'MERRA1') 485 | { 486 | u50m = NetCdf$get_var('u50m') 487 | v50m = NetCdf$get_var('v50m') 488 | } 489 | 490 | if (NetCdf$model == 'MERRA1NEW') 491 | { 492 | u50m = NetCdf$get_var('U50M') 493 | v50m = NetCdf$get_var('V50M') 494 | } 495 | 496 | # calculate the speed from pythagoras 497 | speed50m = sqrt(u50m^2 + v50m^2) 498 | 499 | # constrain speeds to our given limits (useful if our plot colours don't go higher than that) 500 | if (!is.na(min)) 501 | speed50m[ speed50m < min ] = min 502 | 503 | if (!is.na(max)) 504 | speed50m[ speed50m > max ] = max 505 | 506 | # return 507 | return(speed50m) 508 | } 509 | 510 | NetCdf$get_speed10m = function(min=0, max=NA) 511 | { 512 | # format is u10m[lon, lat, time] 513 | if (NetCdf$model == 'MERRA2') 514 | { 515 | u10m = NetCdf$get_var('U10M') 516 | v10m = NetCdf$get_var('V10M') 517 | } 518 | 519 | if (NetCdf$model == 'MERRA1') 520 | { 521 | u10m = NetCdf$get_var('u10m') 522 | v10m = NetCdf$get_var('v10m') 523 | } 524 | 525 | if (NetCdf$model == 'MERRA1NEW') 526 | { 527 | u10m = NetCdf$get_var('U10M') 528 | v10m = NetCdf$get_var('V10M') 529 | } 530 | 531 | # calculate the speed from pythagoras 532 | speed10m = sqrt(u10m^2 + v10m^2) 533 | 534 | # constrain speeds to our given limits (useful if our plot colours don't go higher than that) 535 | if (!is.na(min)) 536 | speed10m[ speed10m < min ] = min 537 | 538 | if (!is.na(max)) 539 | speed10m[ speed10m > max ] = max 540 | 541 | # return 542 | return(speed10m) 543 | } 544 | 545 | NetCdf$get_speed02m = function(min=0, max=NA) 546 | { 547 | # format is u02m[lon, lat, time] 548 | if (NetCdf$model == 'MERRA2') 549 | { 550 | u02m = NetCdf$get_var('U2M') 551 | v02m = NetCdf$get_var('V2M') 552 | } 553 | 554 | if (NetCdf$model == 'MERRA1') 555 | { 556 | u02m = NetCdf$get_var('u2m') 557 | v02m = NetCdf$get_var('v2m') 558 | } 559 | 560 | if (NetCdf$model == 'MERRA1NEW') 561 | { 562 | u02m = NetCdf$get_var('U2M') 563 | v02m = NetCdf$get_var('V2M') 564 | } 565 | 566 | # calculate the speed from pythagoras 567 | speed02m = sqrt(u02m^2 + v02m^2) 568 | 569 | # constrain speeds to our given limits (useful if our plot colours don't go higher than that) 570 | if (!is.na(min)) 571 | speed02m[ speed02m < min ] = min 572 | 573 | if (!is.na(max)) 574 | speed02m[ speed02m > max ] = max 575 | 576 | # return 577 | return(speed02m) 578 | } 579 | 580 | 581 | 582 | 583 | 584 | 585 | ##### 586 | ## ## BUILD CLASS 587 | ##### 588 | 589 | # define this as an environment 590 | NetCdf = list2env(NetCdf) 591 | 592 | # set the class type 593 | class(NetCdf) = 'NetCdfClass' 594 | 595 | # run the constructor 596 | NetCdf$constructor(filename, model, printInfo) 597 | 598 | # return 599 | return(NetCdf) 600 | } 601 | 
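## ------------------------------------------------------------------
## A minimal usage sketch for NetCdfClass (added for illustration; the
## file path and region below are hypothetical, not part of the model).
## It shows the intended call sequence: open a file, crop all future
## reads to a region, pull out the 50m wind speed, then close.
## ------------------------------------------------------------------
if (FALSE)
{
	f = 'X:/MERRA2_WIND/MERRA2_400.tavg1_2d_slv_Nx.20140101.nc4'   # hypothetical input file
	nc = NetCdfClass(f, 'MERRA2', TRUE)                            # open and print a summary of contents

	region = list(lon=c(-11, 2), lat=c(49, 61))                    # e.g. the British Isles
	nc$subset_coords(region)                                       # all later reads are cropped to this box

	w50m = nc$get_speed50m()                                       # array indexed [lon, lat, hour]
	nc$close_file()
}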
--------------------------------------------------------------------------------
/lib/VWF.PLOTS.R:
--------------------------------------------------------------------------------
1 | ##################################################################
2 | #                                                                #
3 | #   BSD 3-Clause License                                         #
4 | #   Copyright (C) 2012-2017  Iain Staffell                       #
5 | #   All rights reserved.                                         #
6 | #                                                                #
7 | ##################################################################
8 | 
9 | 
10 | 
11 | #######################################################################################################################
12 | ##
13 | ##
14 | ##  generic plotting functions:
15 | ##
16 | ##     plot_ncdf(data, mainTitle, legendTitle, lonRange, latRange, zRange, zConstrain, levels, aspect, myColours)
17 | ##      .
18 | ##      .
19 | ##      .  data = a single hour of netcdf data e.g. w50m[lon, lat]
20 | ##                the longitude and latitude are read from the dimension names of data
21 | ##      .
22 | ##      .  mainTitle = text to go above the main plot (i.e. your variable)
23 | ##      .  legendTitle = text to go above the legend scale (i.e. your units)
24 | ##      .
25 | ##      .  latRange and lonRange = optionally control the extent of the map, defaults to something sensible
26 | ##      .  zRange = optionally control the range of values to be plotted, defaults to the full data range
27 | ##      .  zConstrain = set to TRUE to cap data to this range.  without this, the plot breaks outside the limits
28 | ##      .
29 | ##      .  levels = optionally control the number of colours to use in the plot, high numbers look smoother but render slower
30 | ##      .  aspect = optionally specify the aspect ratio for displaying and printing the map
31 | ##      .
32 | ##      .  myColours = optionally specify a colorRampPalette to use, defaults to some nice heat colours
33 | ##
34 | ##
35 | ##     full_ncdf(data, ..., dateTime) - same as above but with no axes, legends, etc.  suitable for converting into videos
36 | ##      .
37 | ##      .  dateTime = set to TRUE to add three boxes in the southern oceans giving the current date and time
38 | ##
39 | ##
40 | ##     print_ncdf(filename, data, width, height, res, ...) - draws a fullscreen plot straight to file - saving on rendering time
41 | ##
42 | ##
43 | ##
44 | ##
45 | ##
46 | ##  wind plotting functions:  (same as above, but with good formatting for wind speeds)
47 | ##
48 | ##     plot_ncdf_wind(data)
49 | ##     full_ncdf_wind(data)
50 | ##     print_ncdf_wind_axes / print_ncdf_wind_full(filename, data)
51 | ##
52 | ##
53 | ##
54 | ##
55 | ##  then there are some testing functions (looking at other ways to plot, which aren't as good)
56 | ##  and the base function that they all rely on:
57 | ##
58 | ##     filled.contour.is(...)
59 | ## 60 | ## 61 | 62 | 63 | 64 | 65 | 66 | ####################################################################################################################### 67 | ## 68 | ## GENERIC PLOTTING FUNCTIONS 69 | ## 70 | ## 71 | ## 72 | ## a generic function for plotting any NetCDF variable 73 | ## 74 | plot_ncdf = function(data, mainTitle='', legendTitle='', lonRange=NULL, latRange=NULL, zRange=range(data), zConstrain=FALSE, levels=64, aspect=NULL, myColours=NULL) 75 | { 76 | # get our world map shape (low res, for faster drawing) 77 | if (!exists('world.map')) 78 | world.map <<- get_world_map(res='low') 79 | 80 | 81 | # get our coordinates 82 | lon = as.numeric(dimnames(data)[[1]]) 83 | lat = as.numeric(dimnames(data)[[2]]) 84 | 85 | 86 | # calculate our default lon & lat ranges if needed 87 | if (is.null(lonRange)) 88 | lonRange = range(lon) 89 | 90 | if (is.null(latRange)) 91 | latRange = range(lat) 92 | 93 | 94 | # colour scheme for the plot 95 | if (is.null(myColours)) 96 | { 97 | heat.colours = c('#3F168A', '#2049D0', '#3288BD', '#66C2A5', '#ABDDA4', '#E6F598', '#FFFFBF', '#FEE08B', '#FDAE61', '#F46D43', '#D53E4F', '#9E0142') 98 | myColours = colorRampPalette(heat.colours) 99 | } 100 | 101 | 102 | # constrain data to our desired zRange 103 | if (zConstrain) 104 | { 105 | data[data < zRange[1] ] = zRange[1]; 106 | data[data > zRange[2] ] = zRange[2]; 107 | } 108 | 109 | 110 | # aspect ratio of the map 111 | if (is.null(aspect)) 112 | aspect = 2.5 * (max(lat)-min(lat)) / (max(lon)-min(lon)) 113 | 114 | 115 | # plot 116 | filled.contour.is(lon, lat, data, 117 | nlevels=levels, color.palette=myColours, 118 | asp=aspect, xlim=lonRange, ylim=latRange, zlim=zRange, 119 | key.title = title(main=legendTitle), plot.title = title(main=mainTitle, xlab='Longitude', ylab='Latitude'), 120 | plot.axes = { plot(world.map, xlim=lonRange, ylim=latRange, add=TRUE); axis(1); axis(2) } 121 | ) 122 | } 123 | 124 | 125 | 126 | 127 | 128 | ## 129 | ## a generic function for plotting any NetCDF variable fullscreen 130 | ## 131 | full_ncdf = function(data, mainTitle='', legendTitle='', lonRange=NULL, latRange=NULL, zRange=range(data), zConstrain=FALSE, levels=1024, aspect=NULL, myColours=NULL, dateTime=FALSE) 132 | { 133 | # get our world map shape (high res for better visuals) 134 | if (!exists('world.map')) 135 | world.map <<- get_world_map(res='high') 136 | 137 | 138 | # get our coordinates 139 | lon = as.numeric(dimnames(data)[[1]]) 140 | lat = as.numeric(dimnames(data)[[2]]) 141 | 142 | 143 | # calculate our default lon & lat ranges if needed 144 | if (is.null(lonRange)) 145 | lonRange = range(lon) 146 | 147 | if (is.null(latRange)) 148 | latRange = range(lat) 149 | 150 | 151 | # colour scheme for the plot 152 | if (is.null(myColours)) 153 | { 154 | heat.colours = c('#3F168A', '#2049D0', '#3288BD', '#66C2A5', '#ABDDA4', '#E6F598', '#FFFFBF', '#FEE08B', '#FDAE61', '#F46D43', '#D53E4F', '#9E0142') 155 | myColours = colorRampPalette(heat.colours) 156 | } 157 | 158 | 159 | # constrain data to our desired zRange 160 | if (zConstrain) 161 | { 162 | data[data < zRange[1] ] = zRange[1]; 163 | data[data > zRange[2] ] = zRange[2]; 164 | } 165 | 166 | 167 | # aspect ratio of the map 168 | if (is.null(aspect)) 169 | aspect = 2.5 * (max(lat)-min(lat)) / (max(lon)-min(lon)) 170 | 171 | 172 | # remove the borders for full-screen 173 | omar <- par(mar = par('mar')) 174 | on.exit(par(omar)) 175 | par(mar=rep(0,4)) 176 | 177 | 178 | # plot 179 | filled.contour.is(lon, lat, data, 180 | nlevels=levels, 
		color.palette=myColours,
181 | 		asp=aspect, xlim=lonRange, ylim=latRange, zlim=zRange,
182 | 		plot.axes = { plot(world.map, xlim=lonRange, ylim=latRange, add=TRUE) },
183 | 		axes=FALSE, add.legend=FALSE
184 | 	)
185 | 
186 | 	# add datetime
187 | 	if (dateTime == TRUE)
188 | 	{
189 | 		# box centres
190 | 		bx = c(-120,0,120)
191 | 		by = c(-50,-50,-50)
192 | 
193 | 		# box width & height
194 | 		bw = 25
195 | 		bh = 5
196 | 
197 | 		# plot rectangle and text
198 | 		rect(bx-bw/2, by-bh/2, bx+bw/2, by+bh/2, col=rgb(1,1,1,0.4))
199 | 		text(bx, by, t, adj=c(0.5,0.5))   # NB: 't' (the date/time label for this frame) must be set by the caller
200 | 	}
201 | 
202 | }
203 | 
204 | 
205 | 
206 | ##
207 | ## a generic function for plotting any NetCDF variable directly to a PNG file
208 | ##
209 | print_ncdf = function(filename, data, width, height, res, ...)
210 | {
211 | 	png(filename, height=height, width=width, res=res)
212 | 
213 | 	full_ncdf(data, ...)   # lon and lat are read from the dimnames of data
214 | 
215 | 	dev.off()
216 | }
217 | 
218 | 
219 | 
220 | 
221 | 
222 | 
223 | #######################################################################################################################
224 | ##
225 | ##  WIND PLOTTING FUNCTIONS
226 | ##
227 | ##
228 | ##
229 | ##  defaults for plotting wind speeds
230 | ##  legend is fixed to 0-30 m/s - plot is done in blues
231 | ##
232 | plot_ncdf_wind = function(data, mainTitle='', legendTitle='m/s', lonRange=NULL, latRange=NULL, zRange=c(0,30), zConstrain=FALSE, levels=64, aspect=NULL, myColours=NULL)
233 | {
234 | 	# default colour scheme
235 | 	if (is.null(myColours))
236 | 		myColours = colorRampPalette(c('#F7FBFF', '#DEEBF7', '#C6DBEF', '#9ECAE1', '#6BAED6', '#4292C6', '#2171B5', '#08519C', '#08306B'))
237 | 
238 | 	# plot it
239 | 	plot_ncdf(data, mainTitle, legendTitle, lonRange, latRange, zRange, zConstrain, levels, aspect, myColours)
240 | }
241 | 
242 | 
243 | full_ncdf_wind = function(data, latRange=NULL, lonRange=NULL, zRange=c(0,30), zConstrain=FALSE, levels=1024, aspect=NULL, myColours=NULL, dateTime=FALSE)
244 | {
245 | 	# default colour scheme
246 | 	if (is.null(myColours))
247 | 		myColours = colorRampPalette(c('#F7FBFF', '#DEEBF7', '#C6DBEF', '#9ECAE1', '#6BAED6', '#4292C6', '#2171B5', '#08519C', '#08306B'))
248 | 
249 | 	# plot it -- named arguments, as full_ncdf() takes lonRange before latRange and has no title parameters
250 | 	full_ncdf(data, lonRange=lonRange, latRange=latRange, zRange=zRange, zConstrain=zConstrain, levels=levels, aspect=aspect, myColours=myColours, dateTime=dateTime)
251 | }
252 | 
253 | 
254 | print_ncdf_wind_axes = function(filename, data, width, height, res, ...)
255 | {
256 | 	png(filename, height=height, width=width, res=res)
257 | 
258 | 	plot_ncdf_wind(data, ...)
259 | 
260 | 	dev.off()
261 | }
262 | 
263 | print_ncdf_wind_full = function(filename, data, width, height, res, ...)
264 | {
265 | 	png(filename, height=height, width=width, res=res)
266 | 
267 | 	full_ncdf_wind(data, ...)
268 | 
269 | 	dev.off()
270 | }
271 | 
272 | 
273 | 
274 | 
275 | 
276 | 
277 | #######################################################################################################################
278 | ##
279 | ##  a better filled contour function for R 3.x.x
280 | ##  iain staffell ~ 2014
281 | ##
282 | ##  by default it no longer draws borders between levels in the legend
283 | ##  this lets you draw smooth plots with lots of levels..
284 | ##
285 | ##  if you run with the new option add.legend=FALSE, you just end up with
286 | ##  the level plot.  this means subsequent calls to points(), lines(), etc..
287 | ##  will work as expected, as you remain in the coordinate system of the plot body
288 | ##
289 | ##  if you call par(mar=rep(0,4)), then call this with axes=FALSE, add.legend=FALSE
290 | ##  you can obtain a full-screen plot with no borders, no nothing.
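## ------------------------------------------------------------------
## A usage sketch for the wind plotting helpers above (illustrative;
## the 'w50m' array and output filename are assumptions).  'w50m' is
## the sort of [lon, lat, hour] array returned by NetCdf$get_speed50m().
## ------------------------------------------------------------------
if (FALSE)
{
	plot_ncdf_wind(w50m[ , , 12], mainTitle='Wind speed at 50m, hour 12')                 # interactive plot with axes and legend
	print_ncdf_wind_full('w50m.png', w50m[ , , 12], width=1920, height=1080, res=150)     # borderless frame, e.g. for videos
}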
291 | ## 292 | filled.contour.is = function (x = seq(0, 1, length.out = nrow(z)), y = seq(0, 1, 293 | length.out = ncol(z)), z, xlim = range(x, finite = TRUE), 294 | ylim = range(y, finite = TRUE), zlim = range(z, finite = TRUE), 295 | levels = pretty(zlim, nlevels), nlevels = 20, color.palette = cm.colors, 296 | col = color.palette(length(levels) - 1), plot.title, plot.axes, 297 | key.title, key.axes, asp = NA, xaxs = "i", yaxs = "i", las = 1, 298 | axes = TRUE, frame.plot = axes, add.legend=TRUE, ...) 299 | { 300 | # sort out data 301 | if (missing(z)) { 302 | if (!missing(x)) { 303 | if (is.list(x)) { 304 | z <- x$z 305 | y <- x$y 306 | x <- x$x 307 | } 308 | else { 309 | z <- x 310 | x <- seq.int(0, 1, length.out = nrow(z)) 311 | } 312 | } 313 | else stop("no 'z' matrix specified") 314 | } 315 | else if (is.list(x)) { 316 | y <- x$y 317 | x <- x$x 318 | } 319 | if (any(diff(x) <= 0) || any(diff(y) <= 0)) 320 | stop("increasing 'x' and 'y' values expected") 321 | 322 | mar.orig <- (par.orig <- par(c("mar", "las", "mfrow")))$mar 323 | on.exit(par(par.orig)) 324 | 325 | # plot legend 326 | if (add.legend) 327 | { 328 | w <- (3 + mar.orig[2L]) * par("csi") * 2.54 329 | layout(matrix(c(2, 1), ncol = 2L), widths = c(1, lcm(w))) 330 | par(las = las) 331 | mar <- mar.orig 332 | mar[4L] <- mar[2L] 333 | mar[2L] <- 1 334 | par(mar = mar) 335 | 336 | plot.new() 337 | plot.window(xlim = c(0, 1), ylim = range(levels), xaxs = "i", yaxs = "i") 338 | rect(0, levels[-length(levels)], 1, levels[-1L], col = col, border = NA) ## this removes legend borders 339 | if (missing(key.axes)) { 340 | if (axes) 341 | axis(4) 342 | } 343 | else key.axes 344 | box() 345 | if (!missing(key.title)) 346 | key.title 347 | mar <- mar.orig 348 | mar[4L] <- 1 349 | par(mar = mar) 350 | } 351 | 352 | # plot body 353 | plot.new() 354 | plot.window(xlim, ylim, "", xaxs = xaxs, yaxs = yaxs, asp = asp) 355 | .filled.contour(x, y, z, levels, col) 356 | if (missing(plot.axes)) { 357 | if (axes) { 358 | title(main = "", xlab = "", ylab = "") 359 | Axis(x, side = 1) 360 | Axis(y, side = 2) 361 | } 362 | } 363 | else plot.axes 364 | if (frame.plot) 365 | box() 366 | if (missing(plot.title)) 367 | title(...) 368 | else plot.title 369 | invisible() 370 | } 371 | 372 | -------------------------------------------------------------------------------- /lib/VWF.R: -------------------------------------------------------------------------------- 1 | ################################################################## 2 | # # 3 | # BSD 3-Clause License # 4 | # Copyright (C) 2012-2017 Iain Staffell # 5 | # All rights reserved. 
                                                               #
6 | #                                                              #
7 | ##################################################################
8 | 
9 | 
10 | # external dependencies (that are always needed)
11 | suppressPackageStartupMessages(library(ncdf4))
12 | suppressPackageStartupMessages(library(lubridate))
13 | suppressPackageStartupMessages(library(rworldmap))
14 | suppressPackageStartupMessages(library(doParallel))
15 | suppressPackageStartupMessages(library(data.table))
16 | suppressPackageStartupMessages(library(akima))
17 | 
18 | # external dependencies (not needed by all things, so not loaded by default)
19 | #library(MASS)
20 | #library(fields)
21 | 
22 | 
23 | 
24 | 
25 | # figure out the working directory for this VWF code
26 | if (!is.null(parent.frame(2)$ofile))
27 | {
28 | 	vwf.dir = paste0(dirname(parent.frame(2)$ofile), '/')   # paste0 directly, as %&% isn't defined until below
29 | }
30 | 
31 | # if that failed, establish the default
32 | if (!exists('vwf.dir'))
33 | {
34 | 	vwf.dir = 'Q:/VWF/lib/'
35 | }
36 | 
37 | 
38 | 
39 | # i'm a concatenator, twisted concatenator
40 | `%&%` = function(a, b) paste0(a, b)
41 | 
42 | # load the background code
43 | source(vwf.dir %&% 'VWF.STDLIB.R')
44 | source(vwf.dir %&% 'VWF.NCDF.R')
45 | source(vwf.dir %&% 'VWF.EXTRAPOLATE.R')
46 | source(vwf.dir %&% 'VWF.FARMS.R')
47 | source(vwf.dir %&% 'VWF.PLOTS.R')
48 | source(vwf.dir %&% 'VWF.EXTRAS.R')
49 | 
50 | flush('Welcome to the VWF v17.07.21\n')
51 | 
--------------------------------------------------------------------------------
/lib/VWF.STDLIB.R:
--------------------------------------------------------------------------------
1 | #############################################################
2 | #####   ____    _                       _____          #####
3 | #####  |  _ \  | |                     |  __ \         #####
4 | #####  | |_) | | |   __ _   _ __ ___   | |__) |        #####
5 | #####  |  _ <  | |  / _` | | '_ ` _ \  |  _  /         #####
6 | #####  | |_) | | | | (_| | | | | | | | | | \ \         #####
7 | #####  |____/  |_|  \__,_| |_| |_| |_| |_|  \_\        #####
8 | #####                                                  #####
9 | #############################################################
10 | #####   INCLUDE BLAM.R - 18-05-2017 - IAIN STAFFELL   #####
11 | #############################################################
12 | ########################   CONTENTS   ######################
13 | #############################################################
14 | 
15 | 
16 | 
17 | 
18 | #############################################################
19 | #########  GENERAL STUFF
20 | ##
21 | ##   %&%                     Concatenate strings like a %&% b
22 | ##   %notin%                 Opposite of %in%
23 | ##
24 | ##   flush(...)              Flush a message to console
25 | ##   clear_line(...)         Erase the last line of console text and flush a new message
26 | ##
27 | ##   rc(text=FALSE, header=FALSE)   Read data from clipboard, returns 1d vector or 2d table depending on what's there
28 | ##                            - Converts to numeric by default, set text=TRUE if you paste in text
29 | ##                            - Set header=TRUE to make first row of the clipboard into your colnames
30 | ##
31 | ##   wc()                    Write a 1d vector or 2d table to clipboard, for easy pasting into Excel
32 | ##
33 | ##   xc()                    Execute the contents of the clipboard (faster and cleaner than pasting code in)
34 | ##
35 | ##   memory_usage(...) / mem_use(...)   List objects and their memory use (pass a string to search by object name)
36 | ##
37 | ##   sleep(seconds)          ZZzz..
38 | ##
39 | ##
40 | ##
41 | #############################################################
42 | #########  READ DATA
43 | ##
44 | ##   read_data(filename)          Lazy man's read - will read big csv, rdata or rds as required
45 | ##
46 | ##   read_csv(filename)           Read in data 3x faster than read.csv()
47 | ##
48 | ##   read_big_csv(filename)       Read in data 10x faster than read.csv(), but with longer initialise time, good for >100 MB
49 | ##
50 | ##   read_zipped_csv(filename)    Read a ZIP file which contains a single CSV file, in-memory decompression, no junk files made
51 | ##
52 | ##   read_data_bind_rows(filenames, ...)   Read in several files, binding data on rows or columns...
53 | ##   read_data_bind_cols(filenames, ...)
54 | ##
55 | ##
56 | ##
57 | ##   write_data(data, filename, format)    Lazy man's write - will save as csv, rdata or rds as required.
58 | ##
59 | ##   write_csv(data, filename)    Removes some of the oddities of write.csv()
60 | ##
61 | ##   csv_to_rds(filename)         Convert CSV data to RDS (r binary) format, which is smaller and faster
62 | ##   rds_to_csv(filename)
63 | ##
64 | ##   merge_csv_files(inputFiles, outputFile)   Merge a set of CSV files into one file
65 | ##
66 | ##
67 | ##
68 | #############################################################
69 | #########  GRAPHICS
70 | ##
71 | ##   good_graphics(mar, mgp, padding, captions)   Set up good graphics parameters
72 | ##                            - Pass your own mar(b,l,t,r) or mgp(caption,label,0)
73 | ##                            - Change padding=TRUE if you want to keep that, or captions=FALSE if you like them rotated
74 | ##
75 | ##   good_png(filename, height, width, res)   Print a good png
76 | ##                            - height, width and res are optional (defaults give 8pt text at 8cm width ish)
77 | ##
78 | ##   plot_p(x, y, ...)        Plot points as transparent greys
79 | ##   plot_l(x, y, ...)        Plot as a line
80 | ##
81 | ##   boxplot.pars             Make boxplots pretty with boxplot(y ~ x, pars=boxplot.pars)
82 | ##
83 | ##   heat.colours, rainbow.colours, blue.colours   Nicer sets of colours
84 | ##
85 | ##   colour_ramp_data(pal=NULL, range=NULL)   Generate a colour ramp for your data (automatically normalises it, etc.)
86 | ##
87 | ##   stepped_line_graph(x, y)   Build a stepped data set from x and y which can be plotted like a histogram in Excel
88 | ##   stepped_line_graphs(x, col)   Build a stepped data set with multiple columns, 'col' is the name of the date/x column
89 | ##
90 | ##   log_gridlines(min, max)   Return the values of major and minor gridlines for a log chart (e.g. major = 10, 100, 1000; minor = 20, 30, ...)
91 | ##
92 | ##   hist_and_fit(data, x, fit, title)   Plot a histogram and line fit that you have created with pnorm(), pweibull(), etc.
93 | ##   hist_and_normal_fit(data, breaks)   Plot a histogram and automatically fit a normal distribution to it
94 | ##   hist_log_x(data, ...)    Plot a histogram with a logged x axis
95 | ##   hist_log_y(data, ...)    Plot a histogram with a logged y axis
96 | ##
97 | ##
98 | ##
99 | #############################################################
100 | #########  MAPS
101 | ##
102 | ##   get_world_map(shapeFile)   Returns a map from a given shapeFile, or the rWorldMap library if none passed
103 | ##   get_europe_map(resolution, disembodied)
104 | ##   get_europe_north_africa_map(resolution)
105 | ##
106 | ##   plot_map_points(lon, lat, ...)   Plot a map with points at the given lon and lat
107 | ##                            - See below for the 100 other options
108 | ##
109 | ##
110 | #############################################################
111 | #########  TEXT TOOLS
112 | ##
113 | ##   strrev('Hello World!')   Reverse a string
114 | ##
115 | ##   substr_reverse = function(x, n)   Get a substring from the end backwards
116 | ##
117 | ##   str_replace('e', 'u', 'Hello World!')   Replace 'x' for 'y' in 'string'
118 | ##
119 | ##   get_text_before(string, token, last=F)   Return the text in 'string' that comes before 'token'
120 | ##                            - First instance by default, or the last instance if last=T
121 | ##
122 | ##   get_text_after(string, token, last=F)   Ditto
123 | ##
124 | ##   get_text_between(string, token1, token2)   Return the text between 'token1' and 'token2'
125 | ##                            - Uses the first instance of each
126 | ##
127 | ##   is_string_numeric(data)   Test if a string (or array of strings) holds valid numbers
128 | ##
129 | ##   keyval_to_df(keyval, splitter)   Convert a 1d array of key-value pairs into a 1 row data frame
130 | ##   all_keyval_to_df(keyval_array, splitter)   Convert an array of arrays of key-value pairs into a 2d data frame
131 | ##
132 | ##   percent(val, dp=2)       Convert one/some numbers into percentage format
133 | ##
134 | ##
135 | ##
136 | #############################################################
137 | #########  ARRAYS AND DATA FRAMES
138 | ##
139 | ##   push(array, item)        Push and pop for vectors
140 | ##   pop(array)               (yes, i'm that lazy)
141 | ##
142 | ##   insert_row(DF, row, r)   Insert 'row' into 'DF' at row number 'r'
143 | ##
144 | ##   xbind(array, array, dimension)   Bind two multi-dimensional arrays on a specific dimension (as opposed to rbind, cbind)
145 | ##
146 | ##   nth_largest(array, n)    Return nth biggest or
147 | ##   nth_smallest(array, n)   smallest number in an array
148 | ##
149 | ##   which.closest(array, x)  Return the index of the array element that is closest to x
150 | ##
151 | ##   mult_cols_by_vector(matrix, vector)   Multiply each column of a matrix by each element of the vector (i.e. each row gets multiplied by the whole vector)
152 | ##   mult_rows_by_vector(matrix, vector)   Multiply each row of a matrix by each element of the vector (i.e. each column gets multiplied by the whole vector)
153 | ##
154 | ##   midPoints(x)             Return the midpoint of all adjacent elements (e.g. midPoints(1:5) = 1.5:4.5)
155 | ##
156 | ##   freq_table(data)         Return a frequency table in ascending count order
157 | ##
158 | ##   move_column(data, myCol, where, relCol)   Move columns within a dataframe - e.g. data = move_column(data, 'a_column', 'before', 'another_column')
159 | ##
160 | ##
161 | ##   moving_average(data, n)  Calculate the moving average for a vector of data, with n smoothing width either side of the value
162 | ##
163 | ##   biggest_diff(data, lag)  Calculate the largest swing within a given number of periods
164 | ##
165 | ##   na.everyone(dataframe, cols)   Copy NA values across all columns in a data frame
166 | ##
167 | ##   list_to_data_frame(l)    ...
168 | ##
169 | ##   aggregate_n(values, n, FUN)   Aggregate a vector or data-frame into fixed-sized groups
170 | ##
171 | ##
172 | ##
173 | #############################################################
174 | #########  TIME-SERIES DATA
175 | ##
176 | ##   aggregate_yearly(x, y, FUN)   Aggregate a time-series to yearly level.
177 | ##   _monthly()                    x = a lubridate time series
178 | ##   _weekly()                     y = the data series
179 | ##   _daily()                      FUN = aggregate function (defaults to mean)
180 | ##
181 | ##   seasonal_diurnal(x, y, FUN)
182 | ##   plot_diurnal(dd)
183 | ##
184 | ##   dmy_hs(string)           Additional options for lubridate
185 | ##   ymd_hs(string)
186 | ##
187 | ##   lag_data(data, steps)    Lag values in a vector of data by n steps by shifting them backwards (accepts non-integer steps)
188 | ##   lead_data(data, steps)   Lead values in a vector of data by shifting them forwards
189 | ##
190 | ##   align_data(df1, col1, df2, col2)   Align two data frames so that df1$col1 and df2$col2 are identical.  Useful in lining up time-series data.
191 | ##
192 | ##   tz_search('City')        Search for the timezone name that's relevant to your location
193 | ##
194 | ##   tz_shift(data, zone)     Calculate the shift needed to move your data into that timezone (works on a series of input times, accounts for daylight savings)
195 | ##
196 | ##   fill_missing_dates(x, datecol, period, fill)   Pad out your dataframe to have a complete time series of a given period (defaults to days).  Optionally, fill all missing timesteps with data = fill
197 | ##
198 | ##
199 | ##
200 | #############################################################
201 | #########  MATHS AND STATISTICS
202 | ##
203 | ##   mode(data)               Calculate the mode (most common)
204 | ##
205 | ##   weighted_mean(data, weights)   Mean and stdev with weighted data points
206 | ##   weighted_sd(data, weights)
207 | ##
208 | ##   stderr(data)             Calculate the standard error on a mean
209 | ##   ci95(data)               Calculate the 95% confidence interval (based on the stderr)
210 | ##
211 | ##   r2(fit)                  Return the adjusted R² value from an lm() fit
212 | ##
213 | ##   coef.err(fit)            Return the coefficients and standard errors from an lm() fit
214 | ##   lm.coef.err(formula)     Run an lm() fit and return its coefficients and errors
215 | ##
216 | ##   rms(residuals)           Calculate the root mean square from a set of residuals - e.g. call rms(fit - data)
217 | ##
218 | ##   rescale(data, min, max)  Rescale a vector to be in the range of [min, max]
219 | ##
220 | ##   constrained_distro(distro, length, min, max, ...)   Generate a random distribution that is constrained within set limits
221 | ##
222 | ##
223 | ##
224 | 
225 | 
226 | 
227 | 
228 | 
229 | ###################################################################################################################
230 | ##################   GENERAL STUFF   ########################################################################
231 | ###################################################################################################################
232 | 
233 | 
234 | 
235 | `%&%` = function(a, b) paste0(a, b)
236 | 
237 | `%notin%` = Negate(`%in%`)
238 | 
239 | 
240 | 
241 | # console - clear line and flush line
242 | clear_line = function(...)
243 | {
244 | 	cat("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b")
245 | 	cat("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b")
246 | 	cat("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b")
247 | 	cat("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b")
248 | 
249 | 	if (length(list(...)) > 0)
250 | 	{
251 | 		cat(...)
252 | 		flush.console()
253 | 	}
254 | }
255 | 
256 | flush = function(...)
257 | {
258 | 	cat(...)
259 | flush.console() 260 | } 261 | 262 | 263 | 264 | ##### 265 | ## ## read data from clipboard 266 | ## ## write data to clipboard 267 | ##### 268 | 269 | # if reading in text, set text=TRUE to return character rather than numeric data 270 | # if reading in multiple columns set header=TRUE to take your first row and set as column names 271 | rc = function(text=FALSE, header=FALSE) 272 | { 273 | # get the contents 274 | x = readClipboard() 275 | 276 | # deal with one column 277 | if (length( grep('\t', x)) == 0) 278 | { 279 | # convert to numeric data 280 | if (text == FALSE) 281 | { 282 | x = as.numeric(x) 283 | } 284 | 285 | return (x) 286 | } 287 | 288 | # deal with multiple columns 289 | if (length( grep('\t', x)) > 0) 290 | { 291 | x = read.table('clipboard', stringsAsFactors=FALSE) 292 | 293 | # turn the header row into column names 294 | if (header) 295 | { 296 | colnames(x) = x[1, ] 297 | x = x[-1, ] 298 | rownames(x) = 1:nrow(x) 299 | } 300 | 301 | # convert to numeric data 302 | if (text == FALSE) 303 | { 304 | cols = colnames(x) 305 | x[ , cols] = sapply(x[ , cols], as.numeric) 306 | } 307 | 308 | return (x) 309 | } 310 | } 311 | 312 | 313 | 314 | 315 | # write an object to clipboard 316 | # this uses various trickery to get around the clipboard size limit! 317 | wc = function(x, row.names=FALSE) 318 | { 319 | # write a 1d vector 320 | if (is.null(dim(x))) 321 | { 322 | writeClipboard( as.character(x) ) 323 | return() 324 | } 325 | 326 | # write a 2d data frame 327 | if (length(dim(x)) == 2) 328 | { 329 | # this tries the 16MB clipboard 330 | tryCatch( 331 | { 332 | write.table(x, "clipboard-16384", sep="\t", row.names=row.names) 333 | }, 334 | 335 | # if it fails, it does some funky business (as strings are unlimited??) 336 | warning = function(cond) 337 | { 338 | # convert x to a data.frame of char 339 | for(col in 1:ncol(x)) 340 | x[ , col] = as.character(x[ , col]) 341 | 342 | # now convert its transpose into a string - so we get c(1st row), c(2nd row), ... 343 | x = as.data.frame( t(x), stringsAsFactors=FALSE) 344 | x = toString(x) 345 | 346 | # convert the delimiter from comma to tab 347 | x = gsub('\", \"', '\t', x, fixed=TRUE) 348 | 349 | # convert EOL to a newline character 350 | x = gsub('\"), c(\"', '\n', x, fixed=TRUE) 351 | 352 | # chop off the first c(\" and the last \") 353 | x = substr(x, 4, nchar(x)-2) 354 | 355 | # now paste your goodies into excel 356 | writeClipboard(x) 357 | }) 358 | 359 | return() 360 | } 361 | } 362 | 363 | 364 | 365 | 366 | # execute the contents of the clipboard 367 | # this makes things work nicer, stop() and wait_for_key() both work, you don't get massive print-outs of code, etc... 368 | xc = function() { source('clipboard') } 369 | 370 | 371 | 372 | # make the console wait until a key is pressed 373 | wait_for_key = function(txt='Press [enter] to continue') 374 | { 375 | invisible(readline(prompt=txt)) 376 | } 377 | 378 | 379 | 380 | ## list objects and memory usage 381 | ## beefed up from http://tinyurl.com/pgxwl6c 382 | 383 | ## memory_usage() 384 | ## memory_usage("b", n=20) 385 | mem_use = function(...) { memory_usage(...) 
} 386 | memory_usage = function(pattern, pos=1, order.by="Size", decreasing=TRUE, n=10) 387 | { 388 | napply = function(names, fn) sapply(names, function(x) fn(get(x, pos = pos))) 389 | names = ls(pos = pos, pattern = pattern) 390 | obj.class = napply(names, function(x) as.character(class(x))[1]) 391 | obj.mode = napply(names, mode) 392 | obj.type = ifelse(is.na(obj.class), obj.mode, obj.class) 393 | obj.prettysize = napply(names, function(x) { capture.output(print(object.size(x), units = "auto")) }) 394 | obj.size = napply(names, object.size) 395 | obj.dim = t(napply(names, function(x) as.numeric(dim(x))[1:2])) 396 | vec = is.na(obj.dim)[, 1] & (obj.type != "function") 397 | obj.dim[vec, 1] = napply(names, length)[vec] 398 | out = data.frame(obj.size, obj.prettysize, obj.type, obj.dim) 399 | names(out) = c("Size", "PrettySize", "Type", "Rows", "Columns") 400 | out = out[order(out[[order.by]], decreasing=decreasing), ] 401 | out[ , 1] = out[ , 2] 402 | out = out[ , -2] 403 | if (n>0) 404 | out = head(out, n) 405 | out 406 | } 407 | 408 | 409 | 410 | ## sleep with a bit of animated output... 411 | sleep = function(seconds) 412 | { 413 | zzz = function(t) 414 | { 415 | flush('Z'); Sys.sleep(t); 416 | flush('Z'); Sys.sleep(t); 417 | flush('z'); Sys.sleep(t); 418 | flush('z'); Sys.sleep(t); 419 | flush('.'); Sys.sleep(t); 420 | flush('.'); Sys.sleep(t); 421 | flush('\b\b\b\b\b\b'); Sys.sleep(t); 422 | } 423 | 424 | # by default, you should print the 7 characters every 3.5 seconds... 425 | loops = ceiling(seconds / 3.5) 426 | t = seconds / loops / 7 427 | for (i in 1:loops) zzz(t) 428 | } 429 | 430 | 431 | 432 | 433 | 434 | ################################################################################################################### 435 | ################## READ BIG DATA ######################################################################## 436 | ################################################################################################################### 437 | 438 | 439 | # cope with me having data in several locations... 440 | # if my usb stick isn't plugged in (E:/WORK) try reading data from various hard drive backups.. 441 | fnchk = function(filename, stopOnError=TRUE) 442 | { 443 | # is SONY available? 444 | fn = filename 445 | if (file.exists(fn)) return(fn) 446 | 447 | # am i at work? 448 | fn = gsub('E:/', 'M:/', filename) 449 | if (file.exists(fn)) return(fn) 450 | 451 | # am i at home? 452 | fn = gsub('E:/', 'C:/Users/istaffel/BLAM!/', filename) 453 | if (file.exists(fn)) return(fn) 454 | 455 | # am i on my laptop? 456 | fn = gsub('E:/', 'D:/!SONY!/', filename) 457 | if (file.exists(fn)) return(fn) 458 | 459 | # is this a dropbox path at home? 460 | fn = gsub('istaffel', 'TiTS', filename) 461 | if (file.exists(fn)) return(fn) 462 | 463 | # is this a dropbox path at work? 464 | fn = gsub('TiTS', 'istaffel', filename) 465 | if (file.exists(fn)) return(fn) 466 | 467 | 468 | # spit an error 469 | if (stopOnError) stop('OH NO - FNCHK() CANNOT FIND ' %&% filename %&% '\n') 470 | 471 | # or just return junk 472 | return(fn) 473 | } 474 | 475 | 476 | 477 | 478 | # read csv, rdata or rds as required... 
479 | read_data = function(filename, verbose=FALSE)
480 | {
481 | 	xtn = get_text_after(filename, '.', last=TRUE)
482 | 
483 | 	if (tolower(xtn) == 'csv')
484 | 	{
485 | 		data = read_big_csv(filename, verbose=verbose)   # name the argument - positionally it would land in 'data.table'
486 | 		return(data)
487 | 	}
488 | 
489 | 	if (tolower(xtn) == 'rdata')
490 | 	{
491 | 		load(filename)   # NB: this assumes the saved object was called 'data'
492 | 		return(data)
493 | 	}
494 | 
495 | 	if (tolower(xtn) == 'rds')
496 | 	{
497 | 		data = readRDS(filename)
498 | 		return(data)
499 | 	}
500 | 
501 | 	stop('Error in read_data(', filename, ')\n')
502 | }
503 | 
504 | 
505 | # any time you read data use these tweaked options for 3x speed!
506 | read_csv = function(filename, ...)
507 | {
508 | 	filename = fnchk(filename)
509 | 
510 | 	# read the first few lines to understand the column classes
511 | 	heading = read.csv(filename, header=TRUE, stringsAsFactors=FALSE, nrows=10, ...)
512 | 	classes = sapply(heading, class)
513 | 
514 | 	# class integers as numeric (in case not all of them are ints)
515 | 	classes[ classes == 'integer' ] = 'numeric'
516 | 
517 | 	data = tryCatch(
518 | 		# read the whole file, stating what classes we expect for extra speed
519 | 		read.csv(filename, header=TRUE, colClasses=classes, comment.char='', ...),
520 | 
521 | 		# if that doesn't work then just read normally (sometimes colClasses causes an error in scan())
522 | 		error = function(e) { read.csv(filename, header=TRUE, stringsAsFactors=FALSE, ...) }
523 | 	)
524 | 
525 | 	return(data)
526 | }
527 | 
528 | 
529 | # any time you read more than 100MB use this for 100x speed!
530 | # use data.table=TRUE to return a data.table
531 | read_big_csv = function(filename, data.table=FALSE, verbose=FALSE)
532 | {
533 | 	filename = fnchk(filename)
534 | 
535 | 	suppressPackageStartupMessages(library(data.table))
536 | 	data = fread(filename, sep=',', na.strings=c('NA','N/A',''), showProgress=verbose, data.table=data.table)
537 | 
538 | 	return(data)
539 | }
540 | 
541 | 
542 | # read a csv file stuffed inside a zip file
543 | read_zipped_csv = function(file, ...)
544 | {
545 | 	zipFileInfo = unzip(file, list=TRUE)
546 | 	contents = as.character(zipFileInfo$Name)
547 | 
548 | 	if (nrow(zipFileInfo) > 1)
549 | 		stop("read_zipped_csv: More than one data file inside your file:", file, "\n")
550 | 
551 | 	read.csv( unz(file, contents), stringsAsFactors=FALSE, ...)
552 | }
553 | 
554 | 
555 | 
556 | # read several files binding on rows or columns
557 | read_data_bind_rows = function(filenames, ...)
558 | {
559 | 	# library(plyr)
560 | 	# do.call(rbind.fill, lapply(filenames, function(x) read_data(x, ...)))
561 | 
562 | 	library(plyr)
563 | 	LIST = list()
564 | 	n = length(filenames)
565 | 
566 | 	# read in all files to a list (this doesn't require memory reallocation)
567 | 	for (i in 1:n)
568 | 	{
569 | 		LIST[[i]] = read_data(filenames[i])
570 | 		sz = capture.output(print(object.size(LIST), units = "auto"))
571 | 		clear_line('Read', sz, '-', i, 'of', n, '-', filenames[i])
572 | 	}
573 | 
574 | 	# convert from list to DF (yay, efficiency...)
575 | 	clear_line('Reformatting as a data.frame...')
576 | 	DF = do.call(rbind.fill, LIST)
577 | 	clear_line()
578 | 	DF
579 | }
580 | 
581 | 
582 | read_data_bind_cols = function(filenames, ...)
583 | {
584 | 	do.call(cbind, lapply(filenames, function(x) read_data(x, ...)))
585 | }
586 | 
587 | 
588 | 
589 | 
590 | 
591 | 
592 | 
593 | 
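## ------------------------------------------------------------------
## A usage sketch for the readers above (illustrative; the file names
## are hypothetical).  read_data() dispatches on the file extension,
## so the same call works for .csv, .rds and .rdata files.
## ------------------------------------------------------------------
if (FALSE)
{
	gen = read_data('E:/WORK/generation_2016.csv')                            # big csv -> fread() via read_big_csv()
	gen = read_data('E:/WORK/generation_2016.rds')                            # same data, faster binary format
	all = read_data_bind_rows('E:/WORK/generation_201' %&% 4:6 %&% '.csv')    # stack several years on rows
}


594 | # write csv, rdata or rds as required...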
595 | write_data = function(data, filename, format) 596 | { 597 | # check and correct the extension of our filename 598 | xtn = get_text_after(filename, '.', last=TRUE) 599 | xtn = tolower(xtn) 600 | format = tolower(format) 601 | 602 | if (xtn != format) 603 | { 604 | filename = get_text_before(filename, '.', last=TRUE) 605 | filename = filename %&% '.' %&% format 606 | } 607 | 608 | if (format == 'csv') 609 | { 610 | write_csv(data, filename, row.names=FALSE) 611 | return() 612 | } 613 | 614 | if (format == 'rds') 615 | { 616 | saveRDS(data, filename) 617 | return() 618 | } 619 | 620 | if (format == 'rdata') 621 | { 622 | save(data, file=filename) 623 | return() 624 | } 625 | 626 | stop('Error in write_data(', filename, ',', format, ')\n') 627 | } 628 | 629 | 630 | # write csv: 631 | # accepts col.names=FALSE as an option 632 | # you may also want row.names=FALSE 633 | # defaults to blank NA strings 634 | # accepts append=TRUE as an option 635 | # 636 | write_csv = function(data, filename, col.names=TRUE, na='', append=FALSE, ...) 637 | { 638 | if (append == TRUE) 639 | { 640 | write.table(data, filename, append=TRUE, sep=',', col.names=FALSE, na=na, ...) 641 | return() 642 | } 643 | 644 | if (col.names == TRUE) 645 | { 646 | write.csv(data, filename, na=na, ...) 647 | return() 648 | } 649 | 650 | if (col.names == FALSE) 651 | { 652 | write.table(data, filename, sep=',', col.names=FALSE, na=na, ...) 653 | return() 654 | } 655 | } 656 | 657 | 658 | 659 | # convert csv to rds and vice versa 660 | # pass the name of the file to convert 661 | # the format of the input file ('csv', 'rds', 'rdata') 662 | # and the format to convert it to 663 | # 664 | # optionally, say which columns contain dates (which should be converted to lubridate) 665 | # 666 | convert_data_format = function(filename_in, format_in, format_out, date_cols=NULL) 667 | { 668 | # figure out filenames 669 | filename_out = gsub( format_in, format_out, filename_in) 670 | filename_out = gsub(tolower(format_in), tolower(format_out), filename_out) 671 | filename_out = gsub(toupper(format_in), toupper(format_out), filename_out) 672 | 673 | if (filename_out == filename_in) 674 | stop("convert_data_format(", filename_in, ", ", format_in, ", ", format_out, "): I don't want to overwrite your shit!") 675 | 676 | # read data 677 | flush("Reading", filename_in) 678 | data = read_data(filename_in) 679 | 680 | # convert dates to POSIX 681 | if (!is.null(date_cols)) 682 | { 683 | clear_line('Converting date_cols...') 684 | library(lubridate) 685 | 686 | for (d in date_cols) 687 | { 688 | test = data[1, d] 689 | 690 | # test 2000-01-01 00:00:00 691 | t = suppressWarnings( ymd_hms(test) ) 692 | if (!is.na(t)) 693 | { 694 | data[ , d] = ymd_hms(data[ , d]) 695 | next 696 | } 697 | 698 | # test 01/01/2000 00:00 699 | t = suppressWarnings( dmy_hm(test) ) 700 | if (!is.na(t)) 701 | { 702 | data[ , d] = dmy_hm(data[ , d]) 703 | next 704 | } 705 | 706 | # test 2000-01-01 00:00 707 | t = suppressWarnings( ymd_hm(test) ) 708 | if (!is.na(t)) 709 | { 710 | data[ , d] = ymd_hm(data[ , d]) 711 | next 712 | } 713 | 714 | # test 01/01/2000 00:00:00 715 | t = suppressWarnings( dmy_hms(test) ) 716 | if (!is.na(t)) 717 | { 718 | data[ , d] = dmy_hms(data[ , d]) 719 | next 720 | } 721 | 722 | # if its not one of those formats... ugh, ignore it 723 | } 724 | } 725 | 726 | clear_line('Writing', filename_out) 727 | write_data(data, filename_out, format_out) 728 | 729 | clear_line() 730 | } 731 | 732 | csv_to_rds = function(filename_in, ...) 
{ convert_data_format(filename_in, 'csv', 'rds', ...) } 733 | rds_to_csv = function(filename_in, ...) { convert_data_format(filename_in, 'rds', 'csv', ...) } 734 | 735 | 736 | 737 | 738 | ## 739 | ## merge a set of csv files together, writing to disk 740 | ## this is done on-disk rather than in-memory, allowing you to create very big files 741 | ## 742 | merge_csv_files = function(inputFiles, outputFile) 743 | { 744 | # check all the input files exist 745 | if (FALSE %in% file.exists(inputFiles)) 746 | stop("merge_csv_files: some of your input files don't exist!!!\n\n") 747 | 748 | # write the first month with column headers 749 | data = read_big_csv(inputFiles[1]) 750 | 751 | cat('Merging file:', basename(inputFiles[1])) 752 | flush() 753 | 754 | write_csv(data, outputFile, row.names=FALSE) 755 | 756 | # write the rest of them, appending 757 | for (i in 2:length(inputFiles)) 758 | { 759 | data = read_big_csv(inputFiles[i]) 760 | 761 | clear_line() 762 | cat('Merging file:', basename(inputFiles[i])) 763 | flush() 764 | 765 | write_csv(data, outputFile, row.names=FALSE, append=TRUE) 766 | } 767 | 768 | clear_line() 769 | cat('Saved files to:', outputFile, '\n') 770 | flush() 771 | } 772 | 773 | 774 | 775 | 776 | ################################################################################################################### 777 | ################## GRAPHICS ############################################################################ 778 | ################################################################################################################### 779 | 780 | 781 | ### 782 | # # set up good graphics parameters.. 783 | # # pass your own values for mar (margins) or mgp (distance between axes and labels) 784 | # # options to keep the interior padding, or rotated y-axis labels 785 | ### 786 | good_graphics = function 787 | ( 788 | mar = c(3, 3, 1.5, 1.5), 789 | mgp = c(1.75, 0.5, 0), 790 | padding = FALSE, 791 | captions = TRUE 792 | ) 793 | { 794 | # maximise graphics space 795 | par(bg="white") # white background for drawing pngs 796 | par(mar=mar) # shrink the margins (bottom, left, top, right) 797 | par(tck=0.01) # tick marks go inwards 798 | par(mgp=mgp) # move axis captions and labels closer 799 | par(font.lab=2) # bold axis captions 800 | 801 | if (captions == TRUE) par(las=1) # make all tick-mark labels horizontal 802 | if (captions == FALSE) par(las=0) 803 | 804 | if (padding == FALSE) par(xaxs="i", yaxs="i") # remove padding between the graph and the axes) 805 | if (padding == 'y') par(xaxs="i", yaxs="r") # just remove padding from the x axis (so y axis is padded) 806 | if (padding == TRUE) par(xaxs="r", yaxs="r") 807 | } 808 | 809 | 810 | 811 | ### 812 | # # print a decent quality png of your graph 813 | # # pass the filename to save as 814 | # # pass your own height and width (in pixels) and resolution (in ppi) 815 | # # the defaults equate to ~8pt font at 8.5cm width 816 | ### 817 | good_png = function 818 | ( 819 | filename, 820 | height = 2500, 821 | width = 2500, 822 | res = 400 823 | ) 824 | { 825 | dev.print(png, file=filename, height=height, width=width, res=res) 826 | } 827 | 828 | 829 | 830 | ## 831 | ## simple plot functions for lazy fuckers 832 | ## 833 | 834 | plot_p = function(x, y=NULL, pch=16, cex=0.75, col=rgb(0,0,0,0.1), xmin=NULL, xmax=NULL, ymin=NULL, ymax=NULL, xlim=NULL, ylim=NULL, ...) 
835 | { 836 | # deal with passing a single variable 837 | if (is.null(y)) 838 | { 839 | if (is.vector(x)) 840 | { 841 | y = x 842 | x = seq_along(y) 843 | 844 | } else { 845 | 846 | y = x[ , 2] 847 | x = x[ , 1] 848 | } 849 | 850 | } 851 | 852 | # default axis limits 853 | if (is.null(xlim)) xlim = range(x, na.rm=TRUE) 854 | if (is.null(ylim)) ylim = range(y, na.rm=TRUE) 855 | 856 | # modified axis limits 857 | if (!is.null(xmin)) xlim[1] = xmin 858 | if (!is.null(xmax)) xlim[2] = xmax 859 | if (!is.null(ymin)) ylim[1] = ymin 860 | if (!is.null(ymax)) ylim[2] = ymax 861 | 862 | # plot 863 | plot(x, y, pch=pch, cex=cex, col=col, xlim=xlim, ylim=ylim, ...) 864 | } 865 | 866 | 867 | 868 | 869 | 870 | plot_l = function(x, y=NULL, xmin=NULL, xmax=NULL, ymin=NULL, ymax=NULL, xlim=NULL, ylim=NULL, ...) 871 | { 872 | # deal with passing a single variable 873 | if (is.null(y)) 874 | { 875 | if (is.vector(x)) 876 | { 877 | y = x 878 | x = seq_along(y) 879 | 880 | } else { 881 | 882 | y = x[ , 2] 883 | x = x[ , 1] 884 | } 885 | 886 | } 887 | 888 | # default axis limits 889 | if (is.null(xlim)) xlim = range(x, na.rm=TRUE) 890 | if (is.null(ylim)) ylim = range(y, na.rm=TRUE) 891 | 892 | # modified axis limits 893 | if (!is.null(xmin)) xlim[1] = xmin 894 | if (!is.null(xmax)) xlim[2] = xmax 895 | if (!is.null(ymin)) ylim[1] = ymin 896 | if (!is.null(ymax)) ylim[2] = ymax 897 | 898 | # plot 899 | plot(x, y, type='l', xlim=xlim, ylim=ylim, ...) 900 | } 901 | 902 | 903 | 904 | # plot_l = function(x, y=NULL, ...) 905 | # { 906 | # if (!is.null(y)) 907 | # plot(x, y, type='l', ...) 908 | # 909 | # if (is.null(y)) 910 | # plot(x, type='l', ...) 911 | # } 912 | 913 | 914 | 915 | ## 916 | ## add transparency to a given colour (e.g. named one) 917 | ## http://stackoverflow.com/questions/8047668/transparent-equivalent-of-given-color 918 | ## 919 | alphaColour = function(someColor, alpha=100) 920 | { 921 | newColor =col2rgb(someColor) 922 | apply(newColor, 2, function(curcoldata){rgb(red=curcoldata[1], green=curcoldata[2], blue=curcoldata[3], alpha=alpha, maxColorValue=255)}) 923 | } 924 | 925 | 926 | 927 | # a list of parameters to make boxplots nice 928 | # see http://stat.ethz.ch/R-manual/R-devel/library/graphics/html/bxp.html 929 | boxplot.pars = list(boxfill = 'grey90', whisklty=1, whiskcol='grey40', outpch=16, outcex=0.4, outcol='grey70') 930 | 931 | 932 | 933 | 934 | 935 | 936 | heat.colours = c('#3F168A', '#2049D0', '#3288BD', '#66C2A5', '#ABDDA4', '#E6F598', '#FFFFBF', '#FEE08B', '#FDAE61', '#F46D43', '#D53E4F', '#9E0142') 937 | blue.colours = c('#F7FBFF', '#DEEBF7', '#C6DBEF', '#9ECAE1', '#6BAED6', '#4292C6', '#2171B5', '#08519C', '#08306B') 938 | 939 | # go see the random hacks in E:\WORK\Code\R\colour ramps - perceptual hue colour wheel.R 940 | rainbow.colours.10 = c('#CC00FF', '#8F00FF', '#000AFF', '#00A3FF', '#00FFC2', '#8FFF00', '#FAFF00', '#FFB800', '#FF7A00', '#F00000', '#AA0000') 941 | rainbow.colours.8 = c('#9E00FF', '#000AFF', '#00A3FF', '#00FFC2', '#70FF00', '#FFF500', '#FF7A00', '#FF0000', '#AA0000') 942 | rainbow.colours = rainbow.colours.8 943 | 944 | 945 | 946 | 947 | 948 | # function to turn your data into a ramp 949 | # accepts a vector of numbers, returns a vector of hex RGB colours 950 | # optionally pass the palette to use (defaults to rainbow.colours) 951 | # optionally pass the min-max range to enforce on your values 952 | # 953 | colour_ramp_data = function(x, pal=NULL, range=NULL) 954 | { 955 | # normalise our input data 956 | normalise = function(x, range) 957 | { 958 | if 
(!is.null(range)) 959 | { 960 | min = min(range) 961 | max = max(range) 962 | 963 | } else { 964 | 965 | min = min(x, na.rm=TRUE) 966 | max = max(x, na.rm=TRUE) 967 | } 968 | 969 | x = (x - min) / (max - min) 970 | x[ x < 0 ] = 0 971 | x[ x > 1 ] = 1 972 | x 973 | } 974 | 975 | x = normalise(x, range) 976 | 977 | # create a palette if needs be 978 | if (length(pal) < 2) 979 | pal = rainbow.colours 980 | 981 | # convert the x data to rgb 982 | myRamp = colorRampPalette(pal)(length(pal)) 983 | cols = colorRamp(myRamp)(x) 984 | 985 | # convert to an rgb array to hex strings 986 | rgb_to_hex = function(x) { if (is.na(x[1])) return(NA); rgb(x[1], x[2], x[3], maxColorValue=255) } 987 | apply(cols, 1, rgb_to_hex) 988 | } 989 | 990 | 991 | 992 | # 993 | # turn standard x-y data series into a stepped x-y series 994 | # with vertical transitions at the mid-point between each data point 995 | # 996 | # x and y must be of equal length 997 | # 998 | # this lets you plot a line chart in excel that looks like line(type='s') in R 999 | # 1000 | # e.g. 1001 | # h = hist(data, plot=FALSE) 1002 | # h = stepped_line_graph(h$mids, h$counts) 1003 | # plot(h, type='l') 1004 | # 1005 | stepped_line_graph = function(x, y, mids=TRUE) 1006 | { 1007 | # check 1008 | n = length(x) 1009 | if (n != length(y)) 1010 | stop(paste('stepped_line_graph: x and y vectors must have equal length. x =', length(x), '- y =', length(y), '\n')) 1011 | 1012 | # find the x midpoints 1013 | if (mids) 1014 | { 1015 | dx = diff(x) / 2 1016 | push(dx, tail(dx, 1)) 1017 | x = c(x[1]-dx[1], x+dx) 1018 | } 1019 | 1020 | # or bolt something on the end 1021 | if (!mids) 1022 | { 1023 | dx = diff(x) 1024 | push(x, tail(x,1)+tail(dx,1)) 1025 | } 1026 | 1027 | # build a stepped x sequence 1028 | x = rep(x, each=2) 1029 | x = tail( head(x, -1), -1) 1030 | 1031 | # build a stepped y sequence 1032 | y = rep(y, each=2) 1033 | 1034 | data.frame(x=x, y=y) 1035 | } 1036 | 1037 | # run on multiple columns 1038 | # pass the data frame and column name 1039 | stepped_line_graphs = function(df, x, mids=TRUE) 1040 | { 1041 | # get the data columns 1042 | y = colnames(df) 1043 | y = y[y %notin% x] 1044 | 1045 | if (length(y) == 0) 1046 | stop('stepped_line_graphs(): no data columns found\n') 1047 | 1048 | # run through the first 1049 | out = stepped_line_graph(df[ , x], df[ , y[1]], mids) 1050 | colnames(out) = c(x, y[1]) 1051 | 1052 | if (length(y) == 1) 1053 | return(out) 1054 | 1055 | # run through the rest 1056 | for (i in 2:length(y)) 1057 | { 1058 | o = stepped_line_graph(df[ , x], df[ , y[i]], mids) 1059 | out = cbind(out, o[ , 2]) 1060 | } 1061 | 1062 | colnames(out) = c(x, y) 1063 | 1064 | return(out) 1065 | } 1066 | 1067 | 1068 | 1069 | 1070 | # return the value of grid lines on a log chart between min and max 1071 | log_gridlines = function(min, max) 1072 | { 1073 | x = list() 1074 | 1075 | log_min = floor(log10(min)) 1076 | log_max = ceiling(log10(max)) 1077 | 1078 | m = log_min:log_max 1079 | m = 10 ^ m 1080 | 1081 | x$major = m 1082 | x$minor = c(m*2, m*3, m*4, m*5, m*6, m*7, m*8, m*9) 1083 | x$minor = sort(x$minor) 1084 | 1085 | x$major = x$major[ x$major > min & x$major < max ] 1086 | x$minor = x$minor[ x$minor > min & x$minor < max ] 1087 | 1088 | x 1089 | } 1090 | 1091 | 1092 | 1093 | 1094 | 1095 | # plot a histogram with a line fit 1096 | # data = values to histogram 1097 | # x = sequence of breaks for the histogram 1098 | # fit = the pnorm(), pweibull(), etc. 
#       function
1099 | hist_and_fit = function(data, x, fit, title='')
1100 | {
1101 | 	# pre-calculate the histogram
1102 | 	h = hist(data, breaks=x, plot=FALSE)
1103 | 
1104 | 	# calculate the density of our fit
1105 | 	fit = diff(fit) * length(data)
1106 | 
1107 | 	# use that to determine the ylimits
1108 | 	ylim = c(0, max(h$counts, fit))
1109 | 
1110 | 	# and plot
1111 | 	plot(h, ylim=ylim, main=title, col='grey95')
1112 | 	lines(midPoints(x), fit, col='red', lwd=2)
1113 | }
1114 | 
1115 | 
1116 | 
1117 | # hist_and_fit for the lazy man
1118 | # automatically fit a normal distribution to your data
1119 | # pass the data and the number of breaks (if you want) - the title is generated automatically
1120 | # returns the mean and standard deviation
1121 | hist_and_normal_fit = function(data, breaks=NA)
1122 | {
1123 | 	if (is.na(breaks))  h = hist(data, plot=FALSE)
1124 | 	else                h = hist(data, breaks=breaks, plot=FALSE)
1125 | 
1126 | 	x = h$breaks
1127 | 	mu = mean(data, na.rm=TRUE)
1128 | 	sd = sd(data, na.rm=TRUE)
1129 | 	y = pnorm(x, mu, sd)
1130 | 	title = paste(signif(mu, 3), '\U00B1', signif(sd, 3))
1131 | 
1132 | 	hist_and_fit(data, x, y, title)
1133 | 
1134 | 	c(mu, sd)
1135 | }
1136 | 
1137 | 
1138 | 
1139 | # histogram with logged x axis
1140 | # isn't very good - it should have funny shaped bars perhaps?
1141 | #
1142 | hist_log_x = function(data, ...)
1143 | {
1144 | 	# plot your logged data without an x axis
1145 | 	data = log10(data)
1146 | 	hist(data, xaxt='n', ...)
1147 | 
1148 | 	# design a nice x axis
1149 | 	t = axTicks(1)
1150 | 	axis(1, at=t, labels=prettyNum(10**t))
1151 | }
1152 | 
1153 | 
1154 | 
1155 | # histogram with logged y axis
1156 | #
1157 | hist_log_y = function(data, ...)
1158 | {
1159 | 	# calculate the histogram
1160 | 	h = hist(data, plot=FALSE, ...)
1161 | 
1162 | 	# log the counts and add on a nubbin so that counts of 1 are visible
1163 | 	nubbin = 0.05
1164 | 	h$counts = log10(h$counts) + nubbin
1165 | 
1166 | 	# plot without a y-axis
1167 | 	plot(h, yaxt='n', ylim=c(0, max(h$counts, na.rm=TRUE)*1.02))
1168 | 
1169 | 	# get the tick locations - round them off so you get nice numbers - then add on our nubbin
1170 | 	t = axTicks(2)
1171 | 	t = signif(10**t, 1)
1172 | 	t = log10(t) + nubbin
1173 | 
1174 | 	# print the axis values (without our nubbin)
1175 | 	axis(2, at=t, labels=prettyNum(10**(t-nubbin)))
1176 | }
1177 | 
1178 | 
1179 | 
1180 | 
1181 | ##################################################################################################################
1182 | ##################   MAP TOOLS   ###########################################################################
1183 | ##################################################################################################################
1184 | 
1185 | 
1186 | ##
1187 | ##  Grab a world map from either:
1188 | ##    the shapefile of your choosing
1189 | ##    rworldmap's getMap with the resolution of your choosing
1190 | ##
1191 | ##  return the spatial data frame thing...
1192 | ##
1193 | ##  my usual maps are:
1194 | ##    E:/WORK/Z Data/Maps/Natural Earth/50m Countries/ne_50m_admin_0_countries.shp
1195 | ##    E:/WORK/Z Data/Maps/Natural Earth/Admin Map Subunits/ne_10m_admin_0_map_subunits.shp
1196 | ##    E:/WORK/Z Data/Maps/TM_WORLD_BORDERS-0.3/TM_WORLD_BORDERS-0.3.shp
1197 | ##
1198 | get_world_map = function(shapeFile=NULL, res='high', wgsProjection=TRUE)
1199 | {
1200 | 	# use a custom shapefile
1201 | 	if (!is.null(shapeFile))
1202 | 	{
1203 | 		# if we used this shapefile previously
1204 | 		if (exists('world.map.shape') & exists('world.map'))
1205 | 		{
1206 | 			# then don't load it again!
1207 | if (world.map.shape == shapeFile) 1208 | { 1209 | return(world.map) 1210 | } 1211 | } 1212 | 1213 | # load the shapefile 1214 | suppressPackageStartupMessages(library(maptools)) 1215 | world.map = readShapeSpatial(shapeFile) 1216 | 1217 | # not necessary, but yields nicer default axis labels and aspect ratio 1218 | if (wgsProjection == TRUE) 1219 | proj4string(world.map) = "+proj=longlat +datum=WGS84" 1220 | 1221 | # remember the shapefile we used 1222 | world.map.shape <<- shapeFile 1223 | } 1224 | 1225 | # otherwise use rWorldMap 1226 | if (!exists('world.map')) 1227 | { 1228 | suppressPackageStartupMessages(library(rworldmap)) 1229 | world.map = getMap(res) 1230 | } 1231 | 1232 | return(world.map) 1233 | } 1234 | 1235 | 1236 | 1237 | ## 1238 | ## Grab a map of Europe, with non-members cut out 1239 | ## and far-flung colonies cropped out using a bounding box 1240 | ## 1241 | ## optionally set disembodied=TRUE to remove CIS and Turkey, making EU look like an island of paradise.. 1242 | ## 1243 | ## only works with the rWorldMap maps for now... 1244 | ## 1245 | get_europe_map = function(resolution='high', disembodied=FALSE) 1246 | { 1247 | # load a world map 1248 | suppressPackageStartupMessages(library(rworldmap)) 1249 | world.map <<- getMap(resolution) 1250 | 1251 | 1252 | # figure out which countries are in europe 1253 | 1254 | # identify countries by their general region 1255 | eu = world.map@data$REGION == 'Europe' 1256 | eu[is.na(eu)] = FALSE 1257 | 1258 | # filter out ones that aren't there 1259 | if (disembodied) 1260 | { 1261 | not.really.europe = c('RUS', 'BLR', 'UKR', 'GRL', 'TUR', 'ARM', 'AZE', 'GEO') 1262 | eu[world.map$ISO3 %in% not.really.europe] = FALSE 1263 | } 1264 | 1265 | # filter down our world map 1266 | europe.map = world.map[eu, ] 1267 | 1268 | # filter out the colonies 1269 | suppressPackageStartupMessages(library(raster)) 1270 | 1271 | if (disembodied) 1272 | europe.map = crop(europe.map, extent(-25, 32, 34, 72)) 1273 | 1274 | if (!disembodied) 1275 | europe.map = crop(europe.map, extent(-25, 32+10, 34, 72)) 1276 | 1277 | 1278 | return(europe.map) 1279 | } 1280 | 1281 | get_europe_north_africa_map = function(resolution='high') 1282 | { 1283 | # load a world map 1284 | suppressPackageStartupMessages(library(rworldmap)) 1285 | world.map <<- getMap(resolution) 1286 | 1287 | 1288 | # figure out which countries are in europe 1289 | 1290 | # identify countries by their general region 1291 | eu = world.map@data$REGION == 'Europe' 1292 | eu[is.na(eu)] = FALSE 1293 | 1294 | # filter out ones that aren't there 1295 | not.really.europe = c('RUS', 'BLR', 'UKR', 'GRL', 'TUR', 'ARM', 'AZE', 'GEO') 1296 | eu[world.map$ISO3 %in% not.really.europe] = FALSE 1297 | 1298 | # also grab north africa 1299 | na = world.map@data$ISO3 %in% c('MAR', 'DZA', 'LBY', 'EGY') 1300 | 1301 | # filter down our world map 1302 | europe.map = world.map[eu | na, ] 1303 | 1304 | 1305 | # filter out the colonies 1306 | suppressPackageStartupMessages(library(raster)) 1307 | europe.map = crop(europe.map, extent(-25, 37, 18, 72)) 1308 | 1309 | 1310 | return(europe.map) 1311 | } 1312 | 1313 | # iso3 should come from world.map@data$ISO3 1314 | # extent should be c(left, right, bottom, top) 1315 | get_country_map = function(iso3, extent=NULL, resolution='high') 1316 | { 1317 | # load a world map 1318 | suppressPackageStartupMessages(library(rworldmap)) 1319 | world.map <<- getMap(resolution) 1320 | 1321 | # filter down to our countries 1322 | keep = world.map@data$ISO3 %in% iso3 1323 | my.map = 
# iso3 should come from world.map@data$ISO3
# extent should be c(left, right, bottom, top)
get_country_map = function(iso3, extent=NULL, resolution='high')
{
    # load a world map
    suppressPackageStartupMessages(library(rworldmap))
    world.map <<- getMap(resolution)

    # filter down to our countries
    keep = world.map@data$ISO3 %in% iso3
    my.map = world.map[keep, ]

    # crop to a specific extent
    # (the extent argument shadows raster::extent, but R still resolves the function call correctly)
    if (length(extent) == 4)
    {
        library(raster)
        my.map = crop(my.map, extent(extent))
    }

    return(my.map)
}




##
## Plot a map with points at your specified coordinates
##  - pass longitude and latitude as numeric vectors
##  - optionally set the boundaries of the map with lonBounds and latBounds = c(minimum, maximum)
##      otherwise these are calculated automatically to cover all points
##  - optionally set how many degrees of padding to add around auto-generated bounds
##  - optionally specify the border and fill colour for countries
##  - optionally specify the aspect ratio to use (1.5 default for UK)
##  - optionally specify your own shapefile to render
##  - optionally specify the height of the dev() window
##  - optionally send other parameters on to points()
##
##  - returns the ratio of width to height you should use if printing the file as a png
##
##  - will create and store the map shape in the world.map global variable to save time on future calls
##
##
## if you want to plot a blank map with no points (useful to get the right boundaries) then just set col=NULL
##
## note that this makes a new graphics window with new par(). this will let you continue editing the map,
## but if you start making new plots in this window they will look funny. just call dev.new() afterwards.
##
##
## TO DO:
##  - different projections, motherfucker!!!
##
##  - if i want to repeat this with colour fills for each country
##    see http://stackoverflow.com/questions/1260965/developing-geographic-thematic-maps-with-r
##
plot_map_points = function(lon, lat, xlim=range(lon, na.rm=TRUE), ylim=range(lat, na.rm=TRUE), lonBounds=NULL, latBounds=NULL, padding=1,
                           mapBorder='grey75', mapFill='grey95', mapLwd=1,
                           mapAspect=1.5, shapeFile=NULL, height=7, new.plot=TRUE, add.points=FALSE, ...)
{
    # get our world map and save globally for future calls
    world.map <<- get_world_map(shapeFile)

    # honour explicit bounds if they were given
    if (!is.null(lonBounds)) xlim = range(lonBounds)
    if (!is.null(latBounds)) ylim = range(latBounds)

    # add some padding around the map
    xlim = range(xlim-padding, xlim+padding)
    ylim = range(ylim-padding, ylim+padding)

    # work out the aspect ratio
    ar = diff(xlim) / diff(ylim) / mapAspect

    # build a new plot window with the correct ratio
    if (new.plot == TRUE)
    {
        dev.new(height=height, width=height*ar)
    }

    if (add.points == FALSE)
    {
        # remove our plot margins
        par(bg="white")
        par(mar = rep(0, 4))

        # plot the map
        plot(world.map, xlim=xlim, ylim=ylim, border=mapBorder, col=mapFill, asp=mapAspect, lwd=mapLwd)
    }

    # plot the points
    points(lon, lat, ...)

    # return our ideal width:height ratio
    # then call good_png(height=1000, width=1000*ar)
    ar
}
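
# a quick usage sketch (editor's addition, with made-up coordinates):
if (0)
{
    lon = c(-0.13, 2.35, 13.40)    # London, Paris, Berlin
    lat = c(51.51, 48.86, 52.52)
    ar = plot_map_points(lon, lat, padding=3, col='red', pch=19)

    # ar is the width:height ratio to use when saving, e.g. good_png(height=1000, width=1000*ar)
}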
###################################################################################################################
##################    TEXT TOOLS    ###############################################################################
###################################################################################################################


# reverse a string
strrev = function(x) { reverse_string(x) }
reverse_string = function(x)
{
    sapply(lapply(strsplit(x, NULL), rev), paste, collapse="")
}

# get the last n characters of a string (i.e. substr working backwards from the end)
substr_reverse = function(x, n)
{
    substr(x, nchar(x)-n+1, nchar(x))
}

# replace 'x' with 'y' in a string (fixed strings, no regular expressions)
str_replace = function(...)
{
    gsub(..., fixed=TRUE)
}


# get text before, after, or between tokens
# set last=TRUE to split at the last occurrence of the token rather than the first
get_text_before = function(string, token, last=FALSE)
{
    chunks = strsplit(string, token, fixed=TRUE)

    if (last == FALSE)
        fun = function(x) { x[1] }

    if (last == TRUE)
        fun = function(x) { paste(head(x, -1), collapse=token) }

    sapply(chunks, fun)
}


get_text_after = function(string, token, last=FALSE)
{
    chunks = strsplit(string, token, fixed=TRUE)

    if (last == TRUE)
        fun = function(x) { tail(x, 1) }

    if (last == FALSE)
        fun = function(x) { paste(tail(x, -1), collapse=token) }

    sapply(chunks, fun)
}


get_text_between = function(string, token1, token2)
{
    string = get_text_after(string, token1)
    string = get_text_before(string, token2)
    string
}




# convert an array of key-val pairs into a data frame
# e.g. keyval = c('name=Iain', 'hair=NA')
#      splitter = '='
keyval_to_df = function(keyval, splitter)
{
    # parse the strings into a two-row structure (keys on top, values below)
    df = as.data.frame(sapply(strsplit(keyval, splitter), rbind), stringsAsFactors=FALSE)

    # use the keys as the header
    names(df) = df[1,]
    df = df[-1,]

    return(df)
}

# convert an array of arrays of key-val pairs into a data frame
all_keyval_to_df = function(keyval_array, splitter)
{
    # initialise results storage
    DF = NULL

    # run through each keyval string
    for (keyval in keyval_array)
    {
        df = keyval_to_df(keyval, splitter)
        DF = merge(DF, df, all=TRUE, sort=TRUE)
    }

    return(DF)
}
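
# a quick usage sketch (editor's addition, with made-up strings):
if (0)
{
    get_text_before('wind_speed.2014.nc', '.')            # "wind_speed"
    get_text_after('wind_speed.2014.nc', '.', last=TRUE)  # "nc"
    get_text_between('wind_speed.2014.nc', '.', '.nc')    # "2014"

    keyval_to_df(c('name=Iain', 'hair=none'), '=')        # a one-row data frame with columns name, hair
}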
# convert one/some numbers into percentage format
percent = function(val, dp=2)
{
    paste0(round(100*val, dp), "%")
}




###################################################################################################################
##################    ARRAY TOOLS    ##############################################################################
###################################################################################################################


# push a value onto the end of a vector, modifying it in the caller's environment
# in case you're too lazy to type array[length(array)+1] = item...
push = function(array, item)
{
    new_array = c(array, item)
    eval.parent(substitute(array <- new_array))
}

# pop an item off the end of a vector (again modifying it in place)
# and return the item
pop = function(array)
{
    item = array[ length(array) ]
    new_array = array[ -length(array) ]
    eval.parent(substitute(array <- new_array))
    item
}


# http://stackoverflow.com/questions/11561856/add-new-row-to-dataframe
# rbind a new row into a specific place of a data frame
insert_row = function(DF, row, r)
{
    s = seq(r, nrow(DF))
    DF[s+1, ] = DF[s, ]
    DF[r, ] = row
    DF
}
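
# a quick usage sketch (editor's addition):
# note that push and pop modify the variable in the calling environment
if (0)
{
    x = c(4, 1)
    push(x, 7)     # x is now 4 1 7
    pop(x)         # returns 7; x is back to 4 1

    df = data.frame(a=1:3, b=4:6)
    insert_row(df, c(9, 9), 2)    # the new row becomes row 2
}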
#
# multi-dimensional rbind/cbind equivalent...
#
# binds two arrays along the xth dimension
# the arrays must have the same number of dimensions, and this can be any number
#
# for example:
#   xbind(a[x,y,z1], b[x,y,z2], 3) returns c[x,y,z1+z2]...
#
#   xbind(a[x,y], b[x,y], 1) == rbind(a[x,y], b[x,y])
#   xbind(a[x,y], b[x,y], 2) == cbind(a[x,y], b[x,y])
#
xbind = function(a, b, x)
{
    # check we are passing arrays
    if (!is.array(a) | !is.array(b) | !is.numeric(x))
        stop('serious? this ting deals with arrays...')

    # check the number of dimensions
    n = length(dim(a))

    if (length(dim(b)) != n)
        stop('xbind only does arrays with the same number of dimensions fam!\n')


    # check the sizes are correct in the other dimensions
    for (i in 1:n)
    {
        if (i == x)
            next

        if (dim(a)[i] != dim(b)[i])
            stop('blud are you mad? xbind needs your arrays to have the same size in the non-x dimensions!\n')
    }


    # work out the interleaving
    cuts = 1

    for (i in n:1)
    {
        if (i == x)
            break

        cuts = cuts * dim(a)[i]
    }


    # convert to a set of 1d vectors
    aa = matrix(a, ncol=cuts)
    bb = matrix(b, ncol=cuts)


    # join together and reshape
    cc = rbind(aa, bb)

    newdim = (1:n %in% x) * 1
    newdim = dim(a) + dim(b) * newdim

    c = array(cc, dim=newdim)


    # add on the dimension names
    for (i in 1:n)
    {
        if (i == x)
        {
            dimnames(c)[[i]] = c(dimnames(a)[[i]], dimnames(b)[[i]])
        }

        if (i != x)
        {
            dimnames(c)[[i]] = dimnames(a)[[i]]
        }
    }


    # return the bound array
    c
}

if (0)
{
    a = array(c(111, 211, 121, 221, 112, 212, 122, 222), dim=c(2,2,2))
    a[1, , ]
    a[ , 1, ]
    a[ , , 1]

    b = array(c(311, 411, 321, 421, 312, 412, 322, 422), dim=c(2,2,2))
    c = array(c(131, 231, 141, 241, 132, 232, 142, 242), dim=c(2,2,2))
    d = array(c(113, 213, 123, 223, 114, 214, 124, 224), dim=c(2,2,2))

    xbind(a, b, 1)
    xbind(a, c, 2)
    xbind(a, d, 3)
}




# return the array indices for the min/max element
# e.g. x = array(runif(10000), dim=c(10,10,100)); which.min.arr(x);
which.min.arr = function(x)
{
    w = which.min(x)
    w = which(x == x[w], arr.ind=TRUE)
    w
}

which.max.arr = function(x)
{
    w = which.max(x)
    w = which(x == x[w], arr.ind=TRUE)
    w
}

# return the index of the element that is closest in value to x
which.closest = function(data, x)
{
    which.min(abs(data - x))
}


# http://stackoverflow.com/questions/2453326/fastest-way-to-find-second-third-highest-lowest-value-in-vector-or-column
nth_largest = function(data, n)
{
    if (n < 1 | n > length(data)) return (NA)

    n = length(data) + 1 - n

    if (is.unsorted(data))
        return ( sort(data, partial=n)[n] )

    return (data[n])
}

nth_smallest = function(data, n)
{
    if (n < 1 | n > length(data)) return (NA)

    if (is.unsorted(data))
        return ( sort(data, partial=n)[n] )

    return (data[n])
}



# multiply each column of a matrix by each element of the vector (i.e. each row gets multiplied by the whole vector)
# http://stackoverflow.com/questions/3643555/multiply-rows-of-matrix-by-vector
multiply_each_column = function(matrix, vector)
{
    result = t( t(matrix) * vector )
    as.data.frame(result)
}

# alias of multiply_each_column
mult_cols_by_vector = multiply_each_column

# multiply each row of a matrix by each element of the vector (i.e. each column gets multiplied by the whole vector)
multiply_each_row = function(matrix, vector)
{
    result = matrix * vector
    as.data.frame(result)
}

# alias of multiply_each_row
mult_rows_by_vector = multiply_each_row


# calculate the arithmetic centre of adjacent values in an array
midPoints = function(x)
{
    (x[-length(x)] + x[-1]) / 2
}


# test if a string (or array of strings) holds valid numbers
is_string_numeric = function(data)
{
    suppressWarnings(!is.na(as.numeric(data)))
}


# return a frequency table in ascending frequency order
freq_table = function(data)
{
    # create the table
    tbl = as.data.frame(table(data))
    colnames(tbl) = c('data', 'freq')

    # convert data from factors to strings or numeric
    tbl$data = as.character(tbl$data)
    is_numeric = is_string_numeric(tbl$data)
    if (all(is_numeric))
        tbl$data = as.numeric(tbl$data)

    # return, sorted by frequency
    tbl[ order(tbl$freq), ]
}
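
# a quick usage sketch (editor's addition):
if (0)
{
    nth_largest(c(5, 9, 1, 7), 2)             # 7
    which.closest(c(5, 9, 1, 7), 6)           # 1 (the value 5 is closest to 6)
    freq_table(c('a', 'b', 'b', 'c', 'b'))    # a and c (freq 1) first, then b (freq 3)
}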
#
# move a column in a data frame to a different position
#
# data   = the data frame
# myCol  = the column(s) you wish to move
# where  = 'first', 'last', 'before', 'after'
# relCol = the relative column to move before or after
#
# e.g. move_column(data, c('b','c'), 'before', 'e')
#
# http://stackoverflow.com/questions/18339370/reordering-columns-in-a-large-dataframe
#
move_column = function(data, myCol, where = "last", relCol = NULL)
{
    otherCols = setdiff(names(data), myCol)

    x = switch(where,

        first = data[c(myCol, otherCols)],

        last = data[c(otherCols, myCol)],

        before =
        {
            if (is.null(relCol)) stop("must specify the relCol column")
            if (length(relCol) > 1) stop("relCol must be a single character string")
            data[append(otherCols, values = myCol, after = (match(relCol, otherCols)-1))]
        },

        after =
        {
            if (is.null(relCol)) stop("must specify the relCol column")
            if (length(relCol) > 1) stop("relCol must be a single character string")
            data[append(otherCols, values = myCol, after = (match(relCol, otherCols)))]
        }
    )

    x
}



# calculate the moving average of a vector of data
# n may be fractional, in which case we interpolate between the two nearest integer widths
moving_average = function(data, n=5)
{
    # integer filter width
    if (n%%1 == 0)
        return( as.numeric( filter(data, rep(1/n, n), sides=2) ) )

    # otherwise interpolate between the nearest integers
    nlo = floor(n)
    nhi = ceiling(n)

    a = filter(data, rep(1/nlo, nlo), sides=2)
    b = filter(data, rep(1/nhi, nhi), sides=2)

    return( as.numeric( a*(nhi-n) + b*(n-nlo) ) )
}

# as moving_average, but pad the ends of the data first so the result has no NAs at the edges
moving_average_full = function(data, n=5)
{
    data = c(head(data, n), data, tail(data, n))

    smooth = moving_average(data, n)

    smooth = head(smooth, -n)
    smooth = tail(smooth, -n)

    smooth
}




# do the same as diff(x, lag), except return the biggest swing
# seen from now across the next lag elements...
# i.e. x = c(3, 0, 3, 0); diff(x, 2) will return 0, 0
# but biggest_diff(x, 2) will return -3, 3
biggest_diff = function(x, lag)
{
    if (lag == 1)
        return( diff(x) )

    # initialise
    DX = x*0

    for (i in 1:lag)
    {
        # pop the last element off our previous array
        DX = DX[ -length(DX) ]

        # calculate this new array
        dx = diff(x, i)

        # figure out which is biggest
        w = (abs(dx) > abs(DX))
        DX[w] = dx[w]
    }

    DX
}



# censor all rows in a data.frame where any column is already censored
# i.e. mirror NAs across all of the chosen columns if any of them is NA
#
na.everyone = function(dataframe, cols=colnames(dataframe))
{
    # find NA in all rows/columns (drop=FALSE keeps this a matrix when only one column is chosen)
    filter = is.na(dataframe[ , cols, drop=FALSE])

    # find rows where any column is NA
    filter = apply(filter, 1, any)
    filter = as.vector(filter)

    # NA all columns in those rows
    for (c in cols)
    {
        dataframe[filter, c] = NA
    }

    # return
    dataframe
}
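
# a quick usage sketch (editor's addition):
if (0)
{
    df = data.frame(a=c(1, NA, 3), b=c(4, 5, NA))
    na.everyone(df)    # rows 2 and 3 become NA in both columns
}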
# convert a list into a data frame
# (rbind.data.frame needs stringsAsFactors to be FALSE, so we set and restore it around the call)
list_to_data_frame = function(l)
{
    osaf = options('stringsAsFactors')
    options(stringsAsFactors=FALSE)

    # convert the list to the data frame
    d = do.call(rbind.data.frame, l)

    # reset stringsAsFactors
    options(stringsAsFactors=osaf[[1]][[1]])

    d
}



# aggregate a vector or a data.frame into groups of a fixed size
# e.g. aggregate_n(my_data, 2) to average half-hourly data to hourly
#
aggregate_n = function(values, n, FUN='mean')
{
    # work out our counter
    if (!is.data.frame(values))
        y = seq_along(values)

    if (is.data.frame(values))
        y = seq_along(values[ , 1])

    # aggregate our counter every n
    y = floor( (y-1) / n )

    # now aggregate our values
    z = aggregate(values, by=list(y), FUN)

    # return without the Group.1 column
    z[ , -1]
}
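
# a quick usage sketch (editor's addition):
if (0)
{
    aggregate_n(c(1, 3, 5, 7, 9, 11), 2)         # 2 6 10
    aggregate_n(data.frame(a=1:4, b=5:8), 2)     # two rows of column means
}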
###################################################################################################################
##################    TIME-SERIES    ##############################################################################
###################################################################################################################

aggregate_yearly  = function(dates, values, FUN='mean') aggregate_ts(dates, values, 'yearly', FUN)
aggregate_monthly = function(dates, values, FUN='mean') aggregate_ts(dates, values, 'monthly', FUN)
aggregate_weekly  = function(dates, values, FUN='mean') aggregate_ts(dates, values, 'weekly', FUN)
aggregate_daily   = function(dates, values, FUN='mean') aggregate_ts(dates, values, 'daily', FUN)
aggregate_hourly  = function(dates, values, FUN='mean') aggregate_ts(dates, values, 'hourly', FUN)

aggregate_ts = function(dates, values, by='yearly', FUN='mean')
{
    # forbid failure
    if (by %notin% c('daily', 'weekly', 'monthly', 'yearly', 'hourly', 'seasonal diurnal', 'MUSE'))
        stop("aggregate_ts: don't know how you want to aggregate things dear boy...")


    # pre-process our dates into the relevant chunks
    process_dates = function(dates, by)
    {
        if (by == 'yearly')  dates = format(dates, "%Y-01-01")
        if (by == 'monthly') dates = format(dates, "%Y-%m-01")
        if (by == 'weekly')  dates = round_date(dates, 'week')
        if (by == 'daily')   dates = format(dates, "%Y-%m-%d")
        if (by == 'hourly')  dates = format(dates, "%Y-%m-%d %H:00")

        # 'MUSE' gives something appropriate for MUSE and other energy systems models: quarterly and three-hourly
        if (by == 'MUSE')
        {
            season = floor( ((month(dates) + 1) %% 12) / 3)
            hour = 3 * floor(hour(dates) / 3)
            dates = sprintf("Y%4d Q%i H%02i", year(dates), season, hour)
        }

        dates
    }


    # we want to process a single time series
    # dates = {.....}, values = {.....}
    if (!is.data.frame(values))
    {
        my_dates = process_dates(dates, by)
        return( aggregate_ts_once(my_dates, values, by, FUN) )
    }

    # we want to process a simple two-column data frame
    # dates = 'colname', values = data.frame(.....)
    if (is.character(dates) & is.data.frame(values) & ncol(values) == 2)
    {
        my_dates = values[ , colnames(values) == dates]
        my_dates = process_dates(my_dates, by)

        my_values = values[ , colnames(values) != dates]

        return( aggregate_ts_once(my_dates, my_values, by, FUN) )
    }

    # we want to process a whole data frame
    # dates = 'colname', values = data.frame(.....)
    if (is.character(dates) & is.data.frame(values))
    {
        # strip out dates and data
        my_dates = values[ , colnames(values) == dates]
        my_values = values[ , colnames(values) != dates]
    }

    # we want to process a data frame of pure data
    if (!is.character(dates) & is.data.frame(values))
    {
        my_dates = dates
        my_values = values
    }

    cols = colnames(my_values)
    my_dates = process_dates(my_dates, by)

    # process the first column, creating the output structure
    c = cols[1]
    results = aggregate_ts_once(my_dates, my_values[ , c], by, FUN)
    colnames(results)[2] = c

    # determine the frequency of progress updates (small data runs fast, so report less often)
    i_tick = 1
    if (nrow(values) < 10000) i_tick = 10
    if (nrow(values) < 100)   i_tick = 100



    # process subsequent columns
    for (i in 2:length(cols))
    {
        c = cols[i]
        res = aggregate_ts_once(my_dates, my_values[ , c], by, FUN, lube.dates=FALSE)
        results[ , c] = res$value

        if (i %% i_tick == 0) clear_line('Aggregating', i, 'of', length(cols))
    }
    clear_line()

    return( results )
}


aggregate_ts_once = function(dates, values, by='yearly', FUN='mean', lube.dates=TRUE)
{
    # do something different and special if we want a seasonal-diurnal aggregation
    if (by == 'seasonal diurnal')
    {
        hour = hour(dates)
        month = month(dates)

        season = floor( (month %% 12) / 3)

        # note: the seasonal-diurnal profile always uses the mean, ignoring FUN
        results = aggregate(values, by=list(hour, season), FUN=mean, na.rm=TRUE)
        results = data.frame(array(results$x, dim=c(24,4)))
        colnames(results) = c('winter', 'spring', 'summer', 'autumn')

        # this hands back a custom format, so skip the rest
        return( results )
    }

    # aggregate our data as desired
    results = aggregate(values, by=list(dates), FUN=FUN)

    # convert to a nice format
    colnames(results) = c('date', 'value')

    if (lube.dates)
    {
        if (by %in% c('daily', 'weekly', 'monthly', 'yearly')) results$date = ymd(results$date)
        if (by == 'hourly') results$date = ymd_hm(results$date)
        # (MUSE labels are left as plain strings)
    }

    return( results )
}
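
# a quick usage sketch (editor's addition, with made-up data, assuming lubridate is loaded):
if (0)
{
    dates = seq(ymd_hm('2014-01-01 00:00'), ymd_hm('2014-12-31 23:00'), by='hour')
    cf = runif(length(dates))

    monthly = aggregate_monthly(dates, cf)             # one mean value per month
    daily = aggregate_ts(dates, cf, 'daily', 'max')    # daily maxima
}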
# yea boy
seasonal_diurnal = function(dates, values, FUN='mean') aggregate_ts(dates, values, 'seasonal diurnal', FUN)

# a function to help with plotting diurnal data
plot_diurnal = function(diurnal, ...)
{
    good_graphics(padding=TRUE)

    range = range(diurnal*0.98, diurnal*1.02)

    plot(0:23, diurnal$summer, col='red', type='l', xlab="Hour", ylab="CF", ylim=range, xaxt='n', ...)
    lines(0:23, diurnal$spring, col='green')
    lines(0:23, diurnal$autumn, col='brown')
    lines(0:23, diurnal$winter, col='blue')

    axis(1, at=seq(0,24,6))
    axis(1, at=seq(0,24,3), labels=FALSE)

    axis(3, at=seq(0,24,3), labels=FALSE)

    axis(4, labels=FALSE)
}



# additional lubridate-style parsers for dates with hours and minutes but no seconds
# (because I am exceptionally lazy)
dmy_hs = function(string) { dmy_hms(paste(string, '00', sep=':')) }
ymd_hs = function(string) { ymd_hms(paste(string, '00', sep=':')) }






# lag a vector of data by a specified number of (possibly non-integer) steps
# i.e. lagging by one will turn [4,1,2] into [NA,4,1]
# you can lag by fractional amounts, which uses linear interpolation
lag_data = function(data, steps)
{
    while (steps >= 1)
    {
        # lag by 1
        data = c(NA, head(data, -1))
        steps = steps - 1
    }

    if (steps > 0)
    {
        # lag by a fraction: a lag of s gives data[i-s],
        # which sits (1-s) of the way from data[i-1] towards data[i]
        data = c(NA, head(data, -1) + diff(data)*(1-steps))
    }

    data
}

# the opposite of lagging data
lead_data = function(data, steps)
{
    data = rev(data)
    data = lag_data(data, steps)
    rev(data)
}
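
# a quick usage sketch (editor's addition):
if (0)
{
    lag_data(c(4, 1, 2), 1)      # NA 4.0 1.0
    lag_data(c(4, 1, 2), 0.5)    # NA 2.5 1.5
    lead_data(c(4, 1, 2), 1)     # 1 2 NA
}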
#
# align two data frames based on the values of the given columns
# e.g. you have two sets of time-series data, and you want them aligned in time
# so that sim$date == act$date
#
# res = align_data(sim, 'date', act, 'date')
# sim = res[[1]]; act = res[[2]];
#
align_data = function(df1, col1, df2, col2)
{
    # shrink to the shared values
    df1 = df1[ (df1[ , col1] %in% df2[ , col2]), ]
    df2 = df2[ (df2[ , col2] %in% df1[ , col1]), ]

    # align
    df1 = df1[ match(df2[ , col2], df1[ , col1]), ]
    df2 = df2[ match(df1[ , col1], df2[ , col2]), ]

    list(df1, df2)
}



# search through the list of time zone names
# e.g. tz_search('Paris')
# (OlsonNames() is base R - lubridate's old olson_time_zones() has been removed)
tz_search = function(str)
{
    tz = OlsonNames()
    tz[ grep(str, tz) ]
}


# calculate the shift in hours needed to move to that time zone
# this accounts for daylight savings time, and works on a series of lubridates
#
tz_shift = function(t, zone)
{
    z = force_tz(t, zone)
    (t - z) / dhours(1)
}

# e.g.
# t = seq(dmy('01/01/2013'), dmy('01/01/2015'), by='hour')
# shift = tz_shift(t, 'Europe/Paris')
# plot(shift)



# fill in the missing time steps of a data frame
# x = your data frame, with the dates in its first column
# datecol = the column where the dates live
# period = 'day', 'hour', or anything else that seq() accepts
# fill = optionally, what you'd like your missing timesteps to contain (e.g. 0)
fill_missing_dates = function(x, datecol, period='day', fill=NA)
{
    min = min(x[ , datecol])
    max = max(x[ , datecol])

    all_dates = seq(from=min, to=max, by=period)

    m = match(all_dates, x[ , datecol])
    f = is.na(m)

    all_dates = data.frame(all_dates)

    # note: this assumes datecol is the first column of x
    for (c in 2:ncol(x))
    {
        all_dates[ , c] = x[m, c]
        all_dates[f, c] = fill
    }

    colnames(all_dates) = colnames(x)
    all_dates
}




###################################################################################################################
##################    STATISTICS    ###############################################################################
###################################################################################################################


# calculate the mode (note: this masks base R's mode(), which reports storage types)
mode = function(x)
{
    ux = unique(x)
    ux[which.max(tabulate(match(x, ux)))]
}


# already exists in the base package
weighted_mean = function(x, w)
{
    weighted.mean(x, w)
}


# calculate the weighted standard deviation
weighted_sd = function(x, w)
{
    w = w / sum(w)
    mu = weighted.mean(x, w)
    sd = sqrt( sum(w * (x - mu)^2) )
    return(sd)
}


# calculate the standard error on the mean (note: this masks base R's stderr(), which returns a connection)
stderr = function(x) { sqrt(var(x, na.rm=TRUE) / length(na.omit(x))) }


# calculate the half-width of the 95% confidence interval on the mean,
# for a normally distributed variable with unknown mean and standard deviation
ci95 = function(x)
{
    x = na.omit(x)
    n = length(x)

    return( qt(0.975, df=(n-1)) * sd(x) / sqrt(n) )
}


# return the adjusted R² value from an lm() fit
r2 = function(fit)
{
    summary(fit)$adj.r.squared
}


# return the coefficients and errors of a regression as a single row
coef.err = function(fit)
{
    coef = summary(fit)$coefficients[ , 1]
    err = summary(fit)$coefficients[ , 2]

    cbind(t(coef), t(err))
}

lm.coef.err = function(formula)
{
    fit = lm(formula)
    coef.err(fit)
}


# calculate the root mean square
rms = function(x)
{
    sqrt( mean( x^2, na.rm=TRUE) )
}


# rescale a vector to lie in the range [min, max]
# (the min and max arguments shadow the base functions, but R still finds the functions for the calls below)
rescale = function(data, min=0, max=1)
{
    # rescale to [0, 1]
    data = (data - min(data)) / diff(range(data))

    # rescale to our limits
    data = min + (data * (max - min))

    # return
    data
}


## generate a sequence of random numbers following a given distribution
## but constrain the distribution to lie within set bounds (min to max)
## any numbers lying outside those bounds will be re-drawn
##
## e.g. constrained_distro('rnorm', 1000, min=1, max=2, mean=0, sd=1)
##
constrained_distro = function(distro, length, min, max, ...)
{
    # e.g. rnorm(length, parm1, parm2)
    x = do.call(distro, list(length, ...) )

    while (1)
    {
        filter = (x < min | x > max)
        if (sum(filter) == 0) break

        x[filter] = do.call(distro, list(sum(filter), ...) )
    }

    x
}
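
# a quick usage sketch (editor's addition):
if (0)
{
    x = constrained_distro('rnorm', 1000, min=1, max=2, mean=0, sd=1)
    range(x)    # always within [1, 2]
    hist(x)
}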
--------------------------------------------------------------------------------
/power_curves/LICENSE:
--------------------------------------------------------------------------------
Contents of this folder are excluded from the BSD 3-Clause license and remain the property of the respective copyright holders.
--------------------------------------------------------------------------------