├── README.md
└── rplot.R


/README.md:
--------------------------------------------------------------------------------
  1 | ### Example usage
  2 | 
  3 | 	# Get the script
  4 | 	curl https://raw.githubusercontent.com/geotheory/r-plot/master/rplot.R > rplot.R
  5 | 
  6 | 	# Help
  7 | 	Rscript rplot.R -h | more
  8 | 
  9 | 	# Get some data
 10 | 	Rscript -e "write.csv(mtcars, 'mtcars.csv')"; head -6 mtcars.csv
 11 | 
 12 | 	# Scatterplot - car weight vs fuel efficiency (with correlation)
 13 | 	Rscript rplot.R mtcars.csv wt mpg -R
 14 | 
 15 | 	# Hashplot - car fuel efficiencies
 16 | 	Rscript rplot.R mtcars.csv 1 mpg
 17 | 
 18 | 	# Ordering plot data
 19 | 	Rscript rplot.R mtcars.csv 1 mpg -o
 20 | 
 21 | 	# bash function (e.g. for .bash_profile)
 22 | 	rplot() { Rscript rplot.R "$1" ${*:2}; }
 23 | 
 24 | 	# Iris dataset
 25 | 	Rscript -e "write.csv(iris, 'iris.csv')"; head -6 iris.csv
 26 | 
 27 | 	# Aggregating data for categorical averages
 28 | 	rplot iris.csv Species Sepal.Length -am
 29 | 
 30 | 	# Plotting a single numeric variable
 31 | 	rplot iris.csv Sepal.Width
 32 | 
 33 | 	# Reordered and without summary
 34 | 	rplot iris.csv Sepal.Width -ox
 35 | 
 36 | 	# Change scatterplot size
 37 | 	rplot iris.csv Sepal.Width -ox -r 40 -c 80
 38 | 
 39 | 	# Histogram of a single numeric variable (20 bins)
 40 | 	rplot iris.csv Sepal.Width -F -b 20
 41 | 
 42 | 	# Single categorical variable frequency (ie. aggregate by length)
 43 | 	rplot iris.csv Species -al
 44 | 
 45 | 	# Other data formats (eg. semicolon-separated)
 46 | 	Rscript -e "print(names(airquality)); write.table(airquality, 'airquality.csv', sep=';', row.names=T, col.names=F)"; head -3 airquality.csv
 47 | 
 48 | 	# Specifying separating-character, no-header row, and fields by column index (eg. Ozone 'V2' and Temp 'V5')
 49 | 	rplot airquality.csv 2 5 -ns ";"
 50 | 
 51 | 	# Passing r-plot a bash text object instead of file
 52 | 	rplot "$(cat mtcars.csv)" mpg disp -x
 53 | 
 54 | 	# Just view aggregated data without plot
 55 | 	rplot mtcars.csv gear mpg -HamzQ
 56 | 
 57 | 	# Useful functions to report header row - e.g. `headz iris.csv ";"` (specify delimiter if not comma)
 58 | 	
 59 | 	headx() { Rscript -e "l<-scan(text='$(head -1 $1)',what='character',sep=ifelse('$2'=='',',','$2'),quiet=T); for(i in 1:length(l)) cat(i,l[i],'\\\n')"; }
 60 | 
 61 | 	headz() { Rscript -e "a=commandArgs(T);l=read.table(text=a[1],se=ifelse('$2'=='',',','$2'),strin=F,h=T);for(i in 1:ncol(l)){f=l[,i];cat(i,' ',names(l)[i],' <',class(f),'> ',strtrim(paste(f,collapse=', '),min(80,as.integer(system('tput cols',int=T)))),'\\\n',sep='')}" "$(head -60 $1)"; }
 62 | 
 63 | -----------------------------------------------
 64 | 
 65 | ### Output from above
 66 | 
 67 | 	$ curl https://raw.githubusercontent.com/geotheory/r-plot/master/rplot.R > rplot.R
 68 | 	  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
 69 | 	                                 Dload  Upload   Total   Spent    Left  Speed
 70 | 	100 13386  100 13386    0     0   5739      0  0:00:02  0:00:02 --:--:--  5740
 71 | 
 72 | 	$ # Help
 73 | 
 74 | 	$ Rscript rplot.R -h | more
 75 | 
 76 | 	**********************
 77 | 	******* R-PLOT *******
 78 | 	**********************
 79 | 
 80 | 	This library plots a scatterplot or hashbar plot (bars made of hashes!) of a csv or a similarly formatted
 81 | 	file or string in your console. If 2 numeric id_fields are provided a scatterplot will default, else hashbars.
 82 | 	Required arguments: csv file/string, then column name(s)/index(ices) (values-column last for hashbars)
 83 | 
 84 | 	NB read.table check.names=T so e.g. numeric colnames prepend 'X' and those with spaces have spaces replaced by '.'.
 85 | 	Use '-Pz | head' to suppress the plot and see the colnames that are read in..
 86 | 
 87 | 	USAGE
 88 | 	Example csv call - scatterplot (by column name):
 89 | 	    "Rscript rplot.R file.csv num_field1 num_field2"
 90 | 	Example csv call - scatterplot (by column index):
 91 | 	    "Rscript rplot.R file.csv col#1 col#3"
 92 | 	Example csv call - hashbar plot:
 93 | 	    "Rscript rplot.R file.csv id_field1 id_field2 value_field"
 94 | 	Example csv text string call:
 95 | 	    "Rscript rplot.R "$(cat file.csv)" id_field value_field"
 96 | 	For convenience you can set up an alias in .bash_profile or equivalent, e.g.
 97 | 	    "rplot() { Rscript /pathto/rplot.R "$1" ${*:2}; }"
 98 | 	and call with:
 99 | 	    "rplot file.csv field1 field2 etc.."
100 | 
101 | 	OPTIONS:
102 | 	  Data handling:
103 | 	    -n   Specify no header row for input data. Use col indices instead
104 | 	    -s   sep character for input data (default `,`). Requires value e.g. ";"  "\t"  "" (inc. quotes)
105 | 	    -a   Aggregate (default `sum`) a hashbar plot data by its categorical variables
106 | 	    -m   Aggregate by `mean` if `-a` selected
107 | 	    -l   Aggregate by `length` (count instances) if `-a` selected
108 | 	  Plotting:
109 | 	    -o   Reorder hashbar chart by value (also reorders data.frames)
110 | 	    -H   Override a default scatterplot with hashbar plot
111 | 	    -S   Override a default hashbar plot with scatterplot (NA values are removed)
112 | 	    -r   Scatterplot rows/height (default 20). Requires following value.
113 | 	    -c   Scatterplot cols/width (default 50). Requires following value.
114 | 	    -p   pch char (defaults: `#` hashbars, `*` scatterplots without overplotting,
115 | 	         `. : ■ █` scatterplots with o/p). Requires 1 char eg. `-p "."` (eg. with -y)
116 | 	         or a 4 char string eg. ".°*@" to change overplot symbols (inc. quotes)
117 | 	    -R   Add r2 correlation (bivariate only))
118 | 	    -x   Suppress summary in case of scatterplot
119 | 	    -y   Suppress scatterplot point symbols (that show overplotting)
120 | 	    -z   Suppress plot (eg. use with -P or -Q)
121 | 	  Other:
122 | 	    -h   Call this help (also --help)
123 | 	    -P   Output raw data.frame to console (truncated 1000 rows)
124 | 	    -Q   Output processed data.frame to console (truncated 1000 rows)
125 | 
126 | 	
127 | 	$ # Get some data
128 | 
129 | 	$ Rscript -e "write.csv(mtcars, 'mtcars.csv')"; head -6 mtcars.csv
130 | 	"","mpg","cyl","disp","hp","drat","wt","qsec","vs","am","gear","carb"
131 | 	"Mazda RX4",21,6,160,110,3.9,2.62,16.46,0,1,4,4
132 | 	"Mazda RX4 Wag",21,6,160,110,3.9,2.875,17.02,0,1,4,4
133 | 	"Datsun 710",22.8,4,108,93,3.85,2.32,18.61,1,1,4,1
134 | 	"Hornet 4 Drive",21.4,6,258,110,3.08,3.215,19.44,1,0,3,1
135 | 	"Hornet Sportabout",18.7,8,360,175,3.15,3.44,17.02,0,0,3,2
136 | 
137 | 	$ # Scatterplot - car weight vs fuel efficiency (with correlation)
138 | 
139 | 	$ Rscript rplot.R mtcars.csv wt mpg -R
140 | 	 __________________________________________________
141 | 	|    .                                             |  Points
142 | 	|        .                                         |  .  1  (x̄ 1.0)
143 | 	|..                                                |  :  2  (x̄ 2.0)
144 | 	|                                                  |
145 | 	|                                                  |
146 | 	|     .                                            |
147 | 	|       .                                          |
148 | 	|                     .                            |
149 | 	|          .         .                             |
150 | 	|                                                  |  mpg
151 | 	|           . . . .   .                            |
152 | 	|               .        .    .                    |
153 | 	|                        :                         |
154 | 	|                        .  .                      |
155 | 	|                    .    .      .                 |
156 | 	|                        .:  .                   . |
157 | 	|                             .                    |
158 | 	|                                                  |
159 | 	|                                                  |
160 | 	|                                              .  .|  R²=-0.868
161 | 	|__________________________________________________|
162 | 	                         wt
163 | 	32 data rows plotted
164 | 	       wt             mpg
165 | 	 Min.   :1.513   Min.   :10.40
166 | 	 1st Qu.:2.581   1st Qu.:15.43
167 | 	 Median :3.325   Median :19.20
168 | 	 Mean   :3.217   Mean   :20.09
169 | 	 3rd Qu.:3.610   3rd Qu.:22.80
170 | 	 Max.   :5.424   Max.   :33.90
171 | 
172 | 	$ # Hashplot - car fuel efficiencies
173 | 
174 | 	$ Rscript rplot.R mtcars.csv 1 mpg
175 | 	32 data rows plotted
176 | 	X                     mpg
177 | 	Mazda RX4             21     ##########################################
178 | 	Mazda RX4 Wag         21     ##########################################
179 | 	Datsun 710            22.8   ##############################################
180 | 	Hornet 4 Drive        21.4   ###########################################
181 | 	Hornet Sportabout     18.7   ######################################
182 | 	Valiant               18.1   ####################################
183 | 	Duster 360            14.3   #############################
184 | 	Merc 240D             24.4   #################################################
185 | 	Merc 230              22.8   ##############################################
186 | 	Merc 280              19.2   #######################################
187 | 	Merc 280C             17.8   ####################################
188 | 	Merc 450SE            16.4   #################################
189 | 	Merc 450SL            17.3   ###################################
190 | 	Merc 450SLC           15.2   ##############################
191 | 	Cadillac Fleetwood    10.4   #####################
192 | 	Lincoln Continental   10.4   #####################
193 | 	Chrysler Imperial     14.7   #############################
194 | 	Fiat 128              32.4   #################################################################
195 | 	Honda Civic           30.4   #############################################################
196 | 	Toyota Corolla        33.9   #####################################################################
197 | 	Toyota Corona         21.5   ###########################################
198 | 	Dodge Challenger      15.5   ###############################
199 | 	AMC Javelin           15.2   ##############################
200 | 	Camaro Z28            13.3   ###########################
201 | 	Pontiac Firebird      19.2   #######################################
202 | 	Fiat X1-9             27.3   #######################################################
203 | 	Porsche 914-2         26     ####################################################
204 | 	Lotus Europa          30.4   #############################################################
205 | 	Ford Pantera L        15.8   ################################
206 | 	Ferrari Dino          19.7   ########################################
207 | 	Maserati Bora         15     ##############################
208 | 	Volvo 142E            21.4   ###########################################
209 | 
210 | 	$ # Ordering plot data
211 | 
212 | 	$ Rscript rplot.R mtcars.csv 1 mpg -o
213 | 	32 data rows plotted
214 | 	X                     mpg
215 | 	Toyota Corolla        33.9   #####################################################################
216 | 	Fiat 128              32.4   #################################################################
217 | 	Honda Civic           30.4   #############################################################
218 | 	Lotus Europa          30.4   #############################################################
219 | 	Fiat X1-9             27.3   #######################################################
220 | 	Porsche 914-2         26     ####################################################
221 | 	Merc 240D             24.4   #################################################
222 | 	Datsun 710            22.8   ##############################################
223 | 	Merc 230              22.8   ##############################################
224 | 	Toyota Corona         21.5   ###########################################
225 | 	Hornet 4 Drive        21.4   ###########################################
226 | 	Volvo 142E            21.4   ###########################################
227 | 	Mazda RX4             21     ##########################################
228 | 	Mazda RX4 Wag         21     ##########################################
229 | 	Ferrari Dino          19.7   ########################################
230 | 	Merc 280              19.2   #######################################
231 | 	Pontiac Firebird      19.2   #######################################
232 | 	Hornet Sportabout     18.7   ######################################
233 | 	Valiant               18.1   ####################################
234 | 	Merc 280C             17.8   ####################################
235 | 	Merc 450SL            17.3   ###################################
236 | 	Merc 450SE            16.4   #################################
237 | 	Ford Pantera L        15.8   ################################
238 | 	Dodge Challenger      15.5   ###############################
239 | 	Merc 450SLC           15.2   ##############################
240 | 	AMC Javelin           15.2   ##############################
241 | 	Maserati Bora         15     ##############################
242 | 	Chrysler Imperial     14.7   #############################
243 | 	Duster 360            14.3   #############################
244 | 	Camaro Z28            13.3   ###########################
245 | 	Cadillac Fleetwood    10.4   #####################
246 | 	Lincoln Continental   10.4   #####################
247 | 
248 | 	$ # bash function (e.g. for .bash_profile)
249 | 
250 | 	$ rplot() { Rscript rplot.R "$1" ${*:2}; }
251 | 
252 | 	$ # Iris dataset
253 | 
254 | 	$ Rscript -e "write.csv(iris, 'iris.csv')"; head -6 iris.csv
255 | 	"","Sepal.Length","Sepal.Width","Petal.Length","Petal.Width","Species"
256 | 	"1",5.1,3.5,1.4,0.2,"setosa"
257 | 	"2",4.9,3,1.4,0.2,"setosa"
258 | 	"3",4.7,3.2,1.3,0.2,"setosa"
259 | 	"4",4.6,3.1,1.5,0.2,"setosa"
260 | 	"5",5,3.6,1.4,0.2,"setosa"
261 | 
262 | 	$ # Aggregating data for categorical averages
263 | 
264 | 	$ rplot iris.csv Species Sepal.Length -am
265 | 	Aggregate function is mean
266 | 	150 data rows plotted
267 | 	Species      Sepal.Length
268 | 	setosa       5.01           #####################################################
269 | 	versicolor   5.94           ###############################################################
270 | 	virginica    6.59           ######################################################################
271 | 
272 | 	$ # Plotting a single numeric variable
273 | 
274 | 	$ rplot iris.csv Sepal.Width
275 | 	 __________________________________________________
276 | 	|    .                                             |  Points
277 | 	|          .                                       |  .  1  (x̄ 1.0)
278 | 	|          .                                       |  :  2  (x̄ 2.0)
279 | 	| .  ..                                            |  ■  3  (x̄ 3.0)
280 | 	|     ..       ..                      .    .      |
281 | 	|   .  .        .                                  |
282 | 	| .     .    .                      .              |
283 | 	|.... ...:......            .                .   . |
284 | 	|       .        . .             .       .      .  |
285 | 	|.        . . . .:      .            .. . .     .  |  Sepal.Width
286 | 	|. .      . .     .   .      .                ::   |
287 | 	|. ...   .   . .    .::  :: ... ■ ::.. .  .. .. ...|
288 | 	|                 ..    .:       .    . ...::      |
289 | 	|                   .  .   ..  .  .  .   .     .   |
290 | 	|                         .   ..       .     .     |
291 | 	|                  .   ..  :  .  . .. .          . |
292 | 	|             .   .          . .                   |
293 | 	|                    . .                .          |
294 | 	|                                                  |
295 | 	|                   .                              |
296 | 	|__________________________________________________|
297 | 	                       Index
298 | 	150 data rows plotted
299 | 	     Index         Sepal.Width
300 | 	 Min.   :  1.00   Min.   :2.000
301 | 	 1st Qu.: 38.25   1st Qu.:2.800
302 | 	 Median : 75.50   Median :3.000
303 | 	 Mean   : 75.50   Mean   :3.057
304 | 	 3rd Qu.:112.75   3rd Qu.:3.300
305 | 	 Max.   :150.00   Max.   :4.400
306 | 
307 | 	$ # Reordered and without summary
308 | 
309 | 	$ rplot iris.csv Sepal.Width -ox
310 | 	 __________________________________________________
311 | 	|                                                 .|  Points
312 | 	|                                                . |  .  1  (x̄ 1.0)
313 | 	|                                                . |  :  2  (x̄ 2.0)
314 | 	|                                               :. |  ■  3  (x̄ 3.0)
315 | 	|                                             :■.  |  █  4  (x̄ 4.0)
316 | 	|                                            :.    |
317 | 	|                                           ■.     |
318 | 	|                                     ■■■■■■       |
319 | 	|                                   ■■             |
320 | 	|                              .■■■■               |  Sepal.Width
321 | 	|                           ■■■:                   |
322 | 	|               :■■■■■■■■█■■                       |
323 | 	|          .■■■■.                                  |
324 | 	|       .■■:                                       |
325 | 	|      ■:                                          |
326 | 	|  :■■■                                            |
327 | 	| ■.                                               |
328 | 	|■                                                 |
329 | 	|                                                  |
330 | 	|.                                                 |
331 | 	|__________________________________________________|
332 | 	                       Index
333 | 
334 | 	$ # Change scatterplot size
335 | 
336 | 	$ rplot iris.csv Sepal.Width -ox -r 40 -c 80
337 | 	 ________________________________________________________________________________
338 | 	|                                                                               .|  Points
339 | 	|                                                                                |  .  1  (x̄ 1.0)
340 | 	|                                                                                |  :  2  (x̄ 2.0)
341 | 	|                                                                              . |
342 | 	|                                                                             .  |
343 | 	|                                                                                |
344 | 	|                                                                             .  |
345 | 	|                                                                                |
346 | 	|                                                                            :   |
347 | 	|                                                                         :::    |
348 | 	|                                                                                |
349 | 	|                                                                       .:       |
350 | 	|                                                                                |
351 | 	|                                                                     :..        |
352 | 	|                                                                  :::           |
353 | 	|                                                                                |
354 | 	|                                                           .:.::::              |
355 | 	|                                                        .::.                    |
356 | 	|                                                                                |
357 | 	|                                                 .::.:::.                       |  Sepal.Width
358 | 	|                                                                                |
359 | 	|                                            :::::.                              |
360 | 	|                              :::::.:::::::.                                    |
361 | 	|                                                                                |
362 | 	|                        .:.:::                                                  |
363 | 	|                                                                                |
364 | 	|                 .::::::.                                                       |
365 | 	|            .::::                                                               |
366 | 	|                                                                                |
367 | 	|          ::.                                                                   |
368 | 	|     .::.:                                                                      |
369 | 	|                                                                                |
370 | 	|    :.                                                                          |
371 | 	|                                                                                |
372 | 	|  ::                                                                            |
373 | 	|.:                                                                              |
374 | 	|                                                                                |
375 | 	|                                                                                |
376 | 	|                                                                                |
377 | 	|.                                                                               |
378 | 	|________________________________________________________________________________|
379 | 	                                      Index
380 | 
381 | 	$ # Histogram of a single numeric variable (20 bins)
382 | 
383 | 	$ rplot iris.csv Sepal.Width -F -b 20
384 | 	150 data rows plotted
385 | 	Sepal.Width   frequency
386 | 	-- 2 --       1           ##
387 | 	              3           ######
388 | 	              4           ########
389 | 	-- 2.5 --     11          ######################
390 | 	              5           ##########
391 | 	              9           ##################
392 | 	              14          ############################
393 | 	-- 3 --       36          ########################################################################
394 | 	              11          ######################
395 | 	              13          ##########################
396 | 	              6           ############
397 | 	-- 3.5 --     18          ####################################
398 | 	              4           ########
399 | 	              3           ######
400 | 	              6           ############
401 | 	-- 4 --       3           ######
402 | 	              1           ##
403 | 	              1           ##
404 | 	              1           ##
405 | 
406 | 	$ # Single categorical variable frequency (ie. aggregate by length)
407 | 
408 | 	$ rplot iris.csv Species -al
409 | 	Aggregate function is length
410 | 	150 data rows plotted
411 | 	Species      length
412 | 	setosa       50       ############################################################################
413 | 	versicolor   50       ############################################################################
414 | 	virginica    50       ############################################################################
415 | 
416 | 	$ # Other data formats (eg. semicolon-separated)
417 | 
418 | 	$ Rscript -e "print(names(airquality)); write.table(airquality, 'airquality.csv', sep=';', row.names=T, col.names=F)"; head -3 airquality.csv
419 | 	[1] "Ozone"   "Solar.R" "Wind"    "Temp"    "Month"   "Day"
420 | 	"1";41;190;7.4;67;5;1
421 | 	"2";36;118;8;72;5;2
422 | 	"3";12;149;12.6;74;5;3
423 | 
424 | 	$ # Specifying separating-character, no-header row, and fields by column index (eg. Ozone 'V2' and Temp 'V5')
425 | 
426 | 	$ rplot airquality.csv 2 5 -ns ";"
427 | 	 __________________________________________________
428 | 	|                      . .                         |  Points
429 | 	|                     .  . .       .               |  .  1  (x̄ 1.0)
430 | 	|                      .    .:                     |  :  2  (x̄ 2.0)
431 | 	|                    .   ..     .   .              |  ■  3  (x̄ 3.0)
432 | 	|           . .     .  :.                          |
433 | 	|         .  . ■   .  ...       .                  |
434 | 	|         . .     .:                    .          |
435 | 	|  . :..:.:..:.   .                               .|
436 | 	| .   .      .    ..              .                |
437 | 	|   :.■■.. . ..                                    |  V5
438 | 	| . :..                                            |
439 | 	|  ■.  .   :                                       |
440 | 	|  ..  . .                                         |
441 | 	| . .... .                                         |
442 | 	|  . . .. . .                                      |
443 | 	|   : ..                                           |
444 | 	|  ...                                             |
445 | 	|. .      .                                        |
446 | 	|.   ..                                            |
447 | 	| .                                                |
448 | 	|__________________________________________________|
449 | 	                         V2
450 | 	116 data rows plotted. 37 rows with NA values omitted
451 | 	       V2               V5
452 | 	 Min.   :  1.00   Min.   :57.00
453 | 	 1st Qu.: 18.00   1st Qu.:71.00
454 | 	 Median : 31.50   Median :79.00
455 | 	 Mean   : 42.13   Mean   :77.87
456 | 	 3rd Qu.: 63.25   3rd Qu.:85.00
457 | 	 Max.   :168.00   Max.   :97.00
458 | 
459 | 	$ # Passing r-plot a bash text object instead of file
460 | 
461 | 	$ rplot "$(cat mtcars.csv)" mpg disp -x
462 | 	 __________________________________________________
463 | 	|:                                                 |  Points
464 | 	|        .                                         |  .  1  (x̄ 1.0)
465 | 	|                                                  |  :  2  (x̄ 2.0)
466 | 	|                  .                               |
467 | 	|                                                  |
468 | 	|      . .  .     .                                |
469 | 	|                                                  |
470 | 	|          :                                       |
471 | 	|         .                                        |
472 | 	|          . . .                                   |  disp
473 | 	|                      .                           |
474 | 	|                .                                 |
475 | 	|                                                  |
476 | 	|                                                  |
477 | 	|               .  .   :                           |
478 | 	|                   .     .   .                    |
479 | 	|                      ..        .                 |
480 | 	|                         .               .        |
481 | 	|                                   .     .   .    |
482 | 	|                                                 .|
483 | 	|__________________________________________________|
484 | 	                        mpg
485 | 
486 | 	$ # Just view aggregated data without plot
487 | 
488 | 	$ rplot mtcars.csv gear mpg -HamzQ
489 | 	Aggregate function is mean
490 | 	  gear      mpg
491 | 	1    3 16.10667
492 | 	2    4 24.53333
493 | 	3    5 21.38000
494 | 	32 data rows plotted
495 | 
496 | 


--------------------------------------------------------------------------------
/rplot.R:
--------------------------------------------------------------------------------
  1 | # A small library for rendering bar or scatterplots of csv data to console using ascii characters.
  2 | # Call `Rscript rplot.R -h` for instructions..
  3 | # by @geotheory | geotheory.co.uk 2016
  4 | 
  5 | # Read the command-line arguments that follow the script name
  6 | args_in = commandArgs(trailingOnly=T)
  7 | 
  8 | # Options that take a following value (e.g. "-p '+'"); each entry is c(flag, default value), held as character strings
  9 | pars = list(sep=c('-s',','), quote=c('-q',"\"'"), pch=c('-p','*'), x=c('-c',50), 
 10 |   y=c('-r',20), bins=c('-b',15), X=c('-X','%'), size=c('-d',NA), asp=c('-A',1))
 11 | 
 12 | # Split combined flags into single-letter flags (e.g. '-am' becomes '-a' '-m'); arguments not starting with '-' pass through unchanged
 13 | args = c(unlist(sapply(args_in, function(a) {
 14 |     if(substr(a,1,1)=='-') return(paste0('-',strsplit(substr(a,2,100),'')[[1]])) else a
 15 |   }), use.names = F), '--') # sentinel '--' appended so a value-taking flag given last (e.g. -A without an argument) is still followed by an element
 16 | 
 17 | # For each value-taking option found in args, store the value that follows it in pars and drop that value from args
 18 | for(i in 1:length(pars)) {
 19 |   p = pars[[i]]
 20 |   if(p[1] %in% args) {
 21 |     n = match(p[1], args) + 1             # position just after the first occurrence of the flag
 22 |     if(substr(args[n],1,1) != '-') {      # only treat the next element as a value if it is not itself a flag
 23 |       if(p[1] == '-s') {
 24 |         if(is.na(args[n])) args[n] = ""       # whitespace separator
 25 |         if(args[n] == "\\t") args[n] = "\t"   # literal backslash-t typed by the user becomes a tab char
 26 |       }
 27 |       pars[[i]][2] = args[n]
 28 |       args = args[-n]                     # remove the consumed value so it is not later read as a field name
 29 |     }
 30 |   }
 31 | }
 32 | 
 33 | plot_args = args[substr(args, 1, 1) == '-']    # remaining flags
 34 | field_args = args[!substr(args, 1, 1) == '-']  # remaining non-flag arguments
 35 | if('-A' %in% plot_args) if(is.na(pars$asp[2])) pars$asp[2] = 1 # when no argument provided
 36 | 
 37 | if(any(c('-h','--help') %in% args_in | '-h' %in% plot_args)) {  # help requested: print the usage text below, then exit
 38 |   cat("**********************
 39 | ******  R-PLOT  ******
 40 | **********************
 41 | 
 42 | This library plots a scatterplot or hashbar plot (bars made of hashes!) of a csv or a similarly formatted
 43 | file or string in your console. If 2 numeric id_fields are provided a scatterplot will default, else hashbars.
 44 | Required arguments: csv file/string, then column name(s)/index(ices) (values-column last for hashbars)
 45 | 
 46 | NB read.table check.names=T so e.g. numeric colnames prepend 'X' and those with spaces have spaces replaced by '.'.
 47 | Use '-Pz | head' or '-O' to suppress plots and investigate data read-in.
 48 | 
 49 | USAGE
 50 | Example csv call - scatterplot (by column name):
 51 |     \"Rscript rplot.R file.csv num_field1 num_field2\"
 52 | Example csv call - scatterplot (by column index):
 53 |     \"Rscript rplot.R file.csv col#1 col#3\"
 54 | Example csv call - hashbar plot:
 55 |     \"Rscript rplot.R file.csv id_field1 id_field2 value_field\"
 56 | Example csv text string call:
 57 |     \"Rscript rplot.R \"$(cat file.csv)\" id_field value_field\"
 58 | For convenience you can set up an alias in .bash_profile or equivalent, e.g.
 59 |     \"rplot() { Rscript /pathto/rplot.R \"$1\" ${*:2}; }\"
 60 | and call with:
 61 |     \"rplot file.csv field1 field2 etc..\"
 62 | 
 63 | OPTIONS:
 64 |   Data handling:
 65 |     -n   Specify no header row for input data. Use col indices instead
 66 |     -s   sep character for input data (default `,`). Requires value e.g. \";\"  \"\\t\"  \"\" (inc. quotes)
 67 |     -q   quote character for input data. Requires value e.g. \"'\" or '\"' (inc. outer quotes)
 68 |     -X   Non-numeric characters to remove from numbers (other than \"{space} , $ £ € %\")
 69 |     -a   Aggregate (default `sum`) a hashbar plot data by its categorical variables
 70 |     -m   Aggregate by `mean` if `-a` selected
 71 |     -M   Aggregate by `median` if `-a` selected
 72 |     -l   Aggregate by `length` (count instances) if `-a` selected
 73 |     -b   Histogram bins (default 15) if `-F` selected. Requires following value
 74 |   Plotting:
 75 |     -o   Reorder hashbar chart by value (also reorders data.frames)
 76 |     -H   Override default scatterplot with hashbar plot
 77 |     -S   Override default hashbar plot with scatterplot (NA values are removed)
 78 |     -F   Override default scatter/hash plot with frequency histogram (requires single numeric field)
 79 |     -r   Scatterplot rows/height (default 20). Requires following value
 80 |     -c   Scatterplot cols/width (default 50). Requires following value
 81 |     -d   Quick plot-size tool. Requires argument: 'l'/'s' large/small
 82 |     -A   Fix y/x aspect ratio. Without argument defaults to 1, otherwise value given
 83 |     -p   pch char (defaults: `#` hashbars, `*` scatterplots without overplotting,
 84 |          `. : ■ █` scatterplots with o/p). Requires 1 char eg. `-p \".\"` (eg. with -y)
 85 |          or a 4 char string eg. \".°*@\" to change overplot symbols (inc. quotes)
 86 |     -R   Add r2 correlation (bivariate only))
 87 |     -w   Remove ID col max width constraints in hash plots, and scale to full console width
 88 |     -x   Suppress summary in case of scatterplot
 89 |     -y   Suppress scatterplot point symbols (that show overplotting)
 90 |     -z   Suppress plot (eg. use with -P or -Q)
 91 |   Other:
 92 |     -h   Call this help (also --help)
 93 |     -O   Inspect data.frame before and after numeric parsing and quit
 94 |     -P   Output raw data.frame to console and quit (truncated 1000 rows)
 95 |     -Q   Output processed data.frame to console and quit (truncated 1000 rows)
 96 | ")
 97 |   quit()  # stop here so no data is read or plotted after showing help
 98 | }
 99 | 
100 | # 2 functions from {scales}: included as R's library path sometimes isn't accessible from a console call
# Decide whether a range is (numerically) of zero width.
#   x    numeric vector of length 1 or 2 (a single value, or the two range endpoints)
#   tol  relative tolerance applied to the difference between the endpoints
# Returns TRUE or FALSE, or NA when an endpoint is missing. Stops for any other length.
zero_range <- function(x, tol = 1000 * .Machine$double.eps) {
  n <- length(x)
  if (n == 1) {
    return(TRUE)
  }
  if (n != 2) {
    stop("x must be length 1 or 2")
  }
  if (anyNA(x)) {
    return(NA)
  }
  lo <- x[1]
  hi <- x[2]
  if (lo == hi) {
    return(TRUE)
  }
  # two distinct infinite endpoints (-Inf, Inf) always span a real range
  if (all(is.infinite(x))) {
    return(FALSE)
  }
  # the difference is judged relative to the endpoint closest to zero
  smallest <- min(abs(x))
  if (smallest == 0) {
    return(FALSE)
  }
  abs((lo - hi) / smallest) < tol
}
111 | 
# Linearly map x from the interval `from` onto the interval `to`.
#   x       numeric vector to transform
#   to      target interval, c(low, high)
#   from    source interval; defaults to the observed range of x (NAs ignored)
#   finite  accepted for signature compatibility; not used by this implementation
# When either interval has zero width, every non-NA element maps to the midpoint of `to`.
rescale <- function(x, to = c(0, 1), from, finite = TRUE) {
  if (missing(from)) {
    from <- range(x, na.rm = TRUE)
  }
  degenerate <- zero_range(from) || zero_range(to)
  if (degenerate) {
    midpoint <- mean(to)
    return(ifelse(is.na(x), NA, midpoint))
  }
  span_in <- diff(from)
  span_out <- diff(to)
  (x - from[1]) / span_in * span_out + to[1]
}
117 | 
# Place values on an integer grid: stretch x so that its range spans [1, n],
# then round each position down to a whole number.
map <- function(x, n) {
  scaled <- rescale(x, to = c(1, n))
  floor(scaled)
}
119 | 
# Coerce a vector to numeric if possible, else return FALSE.
#   n  vector to test (a data.frame column, or a single field argument)
# Spaces, commas, currency symbols ($ £ €) and percent signs are stripped before
# parsing, together with the extra regex held in the script-level pars$X[2]
# (NOTE(review): presumably the value of the -X option - pars is built outside this block).
# Returns n itself when it is already numeric, the parsed numeric vector when every
# non-missing element parses, otherwise FALSE.
num <- function(n) {
  if (is.numeric(n)) return(n)

  # '$' on its own is a regex end-of-string anchor; it must be escaped to remove
  # literal dollar signs
  strip <- c(' ', ',', '\\$', '£', '€', '%')

  # only add the user-supplied pattern when there is one: an NA would otherwise be
  # pasted in as the literal text "NA"
  extra <- pars$X[2]
  extra <- extra[!is.na(extra) & nzchar(extra)]

  cleaned <- gsub(paste(c(strip, extra), collapse = '|'), '', n)
  parsed <- suppressWarnings(as.numeric(cleaned))

  # elements that were missing to begin with do not count against coercibility
  if (all(!is.na(parsed[!is.na(cleaned)]))) return(parsed)
  FALSE
}
128 | 
# Magnitude-aware number formatting (formatC alone can't quite cut it).
#   x  numeric vector
# Returns x untouched when all elements are whole numbers that print at the same
# width (e.g. years). Otherwise returns a character vector where each element is
# formatted according to its absolute size:
#   >= 1e7          scientific, 2 decimals
#   [100, 1e7)      rounded to whole numbers, with thousands separators
#   [1, 100)        up to 2 decimals
#   [0.001, 1)      up to 3 decimals
#   0               "0"
#   (0, 0.001)      scientific, 2 decimals
format_num <- function(x) {
  same_width <- length(unique(nchar(x))) == 1
  if (same_width & sum(x %% 1) == 0) {
    return(x) # year
  }

  mag <- abs(x)
  huge   <- mag >= 10000000
  large  <- mag >= 100 & mag < 10000000
  medium <- mag >= 1 & mag < 100
  small  <- mag >= 0.001 & mag < 1
  zero   <- x == 0
  tiny   <- mag > 0 & mag < 0.001

  scientific <- function(v) formatC(v, digits = 2, format = "e")

  out <- x
  out[huge]   <- scientific(x[huge])
  out[large]  <- formatC(round(x[large], 0), digits = 1, big.mark = ',', format = "f", drop0trailing = TRUE)
  out[medium] <- formatC(round(x[medium], 2), digits = 2, big.mark = ',', format = "f", drop0trailing = TRUE)
  out[small]  <- formatC(x[small], digits = 3, format = "f", drop0trailing = TRUE)
  out[zero]   <- '0'
  out[tiny]   <- scientific(x[tiny])
  out
}
147 | 
# Draw an ASCII scatterplot of y against x on a cols-by-rows character grid.
#
# Arguments:
#   x     numeric x values; when not supplied the index 1..length(y) is used
#   y     numeric y values
#   cols  grid width in characters
#   rows  grid height in characters (recomputed when -A or -d is in plot_args)
#   pch   1 character (plain point symbol) or 4 characters (symbols for increasing
#         levels of overplotting); anything else stops with an error
#   xlab  x axis label; replaced by "Index" when identical to ylab
#   ylab  y axis label
#
# Reads the script-level objects plot_args, pars, nrows and d_orig. All output goes
# to the console through cat()/print(). Calls quit() when -Q or -z is in plot_args.
scatter_plot <- function(x, y, cols = 50, rows = 20, pch = "*", xlab = "x", ylab = "Y") {
  y0 <- y  # untouched copy: the -R statistic is computed against the original order
  # NOTE(review): -o sorts y on its own, so in a two-variable plot the x/y pairing is lost
  if ('-o' %in% plot_args) y <- sort(as.numeric(y))

  # settle x before anything reads it (the -A branch below needs its range)
  if (missing(x)) {
    x <- seq_along(y)
  } else {
    x <- as.numeric(x)
  }

  if ('-A' %in% plot_args) {
    data_asp <- diff(range(y)) / diff(range(x))
    # NOTE(review): the 2/5 factor presumably compensates for console characters
    # being taller than they are wide - confirm
    rows <- ceiling(cols * data_asp * as.numeric(pars$asp[2]) * 2/5)
  }
  if (xlab == ylab) xlab <- "Index"

  if (nchar(pch) != 1 && nchar(pch) != 4) stop("pch must be 1 or 4 characters long")
  symbs <- c('.', ':', '■', '█')
  if (nchar(pch) == 4) symbs <- strsplit(pch, '')[[1]]

  # output processed data.frame to console
  orig_dat <- data.frame(x, y, stringsAsFactors = FALSE)
  names(orig_dat) <- c(xlab, ylab)
  if ('-Q' %in% plot_args) {
    print(head(orig_dat, 1000))
    quit()
  }
  if ('-z' %in% plot_args) quit()

  # quick plot-size presets (override any size computed above)
  if ('-d' %in% plot_args) {
    if (pars$size[2] == 'l') {
      rows <- 40; cols <- 100
    } else if (pars$size[2] == 's') {
      rows <- 10; cols <- 25
    } else warning('-d parameter requires either \'l\' or \'s\' input')
  }

  # rescale to grid and count point overplotting: one row per occupied grid cell.
  # y is negated so that large values land on the first (top) printed row.
  cell_counts <- as.data.frame(table(paste(map(x, cols), map(-y, rows))), stringsAsFactors = FALSE)
  cell_xy <- do.call('rbind', strsplit(cell_counts[[1]], split = ' '))
  # columns are converted one by one so a single occupied cell still gives a 1-row frame
  cells <- data.frame(x = as.numeric(cell_xy[, 1]),
                      y = as.numeric(cell_xy[, 2]),
                      freq = as.numeric(cell_counts$Freq))
  op <- max(cells$freq)
  pr_labs <- FALSE; labs <- NULL

  if (op > 1 && !('-y' %in% plot_args)) {  # overplotting and not manually over-riden
    if (nchar(pch) == 1 && '-p' %in% plot_args) warning('Single character argument for -p is ignored when point overplotting is present except when -y selected.')

    # cluster overplots to map to symbols
    cells$grp <- cells$freq
    if (length(unique(cells$freq)) > 4) {
      multi <- cells$grp != 1   # ignore 1s (over-plotting)
      centres <- unique(as.numeric(quantile(cells$grp[multi], 0:2/2)))
      cells$grp[multi] <- kmeans(cells$grp[multi], centres)$cluster + 1
    } else {
      cells$grp <- as.numeric(factor(cells$freq))
    }

    # symbol labels
    freqs <- sort(unique(cells$freq))
    if (!identical(freqs, c(1, 2)) && !identical(freqs, 1)) { # ie. not points representable literally by comb of '.' and ':'
      pr_labs <- TRUE
      # one key line per group actually present; the group id selects the symbol, so
      # a missing group (e.g. no single-point cells) cannot shift symbols or overflow
      grp_ids <- sort(unique(cells$grp))
      key <- vapply(grp_ids, function(g) {
        grp_freqs <- cells$freq[cells$grp == g]
        lo <- min(grp_freqs)
        hi <- max(grp_freqs)
        if (lo == hi) {
          paste0(symbs[g], '  ', lo)
        } else {
          paste0(symbs[g], '  ', lo, '-', hi,
                 '  (x̄ ', format(round(mean(grp_freqs), 1), nsmall = 1), ')')
        }
      }, character(1))
      labs <- c('Points', key)
    }
  } else { # a single symbol for every point
    # with a 4-character pch only its first symbol is used, keeping one character per cell
    symbs <- if (nchar(pch) == 4) symbs[1] else pch
    cells$grp <- 1
  }

  # output scatterplot
  blank_row <- rep(' ', cols)
  cat(' ', rep('_', cols), ' \n', sep = '')
  mid_row <- ceiling(rows / 2)
  for (i in seq_len(rows)) {
    hits <- cells[cells$y == i, ]
    ln <- blank_row
    ln[hits$x] <- symbs[hits$grp]
    cat('|', ln, '| ', sep = '')
    if (pr_labs && i <= length(labs)) cat(' ', labs[i], sep = '')  # point symbol key
    if (i == mid_row) cat('', ylab)                                # y label
    # cor() returns Pearson's r; it is squared so the value matches its R² label
    if (i == rows && '-R' %in% plot_args && xlab != 'Index') cat(' R²=', round(cor(x, y0)^2, 3), sep = '')
    cat('\n')
  }
  cat('|', rep('_', cols), '|\n', sep = '')
  xlab_mar <- max(0, 1 + cols/2 - (nchar(xlab)/2))
  cat(rep(' ', xlab_mar), xlab, '\n', sep = "")                    # x label

  # add summary unless overriden
  if (!('-x' %in% plot_args)) {
    cat(nrows, 'data rows plotted')
    if (nrow(d_orig) > nrows) cat('.', nrow(d_orig) - nrows, 'rows with NA values omitted')
    cat('\n')
    print(summary(orig_dat))
  }
}
251 | 
# Report a data.frame's dimensions, the class of each column and its first 3 rows.
#   obj  the data.frame to describe
# Everything is printed to the console. A failure while collecting the classes is
# reported by try() and does not prevent the head of the data from being shown.
inspect_df <- function(obj) {
  cat('d.f. dimensions: ', dim(obj), '\n')
  try({
    col_classes <- unlist(lapply(obj, class), use.names = FALSE)
    names(col_classes) <- names(obj)
    print(col_classes)
  }, silent = FALSE)
  print(head(obj, 3))
}
265 | 
# read in data
# Console width comes from `tput cols`. When that cannot be obtained (no terminal
# attached, tput unavailable) 110 columns are assumed instead of failing.
term_cols <- tryCatch(
  suppressWarnings(as.integer(system('tput cols', intern = TRUE, ignore.stderr = TRUE))),
  error = function(e) integer(0)
)
if (length(term_cols) != 1 || is.na(term_cols)) term_cols <- 110L
cons_width <- term_cols - 10L
# unless -w is given, the usable width is capped at 100 columns
if (!("-w" %in% plot_args)) cons_width <- min(100, cons_width)
txt <- field_args[1]
rows <- length(strsplit(txt, split = '\n')[[1]])

# data from text blob argument or csv file:
# a first argument without line breaks is passed to read.table as a file name,
# one with several lines is parsed as the data itself
header <- !('-n' %in% plot_args)
if (rows == 1) {
  d <- read.table(txt, sep = pars$sep[2], stringsAsFactors = FALSE, header = header,
                  row.names = NULL, quote = pars$quote[2])
} else if (rows > 1) {
  d <- read.table(text = txt, header = header, sep = pars$sep[2],
                  stringsAsFactors = FALSE, quote = pars$quote[2])
}
276 | 
# -O: show the data exactly as it was read in
show_inspection <- "-O" %in% plot_args
if (show_inspection) {
  cat('\nRaw data as read-in:\n')
  inspect_df(d)
}

# parse numerics: a column is replaced by its numeric form whenever num() can coerce it
for (col_idx in 1:ncol(d)) {
  parsed <- num(d[, col_idx])                  # numeric vector, or FALSE
  if (is.numeric(parsed[1])) d[, col_idx] <- parsed
}
d_orig <- d # backup as is; its row count is used later to report omitted rows

# -O: show the data again after parsing, then stop
if (show_inspection) {
  cat('\nNumerically parsed data:\n')
  inspect_df(d)
  cat('\n')
  quit()
}

# output data.frame to console
if ('-P' %in% plot_args) {
  print(head(d, 1000))
  quit()
}
301 | 
# everything after the data argument names the fields to plot
field_names <- field_args[-1]
if (length(field_names) == 0) {
  message('no field name or column index supplied')
  quit(status = 1)
}

# interpret field names - check if valid as name or column index.
# Each entry is replaced by the full column name it refers to; the first entry that
# cannot be resolved (scanning from the last field backwards) ends the script.
for (i in rev(seq_along(field_names))) {
  f <- field_names[i]
  name_hit <- pmatch(f, names(d))
  if (!is.na(name_hit)) {
    field_names[i] <- names(d)[name_hit]  # in case partial match
    next
  }
  # not a column name: accept it as a column index if it is a whole number in 1..ncol(d)
  idx <- num(f)                           # numeric value, or FALSE
  valid_index <- is.numeric(idx) && !is.na(idx) &&
    idx >= 1 && idx <= ncol(d) && idx == floor(idx)
  if (!valid_index) {
    message(paste('fieldname not valid name or column index:', f))
    quit(status = 1)
  }
  field_names[i] <- names(d)[idx]         # change to col name
}
323 | 
# the last field holds the values, the fields before it identify the rows;
# a lone field serves as both
n_fields <- length(field_names)
values_field <- field_names[n_fields]
id_fields <- if (n_fields > 1) field_names[-n_fields] else field_names

# omit rows with NA in plotting columns
plot_cols <- c(id_fields, values_field)
d <- na.omit(d[, plot_cols, drop = FALSE])
nrows <- nrow(d) # to calc NA removals
330 | 
# scatterplot if 2 fully numeric/NA variables or manually specified
# (skipped when a histogram is requested or more than one id field is given)
histogram_requested <- '-F' %in% plot_args
if (!histogram_requested && length(id_fields) == 1) {
  id_vals <- d[[id_fields]]
  id_as_num <- suppressWarnings(as.numeric(id_vals[!is.na(id_vals)]))
  all_numeric <- all(!is.na(id_as_num))

  # numeric ids plot as a scatter unless -H forces hashbars;
  # non-numeric ids only do so when -S forces a scatter
  plot_scatter <- if (all_numeric) !('-H' %in% plot_args) else '-S' %in% plot_args

  if (plot_scatter) {
    y_vals <- d[[values_field]]
    if (values_field == id_fields) {    # ie. only a single field supplied
      x_vals <- 1:length(id_vals)
    } else {
      x_vals <- suppressWarnings(as.numeric(id_vals))
    }
    dat <- na.omit(data.frame(x = x_vals, y = y_vals, stringsAsFactors = FALSE))
    scatter_plot(dat$x, dat$y,
                 cols = as.numeric(pars$x[2]), rows = as.numeric(pars$y[2]),
                 pch = pars$pch[2], xlab = id_fields, ylab = values_field)
    quit()
  }
}
347 | 
# aggregate
if ('-a' %in% plot_args) {
  # sum by default; -m, -M and -l override it, with -l beating -M beating -m
  fun <- if ('-l' %in% plot_args) {
    'length'
  } else if ('-M' %in% plot_args) {
    'median'
  } else if ('-m' %in% plot_args) {
    'mean'
  } else {
    'sum'
  }
  cat('Aggregate function is', fun, '\n')

  # grouping variables: one list element per id field
  agg_list <- if (length(id_fields) > 1) as.list(d[, id_fields]) else list(d[, id_fields])

  if (fun != 'length') {
    d <- aggregate(d[[values_field]], by = agg_list, FUN = fun, na.rm = TRUE, simplify = TRUE)
  } else {
    # counting instances is done by summing a column of ones
    d <- aggregate(rep(1, nrow(d)), by = agg_list, FUN = sum, na.rm = TRUE, simplify = TRUE)
  }

  single_field <- length(unique(c(id_fields, values_field))) == 1
  if (single_field) {
    values_field <- fun # ie. 'length'
    d[[id_fields]] <- 1:nrow(d)
  }
}

# rename fields if they've changed
names(d) <- c(id_fields, values_field)

# reorder data hashbars
if ('-o' %in% plot_args) {
  by_value <- order(d[[values_field]], decreasing = TRUE)
  d <- d[by_value, ]
}

# output processed data.frame to console
if ('-Q' %in% plot_args) {
  print(head(d, 1000))
  quit()
}
376 | 
# histogram for single numeric variable
# Replaces d with one row per bin: a label column (named after the binned field,
# blank except at a few round values) and a 'frequency' column of bin counts.
if ('-F' %in% plot_args) {
  hist_vals <- d[[values_field]]
  ran <- range(hist_vals)
  # identical values would give non-unique breaks; widen the range by half a unit each side
  if (ran[1] == ran[2]) ran <- ran + c(-0.5, 0.5)

  # n bins need n + 1 break points
  n_bins <- as.numeric(pars$bins[2])
  brks <- seq(ran[1], ran[2], length.out = n_bins + 1)
  cuts <- cut(hist_vals, brks, include.lowest = TRUE)
  counts <- as.vector(table(cuts))            # empty bins are kept, as zero counts
  mids <- (brks[-1] + brks[-length(brks)]) / 2  # bin midpoints

  # label only the bins closest to a handful of round values inside the data range
  pretty_labs <- pretty(mids, 4)
  pretty_labs <- pretty_labs[pretty_labs >= ran[1] & pretty_labs <= ran[2]]
  ids <- vapply(pretty_labs, function(p) which.min(abs(mids - p)), integer(1)) # closest group to assign label
  bin_labs <- rep('', length(counts))
  bin_labs[ids] <- paste('--', pretty_labs, '--')

  d <- data.frame(bin_labs, counts, stringsAsFactors = FALSE)
  id_fields <- values_field; values_field <- 'frequency'
  names(d) <- c(id_fields, values_field)
}
401 | 
# calculate column widths
# field_data receives one entry per printed column: the padded header (name), the
# padded cell texts (values) and the column's start/end character positions.
field_data <- list()
pos_x <- 1
width_capped <- !("-w" %in% plot_args)
for (f in c(id_fields, values_field)) {
  cells <- d[[f]]

  # entries that read as numbers get the compact number format
  is_num <- !is.na(suppressWarnings(as.numeric(cells)))
  cells[is_num] <- format_num(as.numeric(cells[is_num]))

  # column width: widest of header and cells, limited to 30 characters unless -w is given
  width <- max(nchar(f), nchar(cells))
  if (width_capped) width <- min(width, 30)

  fmt <- paste0("%-", width, "s")
  padded <- sprintf(fmt, substr(cells, 1, width))
  # fix for sprintf bug that ignores special characters when padding
  shortfall <- width - nchar(padded)
  padded <- paste0(padded, strrep(' ', shortfall))

  field_data[[length(field_data) + 1]] <- list(
    name = sprintf(fmt, substr(f, 1, width)),
    values = padded,
    pos_start = pos_x,
    pos_end = pos_x + width + 2
  )
  pos_x <- pos_x + width + 3
}
420 | 
values <- d[[values_field]]

# scale and spacing
# column at which the bars start: just past the last text column
plot_ind <- field_data[[length(field_data)]]$pos_end + 2

# whether scale to zero or positive/negative extreme
lo <- min(values)
hi <- max(values)
if (lo < 0 && hi > 0) {
  ran <- c(lo, hi)     # scale positive to negative
} else if (lo >= 0) {
  ran <- c(0, hi)      # all positive, scale to zero
} else {
  ran <- c(lo, 0)      # still plot hashbars from left axis
}

# characters per data unit. A console too narrow to leave room for bars, or data
# without any extent (all zeros), gives a factor of 0, i.e. empty bars, rather than
# a negative or infinite bar length.
bar_space <- max(0, cons_width - plot_ind)
fact <- if (diff(ran) > 0) bar_space / diff(ran) else 0

plot_values <- as.integer(values * fact)
minvalue <- lo
# leading blanks per row: none when no value is negative; otherwise bars of
# non-negative values start at the zero position and bars of negative values
# start at the value's own position
spaces <- pmax(0, pmin(plot_values - fact * minvalue, fact * -minvalue))
hashes <- fact * abs(values)
# print hashbar plot
# -z suppresses the plot: leave before anything is written (as scatter_plot does)
if ('-z' %in% plot_args) quit()
if ('-p' %in% plot_args) pch <- pars$pch[2] else pch <- '#'

# status line: rows plotted, plus the number dropped for containing NA values
cat(nrows, 'data rows plotted')
if (nrow(d_orig) > nrows) cat('.', nrow(d_orig) - nrows, 'rows with NA values omitted')
cat('\n')

# header row
for (f in field_data) cat(f$name, '  ')
cat('\n')

# one line per data row; seq_along() prints nothing at all when there are no rows
for (i in seq_along(values)) {
  # id_fields
  for (f in field_data) cat(f$values[i], '  ')
  # hashes
  cat(rep(' ', spaces[i]), sep = '')
  cat(rep(pch, hashes[i]), '\n', sep = '')
}
457 | 


--------------------------------------------------------------------------------