├── README.md └── src └── main └── gram.y /README.md: -------------------------------------------------------------------------------- 1 | # R with (x) => x + 1 2 | 3 | Modified R grammar file that adds anonymous functions like this: 4 | 5 | ```r 6 | f <- (x) => x + 1 7 | f(2) 8 | # 3 9 | ``` 10 | 11 | ## Install 12 | 13 | 1. Define the R version: 14 | 15 | ``` 16 | export R_VERSION=3.6.3 17 | ``` 18 | 19 | This is the R version I based the modifications on, so it might not work with other versions. 20 | 2. Follow the 'Download and extract R' instructions from https://docs.rstudio.com/resources/install-r-source/ 21 | 3. Replace the downloaded version of `src/main/gram.y` with `src/main/gram.y` from this repo 22 | 4. Generate a new parser file: 23 | 24 | ``` 25 | bison -d src/main/gram.y -o src/main/gram.c 26 | ``` 27 | 28 | You should see this output: 29 | 30 | ``` 31 | src/main/gram.y: warning: 84 shift/reduce conflicts [-Wconflicts-sr] 32 | ``` 33 | 5. Follow the 'Build and install R' instructions from https://docs.rstudio.com/resources/install-r-source/ 34 | 35 | When I did this, I added a couple of lines to the `./configure` command: 36 | 37 | ``` 38 | ./configure \ 39 | --prefix=/opt/R/${R_VERSION} \ 40 | --enable-memory-profiling \ 41 | --enable-R-shlib \ 42 | --with-blas \ 43 | --with-lapack \ 44 | --with-pcre1 \ 45 | --without-recommended-packages 46 | ``` 47 | 48 | ## Usage 49 | 50 | Run: 51 | 52 | ``` 53 | /opt/R/${R_VERSION}/bin/R 54 | ``` 55 | 56 | Play: 57 | 58 | ```r 59 | f <- (x) => x + 1 60 | g <- (x, y) => x + y 61 | h <- (x = 1) => x + 1 62 | i <- () => 1 63 | f(2) 64 | # 3 65 | g(2, 3) 66 | # 5 67 | h() 68 | # 2 69 | i() 70 | # 1 71 | ``` 72 | 73 | Check that things haven't broken too much: 74 | 75 | ```r 76 | # Normal functions 77 | j <- function(x) x + 1 78 | j(2) 79 | # 3 80 | # Normal expressions in parentheses 81 | (1) 82 | # 1 83 | # Normal assignment in parentheses 84 | (y = 5) 85 | y 86 | # 5 87 | ``` 88 | 89 | ## Issues 90 | 91 | The R grammar is complex and there's a fair chance that these modifications have broken something. Feel free to raise an issue if you find any errors. 92 | -------------------------------------------------------------------------------- /src/main/gram.y: -------------------------------------------------------------------------------- 1 | %{ 2 | /* 3 | * R : A Computer Language for Statistical Data Analysis 4 | * Copyright (C) 1995, 1996, 1997 Robert Gentleman and Ross Ihaka 5 | * Copyright (C) 1997--2019 The R Core Team 6 | * Copyright (C) 2009--2011 Romain Francois 7 | * 8 | * This program is free software; you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation; either version 2 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * This program is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program; if not, a copy is available at 20 | * https://www.R-project.org/Licenses/ 21 | */ 22 | 23 | #ifdef HAVE_CONFIG_H 24 | #include 25 | #endif 26 | 27 | #define R_USE_SIGNALS 1 28 | #include "IOStuff.h" /*-> Defn.h */ 29 | #include "Fileio.h" 30 | #include "Parse.h" 31 | #include 32 | 33 | #if !defined(__STDC_ISO_10646__) && (defined(__APPLE__) || defined(__FreeBSD__)) 34 | /* This may not be 100% true (see the comment in rlocale.h), 35 | but it seems true in normal locales */ 36 | # define __STDC_ISO_10646__ 37 | #endif 38 | 39 | /* #define YYDEBUG 1 */ 40 | #define YYERROR_VERBOSE 1 41 | #define PARSE_ERROR_SIZE 256 /* Parse error messages saved here */ 42 | #define PARSE_CONTEXT_SIZE 256 /* Recent parse context kept in a circular buffer */ 43 | 44 | static Rboolean busy = FALSE; 45 | static SEXP R_NullSymbol = NULL; 46 | 47 | static int identifier ; 48 | static void incrementId(void); 49 | static void initData(void); 50 | static void initId(void); 51 | static void record_( int, int, int, int, int, int, char* ) ; 52 | 53 | static void yyerror(const char *); 54 | static int yylex(); 55 | int yyparse(void); 56 | 57 | static FILE *fp_parse; 58 | static int (*ptr_getc)(void); 59 | 60 | static int SavedToken; 61 | static SEXP SavedLval; 62 | 63 | #define yyconst const 64 | 65 | typedef struct yyltype 66 | { 67 | int first_line; 68 | int first_column; 69 | int first_byte; 70 | 71 | int last_line; 72 | int last_column; 73 | int last_byte; 74 | 75 | int first_parsed; 76 | int last_parsed; 77 | 78 | int id; 79 | } yyltype; 80 | 81 | 82 | #define INIT_DATA_COUNT 16384 /* init parser data to this size */ 83 | #define MAX_DATA_COUNT 65536 /* release it at the end if it is this size or larger*/ 84 | 85 | #define DATA_COUNT (length( PS_DATA ) / DATA_ROWS) 86 | #define ID_COUNT ((length( PS_IDS ) / 2) - 1) 87 | 88 | static void finalizeData( ) ; 89 | static void growData( ) ; 90 | static void growID( int ) ; 91 | 92 | #define DATA_ROWS 8 93 | 94 | #define _FIRST_PARSED( i ) INTEGER( PS_DATA )[ DATA_ROWS*(i) ] 95 | #define _FIRST_COLUMN( i ) INTEGER( PS_DATA )[ DATA_ROWS*(i) + 1 ] 96 | #define _LAST_PARSED( i ) INTEGER( PS_DATA )[ DATA_ROWS*(i) + 2 ] 97 | #define _LAST_COLUMN( i ) INTEGER( PS_DATA )[ DATA_ROWS*(i) + 3 ] 98 | #define _TERMINAL( i ) INTEGER( PS_DATA )[ DATA_ROWS*(i) + 4 ] 99 | #define _TOKEN( i ) INTEGER( PS_DATA )[ DATA_ROWS*(i) + 5 ] 100 | #define _ID( i ) INTEGER( PS_DATA )[ DATA_ROWS*(i) + 6 ] 101 | #define _PARENT(i) INTEGER( PS_DATA )[ DATA_ROWS*(i) + 7 ] 102 | 103 | #define ID_ID( i ) INTEGER(PS_IDS)[ 2*(i) ] 104 | #define ID_PARENT( i ) INTEGER(PS_IDS)[ 2*(i) + 1 ] 105 | 106 | static void modif_token( yyltype*, int ) ; 107 | static void recordParents( int, yyltype*, int) ; 108 | 109 | static int _current_token ; 110 | 111 | /** 112 | * Records an expression (non terminal symbol 'expr') and gives it an id 113 | * 114 | * @param expr expression we want to record and flag with the next id 115 | * @param loc the location of the expression 116 | */ 117 | static void setId( SEXP expr, yyltype loc){ 118 | record_( 119 | (loc).first_parsed, (loc).first_column, (loc).last_parsed, (loc).last_column, 120 | _current_token, (loc).id, 0 ) ; 121 | } 122 | 123 | # define YYLTYPE yyltype 124 | # define YYLLOC_DEFAULT(Current, Rhs, N) \ 125 | do { \ 126 | if (N){ \ 127 | (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \ 128 | (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \ 129 | (Current).first_byte = YYRHSLOC (Rhs, 1).first_byte; \ 130 | (Current).last_line = YYRHSLOC (Rhs, N).last_line; \ 131 | (Current).last_column = YYRHSLOC (Rhs, N).last_column; \ 132 | (Current).last_byte = YYRHSLOC (Rhs, N).last_byte; \ 133 | (Current).first_parsed = YYRHSLOC (Rhs, 1).first_parsed; \ 134 | (Current).last_parsed = YYRHSLOC (Rhs, N).last_parsed; \ 135 | incrementId( ) ; \ 136 | (Current).id = identifier ; \ 137 | _current_token = yyr1[yyn] ; \ 138 | if (ParseState.keepSrcRefs && ParseState.keepParseData) { \ 139 | yyltype childs[N]; \ 140 | int ii = 0; \ 141 | for(ii=0; ii 276 | #ifdef HAVE_LANGINFO_CODESET 277 | # include 278 | #endif 279 | 280 | static int mbcs_get_next(int c, wchar_t *wc) 281 | { 282 | int i, res, clen = 1; char s[9]; 283 | mbstate_t mb_st; 284 | 285 | s[0] = (char) c; 286 | /* This assumes (probably OK) that all MBCS embed ASCII as single-byte 287 | lead bytes, including control chars */ 288 | if((unsigned int) c < 0x80) { 289 | *wc = (wchar_t) c; 290 | return 1; 291 | } 292 | if(utf8locale) { 293 | clen = utf8clen((char) c); 294 | for(i = 1; i < clen; i++) { 295 | c = xxgetc(); 296 | if(c == R_EOF) error(_("EOF whilst reading MBCS char at line %d"), ParseState.xxlineno); 297 | s[i] = (char) c; 298 | } 299 | s[clen] ='\0'; /* x86 Solaris requires this */ 300 | res = (int) mbrtowc(wc, s, clen, NULL); 301 | if(res == -1) error(_("invalid multibyte character in parser at line %d"), ParseState.xxlineno); 302 | } else { 303 | /* This is not necessarily correct for stateful MBCS */ 304 | while(clen <= MB_CUR_MAX) { 305 | mbs_init(&mb_st); 306 | res = (int) mbrtowc(wc, s, clen, &mb_st); 307 | if(res >= 0) break; 308 | if(res == -1) 309 | error(_("invalid multibyte character in parser at line %d"), ParseState.xxlineno); 310 | /* so res == -2 */ 311 | c = xxgetc(); 312 | if(c == R_EOF) error(_("EOF whilst reading MBCS char at line %d"), ParseState.xxlineno); 313 | s[clen++] = (char) c; 314 | } /* we've tried enough, so must be complete or invalid by now */ 315 | } 316 | for(i = clen - 1; i > 0; i--) xxungetc(s[i]); 317 | return clen; 318 | } 319 | 320 | /* Soon to be defunct entry points */ 321 | 322 | void R_SetInput(int); 323 | int R_fgetc(FILE*); 324 | 325 | /* Routines used to build the parse tree */ 326 | 327 | static SEXP xxnullformal(void); 328 | static SEXP xxfirstformal0(SEXP); 329 | static SEXP xxfirstformal1(SEXP, SEXP); 330 | static SEXP xxaddformal0(SEXP, SEXP, YYLTYPE *); 331 | static SEXP xxaddformal1(SEXP, SEXP, SEXP, YYLTYPE *); 332 | static SEXP xxexprlist0(); 333 | static SEXP xxexprlist1(SEXP, YYLTYPE *); 334 | static SEXP xxexprlist2(SEXP, SEXP, YYLTYPE *); 335 | static SEXP xxsub0(void); 336 | static SEXP xxsub1(SEXP, YYLTYPE *); 337 | static SEXP xxsymsub0(SEXP, YYLTYPE *); 338 | static SEXP xxsymsub1(SEXP, SEXP, YYLTYPE *); 339 | static SEXP xxnullsub0(YYLTYPE *); 340 | static SEXP xxnullsub1(SEXP, YYLTYPE *); 341 | static SEXP xxsublist1(SEXP); 342 | static SEXP xxsublist2(SEXP, SEXP); 343 | static SEXP xxcond(SEXP); 344 | static SEXP xxifcond(SEXP); 345 | static SEXP xxif(SEXP, SEXP, SEXP); 346 | static SEXP xxifelse(SEXP, SEXP, SEXP, SEXP); 347 | static SEXP xxforcond(SEXP, SEXP); 348 | static SEXP xxfor(SEXP, SEXP, SEXP); 349 | static SEXP xxwhile(SEXP, SEXP, SEXP); 350 | static SEXP xxrepeat(SEXP, SEXP); 351 | static SEXP xxnxtbrk(SEXP); 352 | static SEXP xxfuncall(SEXP, SEXP); 353 | static SEXP xxdefun(SEXP, SEXP, SEXP, YYLTYPE *); 354 | static SEXP xxdefunanon(SEXP, SEXP, YYLTYPE *); 355 | static SEXP xxunary(SEXP, SEXP); 356 | static SEXP xxbinary(SEXP, SEXP, SEXP); 357 | static SEXP xxparen(SEXP, SEXP); 358 | static SEXP xxsubscript(SEXP, SEXP, SEXP); 359 | static SEXP xxexprlist(SEXP, YYLTYPE *, SEXP); 360 | static int xxvalue(SEXP, int, YYLTYPE *); 361 | 362 | #define YYSTYPE SEXP 363 | 364 | %} 365 | 366 | %token-table 367 | 368 | %token END_OF_INPUT ERROR 369 | %token STR_CONST NUM_CONST NULL_CONST SYMBOL FUNCTION ANON 370 | %token INCOMPLETE_STRING 371 | %token LEFT_ASSIGN EQ_ASSIGN RIGHT_ASSIGN LBB 372 | %token FOR IN IF ELSE WHILE NEXT BREAK REPEAT 373 | %token GT GE LT LE EQ NE AND OR AND2 OR2 374 | %token NS_GET NS_GET_INT 375 | %token COMMENT LINE_DIRECTIVE 376 | %token SYMBOL_FORMALS 377 | %token EQ_FORMALS 378 | %token EQ_SUB SYMBOL_SUB 379 | %token SYMBOL_FUNCTION_CALL 380 | %token SYMBOL_PACKAGE 381 | /* no longer used: %token COLON_ASSIGN */ 382 | %token SLOT 383 | 384 | /* This is the precedence table, low to high */ 385 | %left '?' 386 | %left LOW WHILE FOR REPEAT 387 | %nonassoc ANON 388 | %right IF 389 | %left ELSE 390 | %right LEFT_ASSIGN 391 | %right EQ_ASSIGN 392 | %left RIGHT_ASSIGN 393 | %left '~' TILDE 394 | %left OR OR2 395 | %left AND AND2 396 | %left UNOT NOT 397 | %nonassoc GT GE LT LE EQ NE 398 | %left '+' '-' 399 | %left '*' '/' 400 | %left SPECIAL 401 | %left ':' 402 | %left UMINUS UPLUS 403 | %right '^' 404 | %left '$' '@' 405 | %left NS_GET NS_GET_INT 406 | %nonassoc '(' '[' LBB 407 | 408 | %% 409 | 410 | prog : END_OF_INPUT { YYACCEPT; } 411 | | '\n' { yyresult = xxvalue(NULL,2,NULL); goto yyreturn; } 412 | | expr_or_assign '\n' { yyresult = xxvalue($1,3,&@1); goto yyreturn; } 413 | | expr_or_assign ';' { yyresult = xxvalue($1,4,&@1); goto yyreturn; } 414 | | error { YYABORT; } 415 | ; 416 | 417 | expr_or_assign : expr { $$ = $1; } 418 | | equal_assign { $$ = $1; } 419 | ; 420 | 421 | expr_not_sym_or_assign : expr_not_sym { $$ = $1; } 422 | | equal_assign { $$ = $1; } 423 | ; 424 | 425 | equal_assign : expr EQ_ASSIGN expr_or_assign { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 426 | ; 427 | 428 | expr : expr_not_sym { $$ = $1; } 429 | | SYMBOL { $$ = $1; } 430 | ; 431 | 432 | expr_not_sym : NUM_CONST { $$ = $1; setId( $$, @$); } 433 | | STR_CONST { $$ = $1; setId( $$, @$); } 434 | | NULL_CONST { $$ = $1; setId( $$, @$); } 435 | | '(' SYMBOL ')' ANON expr_or_assign 436 | { $$ = xxfirstformal0($2); modif_token( &@2, SYMBOL_FORMALS ) ; 437 | $$ = xxdefunanon($$,$5,&@$); setId( $$, @$); } 438 | | '(' SYMBOL EQ_ASSIGN expr ')' ANON expr_or_assign 439 | { $$ = xxfirstformal1($2,$4); modif_token( &@2, SYMBOL_FORMALS ) ; modif_token( &@3, EQ_FORMALS ) ; 440 | $$ = xxdefunanon($$,$7,&@$); setId( $$, @$); } 441 | | '(' formlist ')' ANON expr_or_assign 442 | { $$ = xxdefunanon($2,$5,&@$); setId( $$, @$); } 443 | | '{' exprlist '}' { $$ = xxexprlist($1,&@1,$2); setId( $$, @$); } 444 | | '(' SYMBOL ')' { $$ = xxparen($1,$2); setId( $$, @$); } 445 | | '(' SYMBOL EQ_ASSIGN expr ')' { $$ = xxbinary($3,$2,$4); 446 | $$ = xxparen($1,$$); setId( $$, @$); } 447 | | '(' expr_not_sym_or_assign ')' { $$ = xxparen($1,$2); setId( $$, @$); } 448 | 449 | | '-' expr %prec UMINUS { $$ = xxunary($1,$2); setId( $$, @$); } 450 | | '+' expr %prec UMINUS { $$ = xxunary($1,$2); setId( $$, @$); } 451 | | '!' expr %prec UNOT { $$ = xxunary($1,$2); setId( $$, @$); } 452 | | '~' expr %prec TILDE { $$ = xxunary($1,$2); setId( $$, @$); } 453 | | '?' expr { $$ = xxunary($1,$2); setId( $$, @$); } 454 | 455 | | expr ':' expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 456 | | expr '+' expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 457 | | expr '-' expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 458 | | expr '*' expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 459 | | expr '/' expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 460 | | expr '^' expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 461 | | expr SPECIAL expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 462 | | expr '%' expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 463 | | expr '~' expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 464 | | expr '?' expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 465 | | expr LT expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 466 | | expr LE expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 467 | | expr EQ expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 468 | | expr NE expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 469 | | expr GE expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 470 | | expr GT expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 471 | | expr AND expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 472 | | expr OR expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 473 | | expr AND2 expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 474 | | expr OR2 expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 475 | 476 | | expr LEFT_ASSIGN expr { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 477 | | expr RIGHT_ASSIGN expr { $$ = xxbinary($2,$3,$1); setId( $$, @$); } 478 | | FUNCTION '(' formlist ')' cr expr_or_assign %prec LOW 479 | { $$ = xxdefun($1,$3,$6,&@$); setId( $$, @$); } 480 | | expr '(' sublist ')' { $$ = xxfuncall($1,$3); setId( $$, @$); modif_token( &@1, SYMBOL_FUNCTION_CALL ) ; } 481 | | IF ifcond expr_or_assign { $$ = xxif($1,$2,$3); setId( $$, @$); } 482 | | IF ifcond expr_or_assign ELSE expr_or_assign { $$ = xxifelse($1,$2,$3,$5); setId( $$, @$); } 483 | | FOR forcond expr_or_assign %prec FOR { $$ = xxfor($1,$2,$3); setId( $$, @$); } 484 | | WHILE cond expr_or_assign { $$ = xxwhile($1,$2,$3); setId( $$, @$); } 485 | | REPEAT expr_or_assign { $$ = xxrepeat($1,$2); setId( $$, @$); } 486 | | expr LBB sublist ']' ']' { $$ = xxsubscript($1,$2,$3); setId( $$, @$); } 487 | | expr '[' sublist ']' { $$ = xxsubscript($1,$2,$3); setId( $$, @$); } 488 | | SYMBOL NS_GET SYMBOL { $$ = xxbinary($2,$1,$3); setId( $$, @$); modif_token( &@1, SYMBOL_PACKAGE ) ; } 489 | | SYMBOL NS_GET STR_CONST { $$ = xxbinary($2,$1,$3); setId( $$, @$); modif_token( &@1, SYMBOL_PACKAGE ) ; } 490 | | STR_CONST NS_GET SYMBOL { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 491 | | STR_CONST NS_GET STR_CONST { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 492 | | SYMBOL NS_GET_INT SYMBOL { $$ = xxbinary($2,$1,$3); setId( $$, @$); modif_token( &@1, SYMBOL_PACKAGE ) ;} 493 | | SYMBOL NS_GET_INT STR_CONST { $$ = xxbinary($2,$1,$3); setId( $$, @$); modif_token( &@1, SYMBOL_PACKAGE ) ;} 494 | | STR_CONST NS_GET_INT SYMBOL { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 495 | | STR_CONST NS_GET_INT STR_CONST { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 496 | | expr '$' SYMBOL { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 497 | | expr '$' STR_CONST { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 498 | | expr '@' SYMBOL { $$ = xxbinary($2,$1,$3); setId( $$, @$); modif_token( &@3, SLOT ) ; } 499 | | expr '@' STR_CONST { $$ = xxbinary($2,$1,$3); setId( $$, @$); } 500 | | NEXT { $$ = xxnxtbrk($1); setId( $$, @$); } 501 | | BREAK { $$ = xxnxtbrk($1); setId( $$, @$); } 502 | ; 503 | 504 | 505 | cond : '(' expr ')' { $$ = xxcond($2); } 506 | ; 507 | 508 | ifcond : '(' expr ')' { $$ = xxifcond($2); } 509 | ; 510 | 511 | forcond : '(' SYMBOL IN expr ')' { $$ = xxforcond($2,$4); setId( $$, @$); } 512 | ; 513 | 514 | 515 | exprlist: { $$ = xxexprlist0(); setId( $$, @$); } 516 | | expr_or_assign { $$ = xxexprlist1($1, &@1); } 517 | | exprlist ';' expr_or_assign { $$ = xxexprlist2($1, $3, &@3); } 518 | | exprlist ';' { $$ = $1; setId( $$, @$); } 519 | | exprlist '\n' expr_or_assign { $$ = xxexprlist2($1, $3, &@3); } 520 | | exprlist '\n' { $$ = $1;} 521 | ; 522 | 523 | sublist : sub { $$ = xxsublist1($1); } 524 | | sublist cr ',' sub { $$ = xxsublist2($1,$4); } 525 | ; 526 | 527 | sub : { $$ = xxsub0(); } 528 | | expr { $$ = xxsub1($1, &@1); } 529 | | SYMBOL EQ_ASSIGN { $$ = xxsymsub0($1, &@1); modif_token( &@2, EQ_SUB ) ; modif_token( &@1, SYMBOL_SUB ) ; } 530 | | SYMBOL EQ_ASSIGN expr { $$ = xxsymsub1($1,$3, &@1); modif_token( &@2, EQ_SUB ) ; modif_token( &@1, SYMBOL_SUB ) ; } 531 | | STR_CONST EQ_ASSIGN { $$ = xxsymsub0($1, &@1); modif_token( &@2, EQ_SUB ) ; } 532 | | STR_CONST EQ_ASSIGN expr { $$ = xxsymsub1($1,$3, &@1); modif_token( &@2, EQ_SUB ) ; } 533 | | NULL_CONST EQ_ASSIGN { $$ = xxnullsub0(&@1); modif_token( &@2, EQ_SUB ) ; } 534 | | NULL_CONST EQ_ASSIGN expr { $$ = xxnullsub1($3, &@1); modif_token( &@2, EQ_SUB ) ; } 535 | ; 536 | 537 | formlist: { $$ = xxnullformal(); } 538 | | SYMBOL { $$ = xxfirstformal0($1); modif_token( &@1, SYMBOL_FORMALS ) ; } 539 | | SYMBOL EQ_ASSIGN expr { $$ = xxfirstformal1($1,$3); modif_token( &@1, SYMBOL_FORMALS ) ; modif_token( &@2, EQ_FORMALS ) ; } 540 | | formlist ',' SYMBOL { $$ = xxaddformal0($1,$3, &@3); modif_token( &@3, SYMBOL_FORMALS ) ; } 541 | | formlist ',' SYMBOL EQ_ASSIGN expr 542 | { $$ = xxaddformal1($1,$3,$5,&@3); modif_token( &@3, SYMBOL_FORMALS ) ; modif_token( &@4, EQ_FORMALS ) ;} 543 | ; 544 | 545 | cr : { EatLines = 1; } 546 | ; 547 | %% 548 | 549 | 550 | /*----------------------------------------------------------------------------*/ 551 | 552 | static int (*ptr_getc)(void); 553 | 554 | /* Private pushback, since file ungetc only guarantees one byte. 555 | We need up to one MBCS-worth */ 556 | #define DECLARE_YYTEXT_BUFP(bp) char *bp = yytext ; 557 | #define YYTEXT_PUSH(c, bp) do { \ 558 | if ((bp) - yytext >= sizeof(yytext) - 1){ \ 559 | error(_("input buffer overflow at line %d"), ParseState.xxlineno); \ 560 | } \ 561 | *(bp)++ = ((char)c); \ 562 | } while(0) ; 563 | 564 | #define PUSHBACK_BUFSIZE 16 565 | static int pushback[PUSHBACK_BUFSIZE]; 566 | static unsigned int npush = 0; 567 | 568 | static int prevpos = 0; 569 | static int prevlines[PUSHBACK_BUFSIZE]; 570 | static int prevcols[PUSHBACK_BUFSIZE]; 571 | static int prevbytes[PUSHBACK_BUFSIZE]; 572 | static int prevparse[PUSHBACK_BUFSIZE]; 573 | 574 | static int xxgetc(void) 575 | { 576 | int c; 577 | 578 | if(npush) c = pushback[--npush]; else c = ptr_getc(); 579 | 580 | prevpos = (prevpos + 1) % PUSHBACK_BUFSIZE; 581 | prevbytes[prevpos] = ParseState.xxbyteno; 582 | prevlines[prevpos] = ParseState.xxlineno; 583 | prevparse[prevpos] = ParseState.xxparseno; 584 | prevcols[prevpos] = ParseState.xxcolno; 585 | 586 | if (c == EOF) { 587 | EndOfFile = 1; 588 | return R_EOF; 589 | } 590 | R_ParseContextLast = (R_ParseContextLast + 1) % PARSE_CONTEXT_SIZE; 591 | R_ParseContext[R_ParseContextLast] = (char) c; 592 | 593 | if (c == '\n') { 594 | ParseState.xxlineno += 1; 595 | ParseState.xxcolno = 0; 596 | ParseState.xxbyteno = 0; 597 | ParseState.xxparseno += 1; 598 | } else { 599 | /* We only advance the column for the 1st byte in UTF-8, so handle later bytes specially */ 600 | if (!known_to_be_utf8 || (unsigned char)c < 0x80 || 0xC0 <= (unsigned char)c) 601 | ParseState.xxcolno++; 602 | ParseState.xxbyteno++; 603 | } 604 | 605 | if (c == '\t') ParseState.xxcolno = ((ParseState.xxcolno + 7) & ~7); 606 | 607 | R_ParseContextLine = ParseState.xxlineno; 608 | 609 | xxcharcount++; 610 | return c; 611 | } 612 | 613 | static int xxungetc(int c) 614 | { 615 | /* this assumes that c was the result of xxgetc; if not, some edits will be needed */ 616 | ParseState.xxlineno = prevlines[prevpos]; 617 | ParseState.xxbyteno = prevbytes[prevpos]; 618 | ParseState.xxcolno = prevcols[prevpos]; 619 | ParseState.xxparseno = prevparse[prevpos]; 620 | 621 | prevpos = (prevpos + PUSHBACK_BUFSIZE - 1) % PUSHBACK_BUFSIZE; 622 | 623 | R_ParseContextLine = ParseState.xxlineno; 624 | 625 | xxcharcount--; 626 | R_ParseContext[R_ParseContextLast] = '\0'; 627 | /* precaution as to how % is implemented for < 0 numbers */ 628 | R_ParseContextLast = (R_ParseContextLast + PARSE_CONTEXT_SIZE -1) % PARSE_CONTEXT_SIZE; 629 | if(npush >= PUSHBACK_BUFSIZE) return EOF; 630 | pushback[npush++] = c; 631 | return c; 632 | } 633 | 634 | /* 635 | * Increments/inits the token/grouping counter 636 | */ 637 | static void incrementId(void){ 638 | identifier++; 639 | } 640 | 641 | static void initId(void){ 642 | identifier = 0 ; 643 | } 644 | 645 | static SEXP makeSrcref(YYLTYPE *lloc, SEXP srcfile) 646 | { 647 | SEXP val; 648 | 649 | PROTECT(val = allocVector(INTSXP, 8)); 650 | INTEGER(val)[0] = lloc->first_line; 651 | INTEGER(val)[1] = lloc->first_byte; 652 | INTEGER(val)[2] = lloc->last_line; 653 | INTEGER(val)[3] = lloc->last_byte; 654 | INTEGER(val)[4] = lloc->first_column; 655 | INTEGER(val)[5] = lloc->last_column; 656 | INTEGER(val)[6] = lloc->first_parsed; 657 | INTEGER(val)[7] = lloc->last_parsed; 658 | setAttrib(val, R_SrcfileSymbol, srcfile); 659 | setAttrib(val, R_ClassSymbol, mkString("srcref")); 660 | UNPROTECT(1); /* val */ 661 | return val; 662 | } 663 | 664 | static void attachSrcrefs(SEXP val) 665 | { 666 | SEXP srval; 667 | 668 | PROTECT(srval = SrcRefsToVectorList()); 669 | 670 | setAttrib(val, R_SrcrefSymbol, srval); 671 | setAttrib(val, R_SrcfileSymbol, PS_SRCFILE); 672 | { 673 | YYLTYPE wholeFile; 674 | wholeFile.first_line = 1; 675 | wholeFile.first_byte = 0; 676 | wholeFile.first_column = 0; 677 | wholeFile.last_line = ParseState.xxlineno; 678 | wholeFile.last_byte = ParseState.xxbyteno; 679 | wholeFile.last_column = ParseState.xxcolno; 680 | wholeFile.first_parsed = 1; 681 | wholeFile.last_parsed = ParseState.xxparseno; 682 | setAttrib(val, R_WholeSrcrefSymbol, makeSrcref(&wholeFile, PS_SRCFILE)); 683 | } 684 | PS_SET_SRCREFS(R_NilValue); 685 | ParseState.didAttach = TRUE; 686 | UNPROTECT(1); /* srval */ 687 | } 688 | 689 | static int xxvalue(SEXP v, int k, YYLTYPE *lloc) 690 | { 691 | if (k > 2) { 692 | if (ParseState.keepSrcRefs) { 693 | SEXP s = PROTECT(makeSrcref(lloc, PS_SRCFILE)); 694 | AppendToSrcRefs(s); 695 | UNPROTECT(1); /* s */ 696 | } 697 | RELEASE_SV(v); 698 | } 699 | R_CurrentExpr = v; 700 | return k; 701 | } 702 | 703 | static SEXP xxnullformal() 704 | { 705 | SEXP ans; 706 | PRESERVE_SV(ans = R_NilValue); 707 | return ans; 708 | } 709 | 710 | static SEXP xxfirstformal0(SEXP sym) 711 | { 712 | SEXP ans; 713 | if (GenerateCode) 714 | PRESERVE_SV(ans = FirstArg(R_MissingArg, sym)); 715 | else 716 | PRESERVE_SV(ans = R_NilValue); 717 | RELEASE_SV(sym); 718 | return ans; 719 | } 720 | 721 | static SEXP xxfirstformal1(SEXP sym, SEXP expr) 722 | { 723 | SEXP ans; 724 | if (GenerateCode) 725 | PRESERVE_SV(ans = FirstArg(expr, sym)); 726 | else 727 | PRESERVE_SV(ans = R_NilValue); 728 | RELEASE_SV(expr); 729 | RELEASE_SV(sym); 730 | return ans; 731 | } 732 | 733 | static SEXP xxaddformal0(SEXP formlist, SEXP sym, YYLTYPE *lloc) 734 | { 735 | SEXP ans; 736 | if (GenerateCode) { 737 | CheckFormalArgs(formlist, sym, lloc); 738 | NextArg(formlist, R_MissingArg, sym); 739 | ans = formlist; 740 | } else { 741 | RELEASE_SV(formlist); 742 | PRESERVE_SV(ans = R_NilValue); 743 | } 744 | RELEASE_SV(sym); 745 | return ans; 746 | } 747 | 748 | static SEXP xxaddformal1(SEXP formlist, SEXP sym, SEXP expr, YYLTYPE *lloc) 749 | { 750 | SEXP ans; 751 | if (GenerateCode) { 752 | CheckFormalArgs(formlist, sym, lloc); 753 | NextArg(formlist, expr, sym); 754 | ans = formlist; 755 | } else { 756 | RELEASE_SV(formlist); 757 | PRESERVE_SV(ans = R_NilValue); 758 | } 759 | RELEASE_SV(expr); 760 | RELEASE_SV(sym); 761 | return ans; 762 | } 763 | 764 | static SEXP xxexprlist0(void) 765 | { 766 | SEXP ans; 767 | if (GenerateCode) { 768 | PRESERVE_SV(ans = NewList()); 769 | if (ParseState.keepSrcRefs) { 770 | setAttrib(ans, R_SrcrefSymbol, PS_SRCREFS); 771 | PS_SET_SRCREFS(R_NilValue); 772 | } 773 | } 774 | else 775 | PRESERVE_SV(ans = R_NilValue); 776 | return ans; 777 | } 778 | 779 | static SEXP xxexprlist1(SEXP expr, YYLTYPE *lloc) 780 | { 781 | SEXP ans; 782 | if (GenerateCode) { 783 | PRESERVE_SV(ans = NewList()); 784 | if (ParseState.keepSrcRefs) { 785 | setAttrib(ans, R_SrcrefSymbol, PS_SRCREFS); 786 | SEXP s = PROTECT(makeSrcref(lloc, PS_SRCFILE)); 787 | SetSingleSrcRef(s); 788 | UNPROTECT(1); /* s */ 789 | } 790 | GrowList(ans, expr); 791 | } 792 | else 793 | PRESERVE_SV(ans = R_NilValue); 794 | RELEASE_SV(expr); 795 | return ans; 796 | } 797 | 798 | static SEXP xxexprlist2(SEXP exprlist, SEXP expr, YYLTYPE *lloc) 799 | { 800 | SEXP ans; 801 | if (GenerateCode) { 802 | if (ParseState.keepSrcRefs) { 803 | SEXP s = PROTECT(makeSrcref(lloc, PS_SRCFILE)); 804 | AppendToSrcRefs(s); 805 | UNPROTECT(1); /* s */ 806 | } 807 | GrowList(exprlist, expr); 808 | ans = exprlist; 809 | } else { 810 | RELEASE_SV(exprlist); 811 | PRESERVE_SV(ans = R_NilValue); 812 | } 813 | RELEASE_SV(expr); 814 | return ans; 815 | } 816 | 817 | static SEXP xxsub0(void) 818 | { 819 | SEXP ans; 820 | if (GenerateCode) 821 | PRESERVE_SV(ans = lang2(R_MissingArg,R_NilValue)); 822 | else 823 | PRESERVE_SV(ans = R_NilValue); 824 | return ans; 825 | } 826 | 827 | static SEXP xxsub1(SEXP expr, YYLTYPE *lloc) 828 | { 829 | SEXP ans; 830 | if (GenerateCode) 831 | PRESERVE_SV(ans = TagArg(expr, R_NilValue, lloc)); 832 | else 833 | PRESERVE_SV(ans = R_NilValue); 834 | RELEASE_SV(expr); 835 | return ans; 836 | } 837 | 838 | static SEXP xxsymsub0(SEXP sym, YYLTYPE *lloc) 839 | { 840 | SEXP ans; 841 | if (GenerateCode) 842 | PRESERVE_SV(ans = TagArg(R_MissingArg, sym, lloc)); 843 | else 844 | PRESERVE_SV(ans = R_NilValue); 845 | RELEASE_SV(sym); 846 | return ans; 847 | } 848 | 849 | static SEXP xxsymsub1(SEXP sym, SEXP expr, YYLTYPE *lloc) 850 | { 851 | SEXP ans; 852 | if (GenerateCode) 853 | PRESERVE_SV(ans = TagArg(expr, sym, lloc)); 854 | else 855 | PRESERVE_SV(ans = R_NilValue); 856 | RELEASE_SV(expr); 857 | RELEASE_SV(sym); 858 | return ans; 859 | } 860 | 861 | static SEXP xxnullsub0(YYLTYPE *lloc) 862 | { 863 | SEXP ans; 864 | if (GenerateCode) 865 | PRESERVE_SV(ans = TagArg(R_MissingArg, R_NullSymbol, lloc)); 866 | else 867 | PRESERVE_SV(ans = R_NilValue); 868 | RELEASE_SV(R_NilValue); 869 | return ans; 870 | } 871 | 872 | static SEXP xxnullsub1(SEXP expr, YYLTYPE *lloc) 873 | { 874 | SEXP ans; 875 | if (GenerateCode) 876 | PRESERVE_SV(ans = TagArg(expr, R_NullSymbol, lloc)); 877 | else 878 | PRESERVE_SV(ans = R_NilValue); 879 | RELEASE_SV(R_NilValue); 880 | RELEASE_SV(expr); 881 | return ans; 882 | } 883 | 884 | 885 | static SEXP xxsublist1(SEXP sub) 886 | { 887 | SEXP ans; 888 | if (GenerateCode) 889 | PRESERVE_SV(ans = FirstArg(CAR(sub),CADR(sub))); 890 | else 891 | PRESERVE_SV(ans = R_NilValue); 892 | RELEASE_SV(sub); 893 | return ans; 894 | } 895 | 896 | static SEXP xxsublist2(SEXP sublist, SEXP sub) 897 | { 898 | SEXP ans; 899 | if (GenerateCode) { 900 | NextArg(sublist, CAR(sub), CADR(sub)); 901 | ans = sublist; 902 | } else { 903 | RELEASE_SV(sublist); 904 | PRESERVE_SV(ans = R_NilValue); 905 | } 906 | RELEASE_SV(sub); 907 | return ans; 908 | } 909 | 910 | static SEXP xxcond(SEXP expr) 911 | { 912 | EatLines = 1; 913 | return expr; 914 | } 915 | 916 | static SEXP xxifcond(SEXP expr) 917 | { 918 | EatLines = 1; 919 | return expr; 920 | } 921 | 922 | static SEXP xxif(SEXP ifsym, SEXP cond, SEXP expr) 923 | { 924 | SEXP ans; 925 | if (GenerateCode) 926 | PRESERVE_SV(ans = lang3(ifsym, cond, expr)); 927 | else 928 | PRESERVE_SV(ans = R_NilValue); 929 | RELEASE_SV(expr); 930 | RELEASE_SV(cond); 931 | return ans; 932 | } 933 | 934 | static SEXP xxifelse(SEXP ifsym, SEXP cond, SEXP ifexpr, SEXP elseexpr) 935 | { 936 | SEXP ans; 937 | if (GenerateCode) 938 | PRESERVE_SV(ans = lang4(ifsym, cond, ifexpr, elseexpr)); 939 | else 940 | PRESERVE_SV(ans = R_NilValue); 941 | RELEASE_SV(elseexpr); 942 | RELEASE_SV(ifexpr); 943 | RELEASE_SV(cond); 944 | return ans; 945 | } 946 | 947 | static SEXP xxforcond(SEXP sym, SEXP expr) 948 | { 949 | SEXP ans; 950 | EatLines = 1; 951 | if (GenerateCode) 952 | PRESERVE_SV(ans = LCONS(sym, expr)); 953 | else 954 | PRESERVE_SV(ans = R_NilValue); 955 | RELEASE_SV(expr); 956 | RELEASE_SV(sym); 957 | return ans; 958 | } 959 | 960 | static SEXP xxfor(SEXP forsym, SEXP forcond, SEXP body) 961 | { 962 | SEXP ans; 963 | if (GenerateCode) 964 | PRESERVE_SV(ans = lang4(forsym, CAR(forcond), CDR(forcond), body)); 965 | else 966 | PRESERVE_SV(ans = R_NilValue); 967 | RELEASE_SV(body); 968 | RELEASE_SV(forcond); 969 | return ans; 970 | } 971 | 972 | static SEXP xxwhile(SEXP whilesym, SEXP cond, SEXP body) 973 | { 974 | SEXP ans; 975 | if (GenerateCode) 976 | PRESERVE_SV(ans = lang3(whilesym, cond, body)); 977 | else 978 | PRESERVE_SV(ans = R_NilValue); 979 | RELEASE_SV(body); 980 | RELEASE_SV(cond); 981 | return ans; 982 | } 983 | 984 | static SEXP xxrepeat(SEXP repeatsym, SEXP body) 985 | { 986 | SEXP ans; 987 | if (GenerateCode) 988 | PRESERVE_SV(ans = lang2(repeatsym, body)); 989 | else 990 | PRESERVE_SV(ans = R_NilValue); 991 | RELEASE_SV(body); 992 | return ans; 993 | } 994 | 995 | static SEXP xxnxtbrk(SEXP keyword) 996 | { 997 | if (GenerateCode) 998 | PRESERVE_SV(keyword = lang1(keyword)); 999 | else 1000 | PRESERVE_SV(keyword = R_NilValue); 1001 | return keyword; 1002 | } 1003 | 1004 | static SEXP xxfuncall(SEXP expr, SEXP args) 1005 | { 1006 | SEXP ans, sav_expr = expr; 1007 | if (GenerateCode) { 1008 | if (isString(expr)) 1009 | expr = installTrChar(STRING_ELT(expr, 0)); 1010 | PROTECT(expr); 1011 | if (length(CDR(args)) == 1 && CADR(args) == R_MissingArg && TAG(CDR(args)) == R_NilValue ) 1012 | ans = lang1(expr); 1013 | else 1014 | ans = LCONS(expr, CDR(args)); 1015 | UNPROTECT(1); /* expr */ 1016 | PRESERVE_SV(ans); 1017 | } else 1018 | PRESERVE_SV(ans = R_NilValue); 1019 | 1020 | RELEASE_SV(args); 1021 | RELEASE_SV(sav_expr); 1022 | return ans; 1023 | } 1024 | 1025 | static SEXP mkString2(const char *s, size_t len, Rboolean escaped) 1026 | { 1027 | SEXP t; 1028 | cetype_t enc = CE_NATIVE; 1029 | 1030 | if(known_to_be_latin1) enc = CE_LATIN1; 1031 | else if(!escaped && known_to_be_utf8) enc = CE_UTF8; 1032 | 1033 | PROTECT(t = allocVector(STRSXP, 1)); 1034 | SET_STRING_ELT(t, 0, mkCharLenCE(s, (int) len, enc)); 1035 | UNPROTECT(1); /* t */ 1036 | return t; 1037 | } 1038 | 1039 | static SEXP xxdefun(SEXP fname, SEXP formals, SEXP body, YYLTYPE *lloc) 1040 | { 1041 | SEXP ans, srcref; 1042 | 1043 | if (GenerateCode) { 1044 | if (ParseState.keepSrcRefs) { 1045 | srcref = makeSrcref(lloc, PS_SRCFILE); 1046 | ParseState.didAttach = TRUE; 1047 | } else 1048 | srcref = R_NilValue; 1049 | PRESERVE_SV(ans = lang4(fname, CDR(formals), body, srcref)); 1050 | } else 1051 | PRESERVE_SV(ans = R_NilValue); 1052 | RELEASE_SV(body); 1053 | RELEASE_SV(formals); 1054 | return ans; 1055 | } 1056 | 1057 | static SEXP xxdefunanon(SEXP formals, SEXP body, YYLTYPE *lloc) 1058 | { 1059 | SEXP fname = install("function"); 1060 | 1061 | return xxdefun(fname, formals, body, lloc); 1062 | } 1063 | 1064 | static SEXP xxunary(SEXP op, SEXP arg) 1065 | { 1066 | SEXP ans; 1067 | if (GenerateCode) 1068 | PRESERVE_SV(ans = lang2(op, arg)); 1069 | else 1070 | PRESERVE_SV(ans = R_NilValue); 1071 | RELEASE_SV(arg); 1072 | return ans; 1073 | } 1074 | 1075 | static SEXP xxbinary(SEXP n1, SEXP n2, SEXP n3) 1076 | { 1077 | SEXP ans; 1078 | if (GenerateCode) 1079 | PRESERVE_SV(ans = lang3(n1, n2, n3)); 1080 | else 1081 | PRESERVE_SV(ans = R_NilValue); 1082 | RELEASE_SV(n2); 1083 | RELEASE_SV(n3); 1084 | return ans; 1085 | } 1086 | 1087 | static SEXP xxparen(SEXP n1, SEXP n2) 1088 | { 1089 | SEXP ans; 1090 | if (GenerateCode) 1091 | PRESERVE_SV(ans = lang2(n1, n2)); 1092 | else 1093 | PRESERVE_SV(ans = R_NilValue); 1094 | RELEASE_SV(n2); 1095 | return ans; 1096 | } 1097 | 1098 | 1099 | /* This should probably use CONS rather than LCONS, but 1100 | it shouldn't matter and we would rather not meddle 1101 | See PR#7055 */ 1102 | 1103 | static SEXP xxsubscript(SEXP a1, SEXP a2, SEXP a3) 1104 | { 1105 | SEXP ans; 1106 | if (GenerateCode) 1107 | PRESERVE_SV(ans = LCONS(a2, CONS(a1, CDR(a3)))); 1108 | else 1109 | PRESERVE_SV(ans = R_NilValue); 1110 | RELEASE_SV(a3); 1111 | RELEASE_SV(a1); 1112 | return ans; 1113 | } 1114 | 1115 | static SEXP xxexprlist(SEXP a1, YYLTYPE *lloc, SEXP a2) 1116 | { 1117 | SEXP ans; 1118 | SEXP prevSrcrefs; 1119 | 1120 | EatLines = 0; 1121 | if (GenerateCode) { 1122 | SET_TYPEOF(a2, LANGSXP); 1123 | SETCAR(a2, a1); 1124 | if (ParseState.keepSrcRefs) { 1125 | PROTECT(prevSrcrefs = getAttrib(a2, R_SrcrefSymbol)); 1126 | SEXP s = PROTECT(makeSrcref(lloc, PS_SRCFILE)); 1127 | PrependToSrcRefs(s); 1128 | attachSrcrefs(a2); 1129 | UNPROTECT(2); /* prevSrcrefs, s */ 1130 | /* SrcRefs got NAMED by being an attribute, preventively 1131 | getAttrib(), but it has not in fact been referenced. Set NAMED 1132 | to 0 to avoid overhead in further setAttrib calls due to cycle 1133 | detection. */ 1134 | SET_NAMED(prevSrcrefs, 0); 1135 | PS_SET_SRCREFS(prevSrcrefs); 1136 | } 1137 | PRESERVE_SV(ans = a2); 1138 | } 1139 | else 1140 | PRESERVE_SV(ans = R_NilValue); 1141 | RELEASE_SV(a2); 1142 | return ans; 1143 | } 1144 | 1145 | /*--------------------------------------------------------------------------*/ 1146 | 1147 | static SEXP TagArg(SEXP arg, SEXP tag, YYLTYPE *lloc) 1148 | { 1149 | switch (TYPEOF(tag)) { 1150 | case STRSXP: 1151 | tag = installTrChar(STRING_ELT(tag, 0)); 1152 | case NILSXP: 1153 | case SYMSXP: 1154 | return lang2(arg, tag); 1155 | default: 1156 | error(_("incorrect tag type at line %d"), lloc->first_line); return R_NilValue/* -Wall */; 1157 | } 1158 | } 1159 | 1160 | 1161 | /* Stretchy List Structures : Lists are created and grown using a special */ 1162 | /* dotted pair. The CAR of the list points to the last cons-cell in the */ 1163 | /* list and the CDR points to the first. The list can be extracted from */ 1164 | /* the pair by taking its CDR, while the CAR gives fast access to the end */ 1165 | /* of the list. */ 1166 | 1167 | /* These functions must be called with arguments protected */ 1168 | 1169 | /* Create a stretchy-list dotted pair */ 1170 | static SEXP NewList(void) 1171 | { 1172 | SEXP s = CONS(R_NilValue, R_NilValue); 1173 | SETCAR(s, s); 1174 | return s; 1175 | } 1176 | 1177 | /* Add a new element at the end of a stretchy list */ 1178 | static void GrowList(SEXP l, SEXP s) 1179 | { 1180 | SEXP tmp; 1181 | tmp = CONS(s, R_NilValue); 1182 | SETCDR(CAR(l), tmp); 1183 | SETCAR(l, tmp); 1184 | } 1185 | 1186 | /* Create a stretchy list with a single named element */ 1187 | static SEXP FirstArg(SEXP s, SEXP tag) 1188 | { 1189 | SEXP tmp; 1190 | PROTECT(tmp = NewList()); 1191 | GrowList(tmp, s); 1192 | SET_TAG(CAR(tmp), tag); 1193 | UNPROTECT(1); /* tmp */ 1194 | return tmp; 1195 | } 1196 | 1197 | /* Add named element to the end of a stretchy list */ 1198 | static void NextArg(SEXP l, SEXP s, SEXP tag) 1199 | { 1200 | GrowList(l, s); 1201 | SET_TAG(CAR(l), tag); 1202 | } 1203 | 1204 | /* SrcRefs (PS_SRCREFS) are represented as R_NilValue (empty) or by 1205 | a stretchy list (which includes another representation for empty) 1206 | for fast append operation. */ 1207 | 1208 | static void SetSingleSrcRef(SEXP r) 1209 | { 1210 | SEXP l; 1211 | 1212 | PROTECT(l = NewList()); 1213 | GrowList(l, r); 1214 | PS_SET_SRCREFS(l); 1215 | UNPROTECT(1); /* l */ 1216 | } 1217 | 1218 | static void AppendToSrcRefs(SEXP r) 1219 | { 1220 | SEXP l = PS_SRCREFS; 1221 | if (l == R_NilValue) 1222 | SetSingleSrcRef(r); 1223 | else 1224 | GrowList(l, r); 1225 | } 1226 | 1227 | static void PrependToSrcRefs(SEXP r) 1228 | { 1229 | SEXP l = PS_SRCREFS; 1230 | if (l == R_NilValue) 1231 | SetSingleSrcRef(r); 1232 | else if (CDR(l) == R_NilValue) 1233 | /* adding to empty stretchy list */ 1234 | GrowList(l, r); 1235 | else { 1236 | SEXP tmp = CONS(r, CDR(l)); 1237 | SETCDR(l, tmp); 1238 | } 1239 | } 1240 | 1241 | static SEXP SrcRefsToVectorList() { 1242 | SEXP l = PS_SRCREFS; 1243 | if (l == R_NilValue) 1244 | return PairToVectorList(l); 1245 | else 1246 | return PairToVectorList(CDR(l)); 1247 | } 1248 | 1249 | /*--------------------------------------------------------------------------*/ 1250 | 1251 | /* 1252 | * Parsing Entry Points: 1253 | * 1254 | * The Following entry points provide language parsing facilities. 1255 | * Note that there are separate entry points for parsing IoBuffers 1256 | * (i.e. interactve use), files and R character strings. 1257 | * 1258 | * The entry points provide the same functionality, they just 1259 | * set things up in slightly different ways. 1260 | * 1261 | * The following routines parse a single expression: 1262 | * 1263 | * 1264 | * SEXP R_Parse1File(FILE *fp, int gencode, ParseStatus *status, Rboolean first) 1265 | * (used for R_ReplFile in main.c) 1266 | * 1267 | * SEXP R_Parse1Buffer(IoBuffer *buffer, int gencode, ParseStatus *status, Rboolean first) 1268 | * (used for ReplIteration and R_ReplDLLdo1 in main.c) 1269 | * 1270 | * The success of the parse is indicated as folllows: 1271 | * 1272 | * 1273 | * status = PARSE_NULL - there was no statement to parse 1274 | * PARSE_OK - complete statement 1275 | * PARSE_INCOMPLETE - incomplete statement 1276 | * PARSE_ERROR - syntax error 1277 | * PARSE_EOF - end of file 1278 | * 1279 | * 1280 | * The following routines parse several expressions and return 1281 | * their values in a single expression vector. 1282 | * 1283 | * SEXP R_ParseFile(FILE *fp, int n, ParseStatus *status, SEXP srcfile) 1284 | * (used for do_edit in file edit.c) 1285 | * 1286 | * SEXP R_ParseVector(SEXP *text, int n, ParseStatus *status, SEXP srcfile) 1287 | * (public, and used by parse(text=) in file source.c) 1288 | * 1289 | * SEXP R_ParseBuffer(IoBuffer *buffer, int n, ParseStatus *status, SEXP prompt, SEXP srcfile) 1290 | * (used by parse(file="") in file source.c) 1291 | * 1292 | * SEXP R_ParseConn(Rconnection con, int n, ParseStatus *status, SEXP srcfile) 1293 | * (used by parse(file=) in file source.c) 1294 | * 1295 | * Here, status is 1 for a successful parse and 0 if parsing failed 1296 | * for some reason. 1297 | */ 1298 | 1299 | #define CONTEXTSTACK_SIZE 50 1300 | static int SavedToken; 1301 | static SEXP SavedLval; 1302 | static char contextstack[CONTEXTSTACK_SIZE], *contextp; 1303 | 1304 | static void PutSrcRefState(SrcRefState *state); 1305 | static void UseSrcRefState(SrcRefState *state); 1306 | 1307 | /* This is called once when R starts up. */ 1308 | attribute_hidden 1309 | void InitParser(void) 1310 | { 1311 | ParseState.sexps = allocVector(VECSXP, 7); /* initialized to R_NilValue */ 1312 | ParseState.data = R_NilValue; 1313 | INIT_SVS(); 1314 | R_PreserveObject(ParseState.sexps); /* never released in an R session */ 1315 | R_NullSymbol = install("NULL"); 1316 | } 1317 | 1318 | static void FinalizeSrcRefStateOnError(void *dummy) 1319 | { 1320 | R_FinalizeSrcRefState(); 1321 | } 1322 | 1323 | /* This is called each time a new parse sequence begins */ 1324 | attribute_hidden 1325 | void R_InitSrcRefState(RCNTXT* cptr) 1326 | { 1327 | if (busy) { 1328 | SrcRefState *prev = malloc(sizeof(SrcRefState)); 1329 | if (prev == NULL) 1330 | error(_("allocation of source reference state failed")); 1331 | PutSrcRefState(prev); 1332 | ParseState.prevState = prev; 1333 | ParseState.sexps = allocVector(VECSXP, 7); 1334 | ParseState.data = R_NilValue; 1335 | INIT_SVS(); 1336 | R_PreserveObject(ParseState.sexps); 1337 | /* ParseState.sexps released in R_FinalizeSrcRefState */ 1338 | } else 1339 | /* re-use data, text, ids arrays */ 1340 | ParseState.prevState = NULL; 1341 | /* set up context _after_ PutSrcRefState */ 1342 | begincontext(cptr, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv, 1343 | R_NilValue, R_NilValue); 1344 | cptr->cend = &FinalizeSrcRefStateOnError; 1345 | cptr->cenddata = NULL; 1346 | ParseState.keepSrcRefs = FALSE; 1347 | ParseState.keepParseData = TRUE; 1348 | ParseState.didAttach = FALSE; 1349 | PS_SET_SRCFILE(R_NilValue); 1350 | PS_SET_ORIGINAL(R_NilValue); 1351 | ParseState.data_count = 0; 1352 | ParseState.xxlineno = 1; 1353 | ParseState.xxcolno = 0; 1354 | ParseState.xxbyteno = 0; 1355 | ParseState.xxparseno = 1; 1356 | busy = TRUE; 1357 | } 1358 | 1359 | attribute_hidden 1360 | void R_FinalizeSrcRefState(void) 1361 | { 1362 | PS_SET_SRCFILE(R_NilValue); 1363 | PS_SET_ORIGINAL(R_NilValue); 1364 | CLEAR_SVS(); 1365 | 1366 | /* Free the data, text and ids if we are restoring a previous state, 1367 | or if they have grown too large */ 1368 | if (PS_DATA != R_NilValue) { 1369 | if (ParseState.prevState || DATA_COUNT > MAX_DATA_COUNT) { 1370 | PS_SET_DATA(R_NilValue); 1371 | PS_SET_TEXT(R_NilValue); 1372 | } else /* Remove all the strings from the text vector so they don't take up memory, and clean up data */ 1373 | for (int i=0; i < ParseState.data_count; i++) { 1374 | SET_STRING_ELT(PS_TEXT, i, R_BlankString); 1375 | _PARENT(i) = 0; 1376 | } 1377 | } 1378 | if (PS_IDS != R_NilValue) { 1379 | if (ParseState.prevState || ID_COUNT > MAX_DATA_COUNT) { 1380 | PS_SET_IDS(R_NilValue); 1381 | } else {/* Remove the parent records */ 1382 | if (identifier > ID_COUNT) identifier = ID_COUNT; 1383 | for (int i=0; i < identifier; i++) { 1384 | ID_ID(i) = 0; 1385 | ID_PARENT(i) = 0; 1386 | } 1387 | } 1388 | } 1389 | ParseState.data_count = NA_INTEGER; 1390 | if (ParseState.prevState) { 1391 | R_ReleaseObject(ParseState.sexps); 1392 | SrcRefState *prev = ParseState.prevState; 1393 | UseSrcRefState(prev); 1394 | free(prev); 1395 | } else 1396 | busy = FALSE; 1397 | } 1398 | 1399 | static void UseSrcRefState(SrcRefState *state) 1400 | { 1401 | ParseState.keepSrcRefs = state->keepSrcRefs; 1402 | ParseState.keepParseData = state->keepParseData; 1403 | ParseState.sexps = state->sexps; 1404 | ParseState.data = state->data; 1405 | ParseState.data_count = state->data_count; 1406 | ParseState.xxlineno = state->xxlineno; 1407 | ParseState.xxcolno = state->xxcolno; 1408 | ParseState.xxbyteno = state->xxbyteno; 1409 | ParseState.xxparseno = state->xxparseno; 1410 | ParseState.prevState = state->prevState; 1411 | busy = TRUE; 1412 | } 1413 | 1414 | static void PutSrcRefState(SrcRefState *state) 1415 | { 1416 | state->keepSrcRefs = ParseState.keepSrcRefs; 1417 | state->keepParseData = ParseState.keepParseData; 1418 | state->sexps = ParseState.sexps; 1419 | state->data = ParseState.data; 1420 | state->data_count = ParseState.data_count; 1421 | state->xxlineno = ParseState.xxlineno; 1422 | state->xxcolno = ParseState.xxcolno; 1423 | state->xxbyteno = ParseState.xxbyteno; 1424 | state->xxparseno = ParseState.xxparseno; 1425 | state->prevState = ParseState.prevState; 1426 | } 1427 | 1428 | static void ParseInit(void) 1429 | { 1430 | contextp = contextstack; 1431 | *contextp = ' '; 1432 | SavedToken = 0; 1433 | SavedLval = R_NilValue; 1434 | EatLines = 0; 1435 | EndOfFile = 0; 1436 | xxcharcount = 0; 1437 | npush = 0; 1438 | } 1439 | 1440 | static void initData(void) 1441 | { 1442 | ParseState.data_count = 0 ; 1443 | } 1444 | 1445 | 1446 | static void ParseContextInit(void) 1447 | { 1448 | R_ParseContextLast = 0; 1449 | R_ParseContext[0] = '\0'; 1450 | 1451 | /* starts the identifier counter*/ 1452 | initId(); 1453 | initData(); 1454 | } 1455 | 1456 | static SEXP R_Parse1(ParseStatus *status) 1457 | { 1458 | switch(yyparse()) { 1459 | case 0: /* End of file */ 1460 | *status = PARSE_EOF; 1461 | if (EndOfFile == 2) *status = PARSE_INCOMPLETE; 1462 | break; 1463 | case 1: /* Syntax error / incomplete */ 1464 | *status = PARSE_ERROR; 1465 | if (EndOfFile) *status = PARSE_INCOMPLETE; 1466 | break; 1467 | case 2: /* Empty Line */ 1468 | *status = PARSE_NULL; 1469 | break; 1470 | case 3: /* Valid expr '\n' terminated */ 1471 | case 4: /* Valid expr ';' terminated */ 1472 | *status = PARSE_OK; 1473 | break; 1474 | } 1475 | return R_CurrentExpr; 1476 | } 1477 | 1478 | static FILE *fp_parse; 1479 | 1480 | static int file_getc(void) 1481 | { 1482 | return R_fgetc(fp_parse); 1483 | } 1484 | 1485 | /* used in main.c */ 1486 | attribute_hidden 1487 | SEXP R_Parse1File(FILE *fp, int gencode, ParseStatus *status) 1488 | { 1489 | ParseInit(); 1490 | ParseContextInit(); 1491 | GenerateCode = gencode; 1492 | fp_parse = fp; 1493 | ptr_getc = file_getc; 1494 | R_Parse1(status); 1495 | CLEAR_SVS(); 1496 | return R_CurrentExpr; 1497 | } 1498 | 1499 | static IoBuffer *iob; 1500 | 1501 | static int buffer_getc(void) 1502 | { 1503 | return R_IoBufferGetc(iob); 1504 | } 1505 | 1506 | /* Used only in main.c */ 1507 | attribute_hidden 1508 | SEXP R_Parse1Buffer(IoBuffer *buffer, int gencode, ParseStatus *status) 1509 | { 1510 | Rboolean keepSource = FALSE; 1511 | RCNTXT cntxt; 1512 | 1513 | R_InitSrcRefState(&cntxt); 1514 | if (gencode) { 1515 | keepSource = asLogical(GetOption1(install("keep.source"))); 1516 | if (keepSource) { 1517 | ParseState.keepSrcRefs = TRUE; 1518 | ParseState.keepParseData = 1519 | asLogical(GetOption1(install("keep.parse.data"))); 1520 | PS_SET_SRCFILE(NewEnvironment(R_NilValue, R_NilValue, R_EmptyEnv)); 1521 | PS_SET_ORIGINAL(PS_SRCFILE); 1522 | PS_SET_SRCREFS(R_NilValue); 1523 | } 1524 | } 1525 | ParseInit(); 1526 | ParseContextInit(); 1527 | GenerateCode = gencode; 1528 | iob = buffer; 1529 | ptr_getc = buffer_getc; 1530 | R_Parse1(status); 1531 | if (gencode && keepSource) { 1532 | if (ParseState.didAttach) { 1533 | int buflen = R_IoBufferReadOffset(buffer); 1534 | char buf[buflen+1]; 1535 | SEXP class; 1536 | R_IoBufferReadReset(buffer); 1537 | for (int i=0; i= 0 && i >= n) break; 1588 | ParseInit(); 1589 | rval = R_Parse1(status); 1590 | switch(*status) { 1591 | case PARSE_NULL: 1592 | break; 1593 | case PARSE_OK: 1594 | PROTECT(rval); 1595 | GrowList(t, rval); 1596 | UNPROTECT(1); /* rval */ 1597 | i++; 1598 | break; 1599 | case PARSE_INCOMPLETE: 1600 | case PARSE_ERROR: 1601 | UNPROTECT(1); /* t */ 1602 | if (ParseState.keepSrcRefs && ParseState.keepParseData) 1603 | finalizeData(); 1604 | endcontext(&cntxt); 1605 | R_FinalizeSrcRefState(); 1606 | return R_NilValue; 1607 | break; 1608 | case PARSE_EOF: 1609 | goto finish; 1610 | break; 1611 | } 1612 | } 1613 | 1614 | finish: 1615 | 1616 | t = CDR(t); 1617 | PROTECT(rval = allocVector(EXPRSXP, length(t))); 1618 | for (n = 0 ; n < LENGTH(rval) ; n++, t = CDR(t)) 1619 | SET_VECTOR_ELT(rval, n, CAR(t)); 1620 | if (ParseState.keepSrcRefs) { 1621 | if (ParseState.keepParseData) 1622 | finalizeData(); 1623 | attachSrcrefs(rval); 1624 | } 1625 | UNPROTECT(2); /* t, rval */ 1626 | PROTECT(rval); 1627 | endcontext(&cntxt); 1628 | R_FinalizeSrcRefState(); 1629 | UNPROTECT(1); /* rval */ 1630 | *status = PARSE_OK; 1631 | return rval; 1632 | } 1633 | 1634 | /* used in edit.c */ 1635 | attribute_hidden 1636 | SEXP R_ParseFile(FILE *fp, int n, ParseStatus *status, SEXP srcfile) 1637 | { 1638 | GenerateCode = 1; 1639 | fp_parse = fp; 1640 | ptr_getc = file_getc; 1641 | return R_Parse(n, status, srcfile); 1642 | } 1643 | 1644 | #include "Rconnections.h" 1645 | static Rconnection con_parse; 1646 | 1647 | /* need to handle incomplete last line */ 1648 | static int con_getc(void) 1649 | { 1650 | int c; 1651 | static int last=-1000; 1652 | 1653 | c = Rconn_fgetc(con_parse); 1654 | if (c == EOF && last != '\n') c = '\n'; 1655 | return (last = c); 1656 | } 1657 | 1658 | /* used in source.c */ 1659 | attribute_hidden 1660 | SEXP R_ParseConn(Rconnection con, int n, ParseStatus *status, SEXP srcfile) 1661 | { 1662 | GenerateCode = 1; 1663 | con_parse = con; 1664 | ptr_getc = con_getc; 1665 | return R_Parse(n, status, srcfile); 1666 | } 1667 | 1668 | /* This one is public, and used in source.c */ 1669 | SEXP R_ParseVector(SEXP text, int n, ParseStatus *status, SEXP srcfile) 1670 | { 1671 | SEXP rval; 1672 | TextBuffer textb; 1673 | R_TextBufferInit(&textb, text); 1674 | txtb = &textb; 1675 | GenerateCode = 1; 1676 | ptr_getc = text_getc; 1677 | rval = R_Parse(n, status, srcfile); 1678 | R_TextBufferFree(&textb); 1679 | return rval; 1680 | } 1681 | 1682 | static const char *Prompt(SEXP prompt, int type) 1683 | { 1684 | if(type == 1) { 1685 | if(length(prompt) <= 0) { 1686 | return CHAR(STRING_ELT(GetOption1(install("prompt")), 0)); 1687 | } 1688 | else 1689 | return CHAR(STRING_ELT(prompt, 0)); 1690 | } 1691 | else { 1692 | return CHAR(STRING_ELT(GetOption1(install("continue")), 0)); 1693 | } 1694 | } 1695 | 1696 | /* used in source.c */ 1697 | attribute_hidden 1698 | SEXP R_ParseBuffer(IoBuffer *buffer, int n, ParseStatus *status, SEXP prompt, 1699 | SEXP srcfile) 1700 | { 1701 | SEXP rval, t; 1702 | char *bufp, buf[CONSOLE_BUFFER_SIZE]; 1703 | int c, i, prompt_type = 1; 1704 | RCNTXT cntxt; 1705 | 1706 | R_IoBufferWriteReset(buffer); 1707 | buf[0] = '\0'; 1708 | bufp = buf; 1709 | R_InitSrcRefState(&cntxt); 1710 | ParseContextInit(); 1711 | 1712 | GenerateCode = 1; 1713 | iob = buffer; 1714 | ptr_getc = buffer_getc; 1715 | 1716 | PS_SET_SRCFILE(srcfile); 1717 | PS_SET_ORIGINAL(srcfile); 1718 | 1719 | if (isEnvironment(srcfile)) { 1720 | ParseState.keepSrcRefs = TRUE; 1721 | ParseState.keepParseData = 1722 | asLogical(GetOption1(install("keep.parse.data"))); 1723 | PS_SET_SRCREFS(R_NilValue); 1724 | } 1725 | 1726 | PROTECT(t = NewList()); 1727 | for(i = 0; ; ) { 1728 | if(n >= 0 && i >= n) break; 1729 | if (!*bufp) { 1730 | if(R_ReadConsole((char *) Prompt(prompt, prompt_type), 1731 | (unsigned char *)buf, CONSOLE_BUFFER_SIZE, 1) == 0) 1732 | goto finish; 1733 | bufp = buf; 1734 | } 1735 | while ((c = *bufp++)) { 1736 | R_IoBufferPutc(c, buffer); 1737 | if (c == ';' || c == '\n') break; 1738 | } 1739 | 1740 | /* Was a call to R_Parse1Buffer, but we don't want to reset 1741 | xxlineno and xxcolno */ 1742 | ParseInit(); 1743 | /* Not calling ParseContextInit() as it resets parse data, and 1744 | to be consistent with R_Parse */ 1745 | R_Parse1(status); 1746 | rval = R_CurrentExpr; 1747 | 1748 | switch(*status) { 1749 | case PARSE_NULL: 1750 | break; 1751 | case PARSE_OK: 1752 | PROTECT(rval); 1753 | GrowList(t, rval); 1754 | UNPROTECT(1); /* rval */ 1755 | i++; 1756 | break; 1757 | case PARSE_INCOMPLETE: 1758 | case PARSE_ERROR: 1759 | UNPROTECT(1); /* t */ 1760 | R_IoBufferWriteReset(buffer); 1761 | endcontext(&cntxt); 1762 | R_FinalizeSrcRefState(); 1763 | return R_NilValue; 1764 | break; 1765 | case PARSE_EOF: 1766 | goto finish; 1767 | break; 1768 | } 1769 | } 1770 | finish: 1771 | R_IoBufferWriteReset(buffer); 1772 | t = CDR(t); 1773 | PROTECT(rval = allocVector(EXPRSXP, length(t))); 1774 | for (n = 0 ; n < LENGTH(rval) ; n++, t = CDR(t)) 1775 | SET_VECTOR_ELT(rval, n, CAR(t)); 1776 | if (ParseState.keepSrcRefs) { 1777 | if (ParseState.keepParseData) 1778 | finalizeData(); 1779 | attachSrcrefs(rval); 1780 | } 1781 | UNPROTECT(2); /* t, rval */ 1782 | PROTECT(rval); 1783 | endcontext(&cntxt); 1784 | R_FinalizeSrcRefState(); 1785 | UNPROTECT(1); /* rval */ 1786 | *status = PARSE_OK; 1787 | return rval; 1788 | } 1789 | 1790 | 1791 | /*---------------------------------------------------------------------------- 1792 | * 1793 | * The Lexical Analyzer: 1794 | * 1795 | * Basic lexical analysis is performed by the following 1796 | * routines. Input is read a line at a time, and, if the 1797 | * program is in batch mode, each input line is echoed to 1798 | * standard output after it is read. 1799 | * 1800 | * The function yylex() scans the input, breaking it into 1801 | * tokens which are then passed to the parser. The lexical 1802 | * analyser maintains a symbol table (in a very messy fashion). 1803 | * 1804 | * The fact that if statements need to parse differently 1805 | * depending on whether the statement is being interpreted or 1806 | * part of the body of a function causes the need for ifpop 1807 | * and IfPush. When an if statement is encountered an 'i' is 1808 | * pushed on a stack (provided there are parentheses active). 1809 | * At later points this 'i' needs to be popped off of the if 1810 | * stack. 1811 | * 1812 | */ 1813 | 1814 | static void IfPush(void) 1815 | { 1816 | if (*contextp==LBRACE || 1817 | *contextp=='[' || 1818 | *contextp=='(' || 1819 | *contextp == 'i') { 1820 | if(contextp - contextstack >= CONTEXTSTACK_SIZE) 1821 | error(_("contextstack overflow")); 1822 | *++contextp = 'i'; 1823 | } 1824 | 1825 | } 1826 | 1827 | static void ifpop(void) 1828 | { 1829 | if (*contextp=='i') 1830 | *contextp-- = 0; 1831 | } 1832 | 1833 | /* This is only called following ., so we only care if it is 1834 | an ANSI digit or not */ 1835 | static int typeofnext(void) 1836 | { 1837 | int k, c; 1838 | 1839 | c = xxgetc(); 1840 | if (isdigit(c)) k = 1; else k = 2; 1841 | xxungetc(c); 1842 | return k; 1843 | } 1844 | 1845 | static int nextchar(int expect) 1846 | { 1847 | int c = xxgetc(); 1848 | if (c == expect) 1849 | return 1; 1850 | else 1851 | xxungetc(c); 1852 | return 0; 1853 | } 1854 | 1855 | /* Special Symbols */ 1856 | /* Syntactic Keywords + Symbolic Constants */ 1857 | 1858 | struct { 1859 | char *name; 1860 | int token; 1861 | } 1862 | static keywords[] = { 1863 | { "NULL", NULL_CONST }, 1864 | { "NA", NUM_CONST }, 1865 | { "TRUE", NUM_CONST }, 1866 | { "FALSE", NUM_CONST }, 1867 | { "Inf", NUM_CONST }, 1868 | { "NaN", NUM_CONST }, 1869 | { "NA_integer_", NUM_CONST }, 1870 | { "NA_real_", NUM_CONST }, 1871 | { "NA_character_", NUM_CONST }, 1872 | { "NA_complex_", NUM_CONST }, 1873 | { "function", FUNCTION }, 1874 | { "while", WHILE }, 1875 | { "repeat", REPEAT }, 1876 | { "for", FOR }, 1877 | { "if", IF }, 1878 | { "in", IN }, 1879 | { "else", ELSE }, 1880 | { "next", NEXT }, 1881 | { "break", BREAK }, 1882 | { "...", SYMBOL }, 1883 | { 0, 0 } 1884 | }; 1885 | 1886 | /* KeywordLookup has side effects, it sets yylval */ 1887 | 1888 | static int KeywordLookup(const char *s) 1889 | { 1890 | int i; 1891 | for (i = 0; keywords[i].name; i++) { 1892 | if (strcmp(keywords[i].name, s) == 0) { 1893 | switch (keywords[i].token) { 1894 | case NULL_CONST: 1895 | PRESERVE_SV(yylval = R_NilValue); 1896 | break; 1897 | case NUM_CONST: 1898 | if(GenerateCode) { 1899 | switch(i) { 1900 | case 1: 1901 | PRESERVE_SV(yylval = mkNA()); 1902 | break; 1903 | case 2: 1904 | PRESERVE_SV(yylval = mkTrue()); 1905 | break; 1906 | case 3: 1907 | PRESERVE_SV(yylval = mkFalse()); 1908 | break; 1909 | case 4: 1910 | PRESERVE_SV(yylval = allocVector(REALSXP, 1)); 1911 | REAL(yylval)[0] = R_PosInf; 1912 | break; 1913 | case 5: 1914 | PRESERVE_SV(yylval = allocVector(REALSXP, 1)); 1915 | REAL(yylval)[0] = R_NaN; 1916 | break; 1917 | case 6: 1918 | PRESERVE_SV(yylval = allocVector(INTSXP, 1)); 1919 | INTEGER(yylval)[0] = NA_INTEGER; 1920 | break; 1921 | case 7: 1922 | PRESERVE_SV(yylval = allocVector(REALSXP, 1)); 1923 | REAL(yylval)[0] = NA_REAL; 1924 | break; 1925 | case 8: 1926 | PRESERVE_SV(yylval = allocVector(STRSXP, 1)); 1927 | SET_STRING_ELT(yylval, 0, NA_STRING); 1928 | break; 1929 | case 9: 1930 | PRESERVE_SV(yylval = allocVector(CPLXSXP, 1)); 1931 | COMPLEX(yylval)[0].r = COMPLEX(yylval)[0].i = NA_REAL; 1932 | break; 1933 | } 1934 | } else 1935 | PRESERVE_SV(yylval = R_NilValue); 1936 | break; 1937 | case FUNCTION: 1938 | case WHILE: 1939 | case REPEAT: 1940 | case FOR: 1941 | case IF: 1942 | case NEXT: 1943 | case BREAK: 1944 | yylval = install(s); 1945 | break; 1946 | case IN: 1947 | case ELSE: 1948 | break; 1949 | case SYMBOL: 1950 | PRESERVE_SV(yylval = install(s)); 1951 | break; 1952 | } 1953 | return keywords[i].token; 1954 | } 1955 | } 1956 | return 0; 1957 | } 1958 | 1959 | static SEXP mkFloat(const char *s) 1960 | { 1961 | return ScalarReal(R_atof(s)); 1962 | } 1963 | 1964 | static SEXP mkInt(const char *s) 1965 | { 1966 | double f = R_atof(s); /* or R_strtol? */ 1967 | return ScalarInteger((int) f); 1968 | } 1969 | 1970 | static SEXP mkComplex(const char *s) 1971 | { 1972 | SEXP t = R_NilValue; 1973 | double f; 1974 | f = R_atof(s); /* FIXME: make certain the value is legitimate. */ 1975 | t = allocVector(CPLXSXP, 1); 1976 | COMPLEX(t)[0].r = 0; 1977 | COMPLEX(t)[0].i = f; 1978 | return t; 1979 | } 1980 | 1981 | static SEXP mkNA(void) 1982 | { 1983 | SEXP t = allocVector(LGLSXP, 1); 1984 | LOGICAL(t)[0] = NA_LOGICAL; 1985 | return t; 1986 | } 1987 | 1988 | attribute_hidden 1989 | SEXP mkTrue(void) 1990 | { 1991 | SEXP s = allocVector(LGLSXP, 1); 1992 | LOGICAL(s)[0] = 1; 1993 | return s; 1994 | } 1995 | 1996 | SEXP mkFalse(void) 1997 | { 1998 | SEXP s = allocVector(LGLSXP, 1); 1999 | LOGICAL(s)[0] = 0; 2000 | return s; 2001 | } 2002 | 2003 | static void yyerror(const char *s) 2004 | { 2005 | static const char *const yytname_translations[] = 2006 | { 2007 | /* the left column are strings coming from bison, the right 2008 | column are translations for users. 2009 | The first YYENGLISH from the right column are English to be translated, 2010 | the rest are to be copied literally. The #if 0 block below allows xgettext 2011 | to see these. 2012 | */ 2013 | #define YYENGLISH 8 2014 | "$undefined", "input", 2015 | "END_OF_INPUT", "end of input", 2016 | "ERROR", "input", 2017 | "STR_CONST", "string constant", 2018 | "NUM_CONST", "numeric constant", 2019 | "SYMBOL", "symbol", 2020 | "LEFT_ASSIGN", "assignment", 2021 | "'\\n'", "end of line", 2022 | "NULL_CONST", "'NULL'", 2023 | "FUNCTION", "'function'", 2024 | "EQ_ASSIGN", "'='", 2025 | "RIGHT_ASSIGN", "'->'", 2026 | "ANON", "'=>'", 2027 | "LBB", "'[['", 2028 | "FOR", "'for'", 2029 | "IN", "'in'", 2030 | "IF", "'if'", 2031 | "ELSE", "'else'", 2032 | "WHILE", "'while'", 2033 | "NEXT", "'next'", 2034 | "BREAK", "'break'", 2035 | "REPEAT", "'repeat'", 2036 | "GT", "'>'", 2037 | "GE", "'>='", 2038 | "LT", "'<'", 2039 | "LE", "'<='", 2040 | "EQ", "'=='", 2041 | "NE", "'!='", 2042 | "AND", "'&'", 2043 | "OR", "'|'", 2044 | "AND2", "'&&'", 2045 | "OR2", "'||'", 2046 | "NS_GET", "'::'", 2047 | "NS_GET_INT", "':::'", 2048 | 0 2049 | }; 2050 | static char const yyunexpected[] = "syntax error, unexpected "; 2051 | static char const yyexpecting[] = ", expecting "; 2052 | char *expecting; 2053 | 2054 | R_ParseError = yylloc.first_line; 2055 | R_ParseErrorCol = yylloc.first_column; 2056 | R_ParseErrorFile = PS_SRCFILE; 2057 | 2058 | if (!strncmp(s, yyunexpected, sizeof yyunexpected -1)) { 2059 | int i; 2060 | /* Edit the error message */ 2061 | expecting = strstr(s + sizeof yyunexpected -1, yyexpecting); 2062 | if (expecting) *expecting = '\0'; 2063 | for (i = 0; yytname_translations[i]; i += 2) { 2064 | if (!strcmp(s + sizeof yyunexpected - 1, yytname_translations[i])) { 2065 | switch(i/2) 2066 | { 2067 | case 0: 2068 | snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected input")); 2069 | break; 2070 | case 1: 2071 | snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected end of input")); 2072 | break; 2073 | case 2: 2074 | snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected input")); 2075 | break; 2076 | case 3: 2077 | snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected string constant")); 2078 | break; 2079 | case 4: 2080 | snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected numeric constant")); 2081 | break; 2082 | case 5: 2083 | snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected symbol")); 2084 | break; 2085 | case 6: 2086 | snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected assignment")); 2087 | break; 2088 | case 7: 2089 | snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected end of line")); 2090 | break; 2091 | default: 2092 | snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected %s"), 2093 | yytname_translations[i+1]); 2094 | break; 2095 | } 2096 | 2097 | return; 2098 | } 2099 | } 2100 | snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE - 1, _("unexpected %s"), 2101 | s + sizeof yyunexpected - 1); 2102 | } else { 2103 | strncpy(R_ParseErrorMsg, s, PARSE_ERROR_SIZE - 1); 2104 | R_ParseErrorMsg[PARSE_ERROR_SIZE - 1] = '\0'; 2105 | } 2106 | } 2107 | 2108 | static void CheckFormalArgs(SEXP formlist, SEXP _new, YYLTYPE *lloc) 2109 | { 2110 | while (formlist != R_NilValue) { 2111 | if (TAG(formlist) == _new) { 2112 | error(_("repeated formal argument '%s' on line %d"), EncodeChar(PRINTNAME(_new)), 2113 | lloc->first_line); 2114 | } 2115 | formlist = CDR(formlist); 2116 | } 2117 | } 2118 | 2119 | /* This is used as the buffer for NumericValue, SpecialValue and 2120 | SymbolValue. None of these could conceivably need 8192 bytes. 2121 | 2122 | It has not been used as the buffer for input character strings 2123 | since Oct 2007 (released as 2.7.0), and for comments since 2.8.0 2124 | */ 2125 | static char yytext[MAXELTSIZE]; 2126 | 2127 | static int SkipSpace(void) 2128 | { 2129 | int c; 2130 | static wctype_t blankwct = 0; 2131 | 2132 | if (!blankwct) 2133 | blankwct = Ri18n_wctype("blank"); 2134 | 2135 | #ifdef Win32 2136 | if(!mbcslocale) { /* 0xa0 is NBSP in all 8-bit Windows locales */ 2137 | while ((c = xxgetc()) == ' ' || c == '\t' || c == '\f' || 2138 | (unsigned int) c == 0xa0) ; 2139 | return c; 2140 | } else { 2141 | int i, clen; 2142 | wchar_t wc; 2143 | while (1) { 2144 | c = xxgetc(); 2145 | if (c == ' ' || c == '\t' || c == '\f') continue; 2146 | if (c == '\n' || c == R_EOF) break; 2147 | if ((unsigned int) c < 0x80) break; 2148 | clen = mbcs_get_next(c, &wc); /* always 2 */ 2149 | if(! Ri18n_iswctype(wc, blankwct) ) break; 2150 | for(i = 1; i < clen; i++) c = xxgetc(); 2151 | } 2152 | return c; 2153 | } 2154 | #endif 2155 | #if defined(__STDC_ISO_10646__) 2156 | if(mbcslocale) { /* wctype functions need Unicode wchar_t */ 2157 | int i, clen; 2158 | wchar_t wc; 2159 | while (1) { 2160 | c = xxgetc(); 2161 | if (c == ' ' || c == '\t' || c == '\f') continue; 2162 | if (c == '\n' || c == R_EOF) break; 2163 | if ((unsigned int) c < 0x80) break; 2164 | clen = mbcs_get_next(c, &wc); 2165 | if(! Ri18n_iswctype(wc, blankwct) ) break; 2166 | for(i = 1; i < clen; i++) c = xxgetc(); 2167 | } 2168 | } else 2169 | #endif 2170 | while ((c = xxgetc()) == ' ' || c == '\t' || c == '\f') ; 2171 | return c; 2172 | } 2173 | 2174 | /* Note that with interactive use, EOF cannot occur inside */ 2175 | /* a comment. However, semicolons inside comments make it */ 2176 | /* appear that this does happen. For this reason we use the */ 2177 | /* special assignment EndOfFile=2 to indicate that this is */ 2178 | /* going on. This is detected and dealt with in Parse1Buffer. */ 2179 | 2180 | static int SkipComment(void) 2181 | { 2182 | int c='#', i; 2183 | 2184 | /* locations before the # character was read */ 2185 | int _first_column = ParseState.xxcolno ; 2186 | int _first_parsed = ParseState.xxparseno ; 2187 | int type = COMMENT ; 2188 | 2189 | Rboolean maybeLine = (ParseState.xxcolno == 1); 2190 | Rboolean doSave; 2191 | 2192 | DECLARE_YYTEXT_BUFP(yyp); 2193 | 2194 | if (maybeLine) { 2195 | char lineDirective[] = "#line"; 2196 | YYTEXT_PUSH(c, yyp); 2197 | for (i=1; i<5; i++) { 2198 | c = xxgetc(); 2199 | if (c != (int)(lineDirective[i])) { 2200 | maybeLine = FALSE; 2201 | break; 2202 | } 2203 | YYTEXT_PUSH(c, yyp); 2204 | } 2205 | if (maybeLine) 2206 | c = processLineDirective(&type); 2207 | } 2208 | // we want to track down the character 2209 | // __before__ the new line character 2210 | int _last_column = ParseState.xxcolno ; 2211 | int _last_parsed = ParseState.xxparseno ; 2212 | 2213 | if (c == '\n') { 2214 | _last_column = prevcols[prevpos]; 2215 | _last_parsed = prevparse[prevpos]; 2216 | } 2217 | 2218 | doSave = !maybeLine; 2219 | 2220 | while (c != '\n' && c != R_EOF) { 2221 | // Comments can be any length; we only record the ones that fit in yytext. 2222 | if (doSave) { 2223 | YYTEXT_PUSH(c, yyp); 2224 | doSave = (yyp - yytext) < sizeof(yytext) - 2; 2225 | } 2226 | _last_column = ParseState.xxcolno ; 2227 | _last_parsed = ParseState.xxparseno ; 2228 | c = xxgetc(); 2229 | } 2230 | if (c == R_EOF) EndOfFile = 2; 2231 | incrementId( ) ; 2232 | YYTEXT_PUSH('\0', yyp); 2233 | record_( _first_parsed, _first_column, _last_parsed, _last_column, 2234 | type, identifier, doSave ? yytext : 0 ) ; 2235 | return c; 2236 | } 2237 | 2238 | static int NumericValue(int c) 2239 | { 2240 | int seendot = (c == '.'); 2241 | int seenexp = 0; 2242 | int last = c; 2243 | int nd = 0; 2244 | int asNumeric = 0; 2245 | int count = 1; /* The number of characters seen */ 2246 | 2247 | DECLARE_YYTEXT_BUFP(yyp); 2248 | YYTEXT_PUSH(c, yyp); 2249 | /* We don't care about other than ASCII digits */ 2250 | while (isdigit(c = xxgetc()) || c == '.' || c == 'e' || c == 'E' 2251 | || c == 'x' || c == 'X' || c == 'L') 2252 | { 2253 | count++; 2254 | if (c == 'L') /* must be at the end. Won't allow 1Le3 (at present). */ 2255 | { YYTEXT_PUSH(c, yyp); 2256 | break; 2257 | } 2258 | 2259 | if (c == 'x' || c == 'X') { 2260 | if (count > 2 || last != '0') break; /* 0x must be first */ 2261 | YYTEXT_PUSH(c, yyp); 2262 | while(isdigit(c = xxgetc()) || ('a' <= c && c <= 'f') || 2263 | ('A' <= c && c <= 'F') || c == '.') { 2264 | if (c == '.') { 2265 | if (seendot) return ERROR; 2266 | seendot = 1; 2267 | } 2268 | YYTEXT_PUSH(c, yyp); 2269 | nd++; 2270 | } 2271 | if (nd == 0) return ERROR; 2272 | if (c == 'p' || c == 'P') { 2273 | seenexp = 1; 2274 | YYTEXT_PUSH(c, yyp); 2275 | c = xxgetc(); 2276 | if (!isdigit(c) && c != '+' && c != '-') return ERROR; 2277 | if (c == '+' || c == '-') { 2278 | YYTEXT_PUSH(c, yyp); 2279 | c = xxgetc(); 2280 | } 2281 | for(nd = 0; isdigit(c); c = xxgetc(), nd++) 2282 | YYTEXT_PUSH(c, yyp); 2283 | if (nd == 0) return ERROR; 2284 | } 2285 | if (seendot && !seenexp) return ERROR; 2286 | break; 2287 | } 2288 | if (c == 'E' || c == 'e') { 2289 | if (seenexp) 2290 | break; 2291 | seenexp = 1; 2292 | seendot = seendot == 1 ? seendot : 2; 2293 | YYTEXT_PUSH(c, yyp); 2294 | c = xxgetc(); 2295 | if (!isdigit(c) && c != '+' && c != '-') return ERROR; 2296 | if (c == '+' || c == '-') { 2297 | YYTEXT_PUSH(c, yyp); 2298 | c = xxgetc(); 2299 | if (!isdigit(c)) return ERROR; 2300 | } 2301 | } 2302 | if (c == '.') { 2303 | if (seendot) 2304 | break; 2305 | seendot = 1; 2306 | } 2307 | YYTEXT_PUSH(c, yyp); 2308 | last = c; 2309 | } 2310 | 2311 | if(c == 'i') 2312 | YYTEXT_PUSH(c, yyp); /* for getParseData */ 2313 | 2314 | YYTEXT_PUSH('\0', yyp); 2315 | /* Make certain that things are okay. */ 2316 | if(c == 'L') { 2317 | double a = R_atof(yytext); 2318 | int b = (int) a; 2319 | /* We are asked to create an integer via the L, so we check that the 2320 | double and int values are the same. If not, this is a problem and we 2321 | will not lose information and so use the numeric value. 2322 | */ 2323 | if(a != (double) b) { 2324 | if(GenerateCode) { 2325 | if(seendot == 1 && seenexp == 0) 2326 | warning(_("integer literal %s contains decimal; using numeric value"), yytext); 2327 | else { 2328 | /* hide the L for the warning message */ 2329 | warning(_("non-integer value %s qualified with L; using numeric value"), yytext); 2330 | } 2331 | } 2332 | asNumeric = 1; 2333 | seenexp = 1; 2334 | } 2335 | } 2336 | 2337 | if(c == 'i') { 2338 | yylval = GenerateCode ? mkComplex(yytext) : R_NilValue; 2339 | } else if(c == 'L' && asNumeric == 0) { 2340 | if(GenerateCode && seendot == 1 && seenexp == 0) 2341 | warning(_("integer literal %s contains unnecessary decimal point"), yytext); 2342 | yylval = GenerateCode ? mkInt(yytext) : R_NilValue; 2343 | #if 0 /* do this to make 123 integer not double */ 2344 | } else if(!(seendot || seenexp)) { 2345 | if(c != 'L') xxungetc(c); 2346 | if (GenerateCode) { 2347 | double a = R_atof(yytext); 2348 | int b = (int) a; 2349 | yylval = (a != (double) b) ? mkFloat(yytext) : mkInt(yytext); 2350 | } else yylval = R_NilValue; 2351 | #endif 2352 | } else { 2353 | if(c != 'L') 2354 | xxungetc(c); 2355 | yylval = GenerateCode ? mkFloat(yytext) : R_NilValue; 2356 | } 2357 | 2358 | PRESERVE_SV(yylval); 2359 | return NUM_CONST; 2360 | } 2361 | 2362 | /* Strings may contain the standard ANSI escapes and octal */ 2363 | /* specifications of the form \o, \oo or \ooo, where 'o' */ 2364 | /* is an octal digit. */ 2365 | 2366 | /* The buffer is reallocated on the R heap if needed; not by malloc */ 2367 | /* to avoid memory leak in case of R error (long jump) */ 2368 | #define STEXT_PUSH(c) do { \ 2369 | size_t nc = bp - stext; \ 2370 | if (nc >= nstext - 1) { \ 2371 | char *old = stext; \ 2372 | SEXP st1; \ 2373 | nstext *= 2; \ 2374 | PROTECT(st1 = allocVector(RAWSXP, nstext)); \ 2375 | stext = (char *)RAW(st1); \ 2376 | memmove(stext, old, nc); \ 2377 | REPROTECT(st1, sti); \ 2378 | UNPROTECT(1); /* st1 */ \ 2379 | bp = stext+nc; } \ 2380 | *bp++ = ((char) c); \ 2381 | } while(0) 2382 | 2383 | 2384 | /* The idea here is that if a string contains \u escapes that are not 2385 | valid in the current locale, we should switch to UTF-8 for that 2386 | string. Needs Unicode wide-char support. 2387 | 2388 | Defining __STDC_ISO_10646__ is done by the OS (nor to) in wchar.t. 2389 | Some (e.g. Solaris, FreeBSD) have Unicode wchar_t but do not define it. 2390 | */ 2391 | 2392 | #if defined(Win32) || defined(__STDC_ISO_10646__) 2393 | typedef wchar_t ucs_t; 2394 | # define mbcs_get_next2 mbcs_get_next 2395 | #else 2396 | typedef unsigned int ucs_t; 2397 | # define WC_NOT_UNICODE 2398 | static int mbcs_get_next2(int c, ucs_t *wc) 2399 | { 2400 | int i, res, clen = 1; char s[9]; 2401 | 2402 | s[0] = c; 2403 | /* This assumes (probably OK) that all MBCS embed ASCII as single-byte 2404 | lead bytes, including control chars */ 2405 | if((unsigned int) c < 0x80) { 2406 | *wc = (wchar_t) c; 2407 | return 1; 2408 | } 2409 | if(utf8locale) { 2410 | clen = utf8clen(c); 2411 | for(i = 1; i < clen; i++) { 2412 | c = xxgetc(); 2413 | if(c == R_EOF) error(_("EOF whilst reading MBCS char at line %d"), ParseState.xxlineno); 2414 | s[i] = (char) c; 2415 | } 2416 | s[clen] ='\0'; /* x86 Solaris requires this */ 2417 | res = mbtoucs(wc, s, clen); 2418 | if(res == -1) error(_("invalid multibyte character in parser at line %d"), ParseState.xxlineno); 2419 | } else { 2420 | /* This is not necessarily correct for stateful MBCS */ 2421 | while(clen <= MB_CUR_MAX) { 2422 | res = mbtoucs(wc, s, clen); 2423 | if(res >= 0) break; 2424 | if(res == -1) 2425 | error(_("invalid multibyte character in parser at line %d"), ParseState.xxlineno); 2426 | /* so res == -2 */ 2427 | c = xxgetc(); 2428 | if(c == R_EOF) error(_("EOF whilst reading MBCS char at line %d"), ParseState.xxlineno); 2429 | s[clen++] = c; 2430 | } /* we've tried enough, so must be complete or invalid by now */ 2431 | } 2432 | for(i = clen - 1; i > 0; i--) xxungetc(s[i]); 2433 | return clen; 2434 | } 2435 | #endif 2436 | 2437 | #define WTEXT_PUSH(c) do { if(wcnt < 10000) wcs[wcnt++] = c; } while(0) 2438 | 2439 | static SEXP mkStringUTF8(const ucs_t *wcs, int cnt) 2440 | { 2441 | SEXP t; 2442 | int nb; 2443 | 2444 | /* NB: cnt includes the terminator */ 2445 | #ifdef Win32 2446 | nb = cnt*4; /* UCS-2/UTF-16 so max 4 bytes per wchar_t */ 2447 | #else 2448 | nb = cnt*6; 2449 | #endif 2450 | R_CheckStack2(nb); 2451 | char s[nb]; 2452 | memset(s, 0, nb); /* safety */ 2453 | #ifdef WC_NOT_UNICODE 2454 | for(char *ss = s; *wcs; wcs++) ss += ucstoutf8(ss, *wcs); 2455 | #else 2456 | wcstoutf8(s, wcs, sizeof(s)); 2457 | #endif 2458 | PROTECT(t = allocVector(STRSXP, 1)); 2459 | SET_STRING_ELT(t, 0, mkCharCE(s, CE_UTF8)); 2460 | UNPROTECT(1); /* t */ 2461 | return t; 2462 | } 2463 | 2464 | #define CTEXT_PUSH(c) do { \ 2465 | if (ct - currtext >= 1000) { \ 2466 | memmove(currtext, currtext+100, 901); memmove(currtext, "... ", 4); ct -= 100; \ 2467 | currtext_truncated = TRUE; \ 2468 | } \ 2469 | *ct++ = ((char) c); \ 2470 | } while(0) 2471 | #define CTEXT_POP() ct-- 2472 | 2473 | 2474 | /* forSymbol is true when parsing backticked symbols */ 2475 | static int StringValue(int c, Rboolean forSymbol) 2476 | { 2477 | int quote = c; 2478 | char currtext[1010], *ct = currtext; 2479 | char st0[MAXELTSIZE]; 2480 | unsigned int nstext = MAXELTSIZE; 2481 | char *stext = st0, *bp = st0; 2482 | PROTECT_INDEX sti; 2483 | int wcnt = 0; 2484 | ucs_t wcs[10001]; 2485 | Rboolean oct_or_hex = FALSE, use_wcs = FALSE, currtext_truncated = FALSE; 2486 | 2487 | PROTECT_WITH_INDEX(R_NilValue, &sti); 2488 | CTEXT_PUSH(c); 2489 | while ((c = xxgetc()) != R_EOF && c != quote) { 2490 | CTEXT_PUSH(c); 2491 | if (c == '\n') { 2492 | xxungetc(c); CTEXT_POP(); 2493 | /* Fix suggested by Mark Bravington to allow multiline strings 2494 | * by pretending we've seen a backslash. Was: 2495 | * return ERROR; 2496 | */ 2497 | c = '\\'; 2498 | } 2499 | if (c == '\\') { 2500 | c = xxgetc(); CTEXT_PUSH(c); 2501 | if ('0' <= c && c <= '7') { 2502 | int octal = c - '0'; 2503 | if ('0' <= (c = xxgetc()) && c <= '7') { 2504 | CTEXT_PUSH(c); 2505 | octal = 8 * octal + c - '0'; 2506 | if ('0' <= (c = xxgetc()) && c <= '7') { 2507 | CTEXT_PUSH(c); 2508 | octal = 8 * octal + c - '0'; 2509 | } else { 2510 | xxungetc(c); 2511 | CTEXT_POP(); 2512 | } 2513 | } else { 2514 | xxungetc(c); 2515 | CTEXT_POP(); 2516 | } 2517 | if (!octal) 2518 | error(_("nul character not allowed (line %d)"), ParseState.xxlineno); 2519 | c = octal; 2520 | oct_or_hex = TRUE; 2521 | } 2522 | else if(c == 'x') { 2523 | int val = 0; int i, ext; 2524 | for(i = 0; i < 2; i++) { 2525 | c = xxgetc(); CTEXT_PUSH(c); 2526 | if(c >= '0' && c <= '9') ext = c - '0'; 2527 | else if (c >= 'A' && c <= 'F') ext = c - 'A' + 10; 2528 | else if (c >= 'a' && c <= 'f') ext = c - 'a' + 10; 2529 | else { 2530 | xxungetc(c); 2531 | CTEXT_POP(); 2532 | if (i == 0) { /* was just \x */ 2533 | *ct = '\0'; 2534 | errorcall(R_NilValue, _("'\\x' used without hex digits in character string starting \"%s\""), currtext); 2535 | } 2536 | break; 2537 | } 2538 | val = 16*val + ext; 2539 | } 2540 | if (!val) 2541 | error(_("nul character not allowed (line %d)"), ParseState.xxlineno); 2542 | c = val; 2543 | oct_or_hex = TRUE; 2544 | } 2545 | else if(c == 'u') { 2546 | unsigned int val = 0; int i, ext; 2547 | Rboolean delim = FALSE; 2548 | 2549 | if(forSymbol) 2550 | error(_("\\uxxxx sequences not supported inside backticks (line %d)"), ParseState.xxlineno); 2551 | if((c = xxgetc()) == '{') { 2552 | delim = TRUE; 2553 | CTEXT_PUSH(c); 2554 | } else xxungetc(c); 2555 | for(i = 0; i < 4; i++) { 2556 | c = xxgetc(); CTEXT_PUSH(c); 2557 | if(c >= '0' && c <= '9') ext = c - '0'; 2558 | else if (c >= 'A' && c <= 'F') ext = c - 'A' + 10; 2559 | else if (c >= 'a' && c <= 'f') ext = c - 'a' + 10; 2560 | else { 2561 | xxungetc(c); 2562 | CTEXT_POP(); 2563 | if (i == 0) { /* was just \u */ 2564 | *ct = '\0'; 2565 | errorcall(R_NilValue, _("'\\u' used without hex digits in character string starting \"%s\""), currtext); 2566 | } 2567 | break; 2568 | } 2569 | val = 16*val + ext; 2570 | } 2571 | if(delim) { 2572 | if((c = xxgetc()) != '}') 2573 | error(_("invalid \\u{xxxx} sequence (line %d)"), 2574 | ParseState.xxlineno); 2575 | else CTEXT_PUSH(c); 2576 | } 2577 | if (!val) 2578 | error(_("nul character not allowed (line %d)"), ParseState.xxlineno); 2579 | WTEXT_PUSH(val); /* this assumes wchar_t is Unicode */ 2580 | use_wcs = TRUE; 2581 | continue; 2582 | } 2583 | else if(c == 'U') { 2584 | unsigned int val = 0; int i, ext; 2585 | Rboolean delim = FALSE; 2586 | if(forSymbol) 2587 | error(_("\\Uxxxxxxxx sequences not supported inside backticks (line %d)"), ParseState.xxlineno); 2588 | if((c = xxgetc()) == '{') { 2589 | delim = TRUE; 2590 | CTEXT_PUSH(c); 2591 | } else xxungetc(c); 2592 | for(i = 0; i < 8; i++) { 2593 | c = xxgetc(); CTEXT_PUSH(c); 2594 | if(c >= '0' && c <= '9') ext = c - '0'; 2595 | else if (c >= 'A' && c <= 'F') ext = c - 'A' + 10; 2596 | else if (c >= 'a' && c <= 'f') ext = c - 'a' + 10; 2597 | else { 2598 | xxungetc(c); 2599 | CTEXT_POP(); 2600 | if (i == 0) { /* was just \U */ 2601 | *ct = '\0'; 2602 | errorcall(R_NilValue, _("'\\U' used without hex digits in character string starting \"%s\""), currtext); 2603 | } 2604 | break; 2605 | } 2606 | val = 16*val + ext; 2607 | } 2608 | if(delim) { 2609 | if((c = xxgetc()) != '}') 2610 | error(_("invalid \\U{xxxxxxxx} sequence (line %d)"), ParseState.xxlineno); 2611 | else CTEXT_PUSH(c); 2612 | } 2613 | if (!val) 2614 | error(_("nul character not allowed (line %d)"), ParseState.xxlineno); 2615 | #ifdef Win32 2616 | if (0x010000 <= val && val <= 0x10FFFF) { /* Need surrogate pair in Windows */ 2617 | val = val - 0x010000; 2618 | WTEXT_PUSH( 0xD800 | (val >> 10) ); 2619 | val = 0xDC00 | (val & 0x03FF); 2620 | } 2621 | #endif 2622 | WTEXT_PUSH(val); 2623 | use_wcs = TRUE; 2624 | continue; 2625 | } 2626 | else { 2627 | switch (c) { 2628 | case 'a': 2629 | c = '\a'; 2630 | break; 2631 | case 'b': 2632 | c = '\b'; 2633 | break; 2634 | case 'f': 2635 | c = '\f'; 2636 | break; 2637 | case 'n': 2638 | c = '\n'; 2639 | break; 2640 | case 'r': 2641 | c = '\r'; 2642 | break; 2643 | case 't': 2644 | c = '\t'; 2645 | break; 2646 | case 'v': 2647 | c = '\v'; 2648 | break; 2649 | case '\\': 2650 | c = '\\'; 2651 | break; 2652 | case '"': 2653 | case '\'': 2654 | case '`': 2655 | case ' ': 2656 | case '\n': 2657 | break; 2658 | default: 2659 | *ct = '\0'; 2660 | errorcall(R_NilValue, _("'\\%c' is an unrecognized escape in character string starting \"%s\""), c, currtext); 2661 | } 2662 | } 2663 | } else if(mbcslocale) { 2664 | int i, clen; 2665 | ucs_t wc; 2666 | clen = mbcs_get_next2(c, &wc); 2667 | WTEXT_PUSH(wc); 2668 | ParseState.xxbyteno += clen-1; 2669 | 2670 | for(i = 0; i < clen - 1; i++){ 2671 | STEXT_PUSH(c); 2672 | c = xxgetc(); 2673 | if (c == R_EOF) break; 2674 | CTEXT_PUSH(c); 2675 | if (c == '\n') { 2676 | xxungetc(c); CTEXT_POP(); 2677 | c = '\\'; 2678 | } 2679 | } 2680 | if (c == R_EOF) break; 2681 | STEXT_PUSH(c); 2682 | continue; 2683 | } 2684 | STEXT_PUSH(c); 2685 | if ((unsigned int) c < 0x80) WTEXT_PUSH(c); 2686 | else { /* have an 8-bit char in the current encoding */ 2687 | #ifdef WC_NOT_UNICODE 2688 | ucs_t wc; 2689 | char s[2] = " "; 2690 | s[0] = (char) c; 2691 | mbtoucs(&wc, s, 2); 2692 | #else 2693 | wchar_t wc; 2694 | char s[2] = " "; 2695 | s[0] = (char) c; 2696 | mbrtowc(&wc, s, 2, NULL); 2697 | #endif 2698 | WTEXT_PUSH(wc); 2699 | } 2700 | } 2701 | STEXT_PUSH('\0'); 2702 | WTEXT_PUSH(0); 2703 | yytext[0] = '\0'; 2704 | if (c == R_EOF) { 2705 | PRESERVE_SV(yylval = R_NilValue); 2706 | UNPROTECT(1); /* release stext */ 2707 | return INCOMPLETE_STRING; 2708 | } else { 2709 | CTEXT_PUSH(c); 2710 | CTEXT_PUSH('\0'); 2711 | } 2712 | if (!currtext_truncated) 2713 | strcpy(yytext, currtext); 2714 | else if (forSymbol || !use_wcs) { 2715 | size_t total = strlen(stext); 2716 | snprintf(yytext, MAXELTSIZE, "[%u chars quoted with '%c']", (unsigned int)total, quote); 2717 | } else 2718 | snprintf(yytext, MAXELTSIZE, "[%d wide chars quoted with '%c']", wcnt, quote); 2719 | if(forSymbol) { 2720 | PRESERVE_SV(yylval = install(stext)); 2721 | UNPROTECT(1); /* release stext */ 2722 | return SYMBOL; 2723 | } else { 2724 | if(use_wcs) { 2725 | if(oct_or_hex) 2726 | error(_("mixing Unicode and octal/hex escapes in a string is not allowed")); 2727 | if(wcnt < 10000) 2728 | PRESERVE_SV(yylval = mkStringUTF8(wcs, wcnt)); /* include terminator */ 2729 | else 2730 | error(_("string at line %d containing Unicode escapes not in this locale\nis too long (max 10000 chars)"), ParseState.xxlineno); 2731 | } else 2732 | PRESERVE_SV(yylval = mkString2(stext, bp - stext - 1, oct_or_hex)); 2733 | UNPROTECT(1); /* release stext */ 2734 | return STR_CONST; 2735 | } 2736 | } 2737 | 2738 | static int SpecialValue(int c) 2739 | { 2740 | DECLARE_YYTEXT_BUFP(yyp); 2741 | YYTEXT_PUSH(c, yyp); 2742 | while ((c = xxgetc()) != R_EOF && c != '%') { 2743 | if (c == '\n') { 2744 | xxungetc(c); 2745 | return ERROR; 2746 | } 2747 | YYTEXT_PUSH(c, yyp); 2748 | } 2749 | if (c == '%') 2750 | YYTEXT_PUSH(c, yyp); 2751 | YYTEXT_PUSH('\0', yyp); 2752 | yylval = install(yytext); 2753 | return SPECIAL; 2754 | } 2755 | 2756 | /* return 1 if name is a valid name 0 otherwise */ 2757 | attribute_hidden 2758 | int isValidName(const char *name) 2759 | { 2760 | const char *p = name; 2761 | int i; 2762 | 2763 | if(mbcslocale) { 2764 | /* the only way to establish which chars are alpha etc is to 2765 | use the wchar variants */ 2766 | size_t n = strlen(name), used; 2767 | wchar_t wc; 2768 | used = Mbrtowc(&wc, p, n, NULL); p += used; n -= used; 2769 | if(used == 0) return 0; 2770 | if (wc != L'.' && !iswalpha(wc) ) return 0; 2771 | if (wc == L'.') { 2772 | /* We don't care about other than ASCII digits */ 2773 | if(isdigit(0xff & (int)*p)) return 0; 2774 | /* Mbrtowc(&wc, p, n, NULL); if(iswdigit(wc)) return 0; */ 2775 | } 2776 | while((used = Mbrtowc(&wc, p, n, NULL))) { 2777 | if (!(iswalnum(wc) || wc == L'.' || wc == L'_')) break; 2778 | p += used; n -= used; 2779 | } 2780 | if (*p != '\0') return 0; 2781 | } else { 2782 | int c = 0xff & *p++; 2783 | if (c != '.' && !isalpha(c) ) return 0; 2784 | if (c == '.' && isdigit(0xff & (int)*p)) return 0; 2785 | while ( c = 0xff & *p++, (isalnum(c) || c == '.' || c == '_') ) ; 2786 | if (c != '\0') return 0; 2787 | } 2788 | 2789 | if (strcmp(name, "...") == 0) return 1; 2790 | 2791 | for (i = 0; keywords[i].name != NULL; i++) 2792 | if (strcmp(keywords[i].name, name) == 0) return 0; 2793 | 2794 | return 1; 2795 | } 2796 | 2797 | 2798 | static int SymbolValue(int c) 2799 | { 2800 | int kw; 2801 | DECLARE_YYTEXT_BUFP(yyp); 2802 | if(mbcslocale) { 2803 | wchar_t wc; int i, clen; 2804 | clen = mbcs_get_next(c, &wc); 2805 | while(1) { 2806 | /* at this point we have seen one char, so push its bytes 2807 | and get one more */ 2808 | for(i = 0; i < clen; i++) { 2809 | YYTEXT_PUSH(c, yyp); 2810 | c = xxgetc(); 2811 | } 2812 | if(c == R_EOF) break; 2813 | if(c == '.' || c == '_') { 2814 | clen = 1; 2815 | continue; 2816 | } 2817 | clen = mbcs_get_next(c, &wc); 2818 | if(!iswalnum(wc)) break; 2819 | } 2820 | } else 2821 | do { 2822 | YYTEXT_PUSH(c, yyp); 2823 | } while ((c = xxgetc()) != R_EOF && 2824 | (isalnum(c) || c == '.' || c == '_')); 2825 | xxungetc(c); 2826 | YYTEXT_PUSH('\0', yyp); 2827 | if ((kw = KeywordLookup(yytext))) 2828 | return kw; 2829 | 2830 | PRESERVE_SV(yylval = install(yytext)); 2831 | return SYMBOL; 2832 | } 2833 | 2834 | static void setParseFilename(SEXP newname) { 2835 | SEXP class; 2836 | 2837 | if (isEnvironment(PS_SRCFILE)) { 2838 | SEXP oldname = findVar(install("filename"), PS_SRCFILE); 2839 | if (isString(oldname) && length(oldname) > 0 && 2840 | strcmp(CHAR(STRING_ELT(oldname, 0)), 2841 | CHAR(STRING_ELT(newname, 0))) == 0) return; 2842 | PS_SET_SRCFILE(NewEnvironment(R_NilValue, R_NilValue, R_EmptyEnv)); 2843 | defineVar(install("filename"), newname, PS_SRCFILE); 2844 | defineVar(install("original"), PS_ORIGINAL, PS_SRCFILE); 2845 | 2846 | PROTECT(class = allocVector(STRSXP, 2)); 2847 | SET_STRING_ELT(class, 0, mkChar("srcfilealias")); 2848 | SET_STRING_ELT(class, 1, mkChar("srcfile")); 2849 | setAttrib(PS_SRCFILE, R_ClassSymbol, class); 2850 | UNPROTECT(1); /* class */ 2851 | } else 2852 | PS_SET_SRCFILE(duplicate(newname)); 2853 | RELEASE_SV(newname); 2854 | } 2855 | 2856 | static int processLineDirective(int *type) 2857 | { 2858 | int c, tok, linenumber; 2859 | c = SkipSpace(); 2860 | if (!isdigit(c)) return(c); 2861 | tok = NumericValue(c); 2862 | linenumber = atoi(yytext); 2863 | c = SkipSpace(); 2864 | if (c == '"') 2865 | tok = StringValue(c, FALSE); 2866 | else 2867 | xxungetc(c); 2868 | if (tok == STR_CONST) 2869 | setParseFilename(yylval); 2870 | while ((c = xxgetc()) != '\n' && c != R_EOF) /* skip */ ; 2871 | ParseState.xxlineno = linenumber; 2872 | *type = LINE_DIRECTIVE; 2873 | /* we don't change xxparseno here: it counts parsed lines, not official lines */ 2874 | R_ParseContext[R_ParseContextLast] = '\0'; /* Context report shouldn't show the directive */ 2875 | return(c); 2876 | } 2877 | 2878 | /* Get the R symbol, and set yytext at the same time */ 2879 | static SEXP install_and_save(char * text) 2880 | { 2881 | strcpy(yytext, text); 2882 | return install(text); 2883 | } 2884 | 2885 | /* Get an R symbol, and set different yytext. Used for translation of -> to <-. ->> to <<- */ 2886 | static SEXP install_and_save2(char * text, char * savetext) 2887 | { 2888 | strcpy(yytext, savetext); 2889 | return install(text); 2890 | } 2891 | 2892 | 2893 | /* Split the input stream into tokens. */ 2894 | /* This is the lowest of the parsing levels. */ 2895 | 2896 | static int token(void) 2897 | { 2898 | int c; 2899 | wchar_t wc; 2900 | 2901 | if (SavedToken) { 2902 | c = SavedToken; 2903 | yylval = SavedLval; 2904 | SavedLval = R_NilValue; 2905 | SavedToken = 0; 2906 | yylloc.first_line = xxlinesave; 2907 | yylloc.first_column = xxcolsave; 2908 | yylloc.first_byte = xxbytesave; 2909 | yylloc.first_parsed = xxparsesave; 2910 | return c; 2911 | } 2912 | xxcharsave = xxcharcount; /* want to be able to go back one token */ 2913 | 2914 | c = SkipSpace(); 2915 | if (c == '#') c = SkipComment(); 2916 | 2917 | yylloc.first_line = ParseState.xxlineno; 2918 | yylloc.first_column = ParseState.xxcolno; 2919 | yylloc.first_byte = ParseState.xxbyteno; 2920 | yylloc.first_parsed = ParseState.xxparseno; 2921 | 2922 | if (c == R_EOF) return END_OF_INPUT; 2923 | 2924 | /* Either digits or symbols can start with a "." */ 2925 | /* so we need to decide which it is and jump to */ 2926 | /* the correct spot. */ 2927 | 2928 | if (c == '.' && typeofnext() >= 2) goto symbol; 2929 | 2930 | /* literal numbers */ 2931 | 2932 | if (c == '.') return NumericValue(c); 2933 | /* We don't care about other than ASCII digits */ 2934 | if (isdigit(c)) return NumericValue(c); 2935 | 2936 | /* literal strings */ 2937 | 2938 | if (c == '\"' || c == '\'') 2939 | return StringValue(c, FALSE); 2940 | 2941 | /* special functions */ 2942 | 2943 | if (c == '%') 2944 | return SpecialValue(c); 2945 | 2946 | /* functions, constants and variables */ 2947 | 2948 | if (c == '`') 2949 | return StringValue(c, TRUE); 2950 | symbol: 2951 | 2952 | if (c == '.') return SymbolValue(c); 2953 | if(mbcslocale) { 2954 | mbcs_get_next(c, &wc); 2955 | if (iswalpha(wc)) return SymbolValue(c); 2956 | } else 2957 | if (isalpha(c)) return SymbolValue(c); 2958 | 2959 | /* compound tokens */ 2960 | 2961 | switch (c) { 2962 | case '<': 2963 | if (nextchar('=')) { 2964 | yylval = install_and_save("<="); 2965 | return LE; 2966 | } 2967 | if (nextchar('-')) { 2968 | yylval = install_and_save("<-"); 2969 | return LEFT_ASSIGN; 2970 | } 2971 | if (nextchar('<')) { 2972 | if (nextchar('-')) { 2973 | yylval = install_and_save("<<-"); 2974 | return LEFT_ASSIGN; 2975 | } 2976 | else 2977 | return ERROR; 2978 | } 2979 | yylval = install_and_save("<"); 2980 | return LT; 2981 | case '-': 2982 | if (nextchar('>')) { 2983 | if (nextchar('>')) { 2984 | yylval = install_and_save2("<<-", "->>"); 2985 | return RIGHT_ASSIGN; 2986 | } 2987 | else { 2988 | yylval = install_and_save2("<-", "->"); 2989 | return RIGHT_ASSIGN; 2990 | } 2991 | } 2992 | yylval = install_and_save("-"); 2993 | return '-'; 2994 | case '>': 2995 | if (nextchar('=')) { 2996 | yylval = install_and_save(">="); 2997 | return GE; 2998 | } 2999 | yylval = install_and_save(">"); 3000 | return GT; 3001 | case '!': 3002 | if (nextchar('=')) { 3003 | yylval = install_and_save("!="); 3004 | return NE; 3005 | } 3006 | yylval = install_and_save("!"); 3007 | return '!'; 3008 | case '=': 3009 | if (nextchar('=')) { 3010 | yylval = install_and_save("=="); 3011 | return EQ; 3012 | } 3013 | if (nextchar('>')) { 3014 | yylval = install_and_save("=>"); 3015 | return ANON; 3016 | } 3017 | yylval = install_and_save("="); 3018 | return EQ_ASSIGN; 3019 | case ':': 3020 | if (nextchar(':')) { 3021 | if (nextchar(':')) { 3022 | yylval = install_and_save(":::"); 3023 | return NS_GET_INT; 3024 | } 3025 | else { 3026 | yylval = install_and_save("::"); 3027 | return NS_GET; 3028 | } 3029 | } 3030 | if (nextchar('=')) { 3031 | yylval = install_and_save(":="); 3032 | return LEFT_ASSIGN; 3033 | } 3034 | yylval = install_and_save(":"); 3035 | return ':'; 3036 | case '&': 3037 | if (nextchar('&')) { 3038 | yylval = install_and_save("&&"); 3039 | return AND2; 3040 | } 3041 | yylval = install_and_save("&"); 3042 | return AND; 3043 | case '|': 3044 | if (nextchar('|')) { 3045 | yylval = install_and_save("||"); 3046 | return OR2; 3047 | } 3048 | yylval = install_and_save("|"); 3049 | return OR; 3050 | case LBRACE: 3051 | yylval = install_and_save("{"); 3052 | return c; 3053 | case RBRACE: 3054 | strcpy(yytext, "}"); 3055 | return c; 3056 | case '(': 3057 | yylval = install_and_save("("); 3058 | return c; 3059 | case ')': 3060 | strcpy(yytext, ")"); 3061 | return c; 3062 | case '[': 3063 | if (nextchar('[')) { 3064 | yylval = install_and_save("[["); 3065 | return LBB; 3066 | } 3067 | yylval = install_and_save("["); 3068 | return c; 3069 | case ']': 3070 | strcpy(yytext, "]"); 3071 | return c; 3072 | case '?': 3073 | yylval = install_and_save("?"); 3074 | return c; 3075 | case '*': 3076 | /* Replace ** by ^. This has been here since 1998, but is 3077 | undocumented (at least in the obvious places). It is in 3078 | the index of the Blue Book with a reference to p. 431, the 3079 | help for 'Deprecated'. S-PLUS 6.2 still allowed this, so 3080 | presumably it was for compatibility with S. */ 3081 | if (nextchar('*')) { 3082 | yylval = install_and_save2("^", "**"); 3083 | return '^'; 3084 | } else 3085 | yylval = install_and_save("*"); 3086 | return c; 3087 | case '+': 3088 | case '/': 3089 | case '^': 3090 | case '~': 3091 | case '$': 3092 | case '@': 3093 | yytext[0] = (char) c; 3094 | yytext[1] = '\0'; 3095 | yylval = install(yytext); 3096 | return c; 3097 | default: 3098 | yytext[0] = (char) c; 3099 | yytext[1] = '\0'; 3100 | return c; 3101 | } 3102 | } 3103 | 3104 | /** 3105 | * Sets the first elements of the yyloc structure with current 3106 | * information 3107 | */ 3108 | static void setfirstloc(void) 3109 | { 3110 | yylloc.first_line = ParseState.xxlineno; 3111 | yylloc.first_column = ParseState.xxcolno; 3112 | yylloc.first_byte = ParseState.xxbyteno; 3113 | yylloc.first_parsed = ParseState.xxparseno; 3114 | } 3115 | 3116 | static void setlastloc(void) 3117 | { 3118 | yylloc.last_line = ParseState.xxlineno; 3119 | yylloc.last_column = ParseState.xxcolno; 3120 | yylloc.last_byte = ParseState.xxbyteno; 3121 | yylloc.last_parsed = ParseState.xxparseno; 3122 | } 3123 | 3124 | /** 3125 | * Wrap around the token function. Returns the same result 3126 | * but increments the identifier, after a call to token_, 3127 | * the identifier variable contains the id of the token 3128 | * just returned 3129 | * 3130 | * @return the same as token 3131 | */ 3132 | 3133 | static int token_(void){ 3134 | // capture the position before retrieving the token 3135 | setfirstloc( ) ; 3136 | 3137 | // get the token 3138 | int res = token( ) ; 3139 | 3140 | // capture the position after 3141 | int _last_col = ParseState.xxcolno ; 3142 | int _last_parsed = ParseState.xxparseno ; 3143 | 3144 | _current_token = res ; 3145 | incrementId( ) ; 3146 | yylloc.id = identifier ; 3147 | 3148 | // record the position 3149 | if( res != '\n' && res != END_OF_INPUT) 3150 | record_( yylloc.first_parsed, yylloc.first_column, 3151 | _last_parsed, _last_col, 3152 | res, identifier, yytext ); 3153 | 3154 | return res; 3155 | } 3156 | 3157 | 3158 | static int yylex(void) 3159 | { 3160 | int tok; 3161 | 3162 | again: 3163 | 3164 | tok = token_(); 3165 | 3166 | /* Newlines must be handled in a context */ 3167 | /* sensitive way. The following block of */ 3168 | /* deals directly with newlines in the */ 3169 | /* body of "if" statements. */ 3170 | 3171 | if (tok == '\n') { 3172 | 3173 | if (EatLines || *contextp == '[' || *contextp == '(') 3174 | goto again; 3175 | 3176 | /* The essence of this is that in the body of */ 3177 | /* an "if", any newline must be checked to */ 3178 | /* see if it is followed by an "else". */ 3179 | /* such newlines are discarded. */ 3180 | 3181 | if (*contextp == 'i') { 3182 | 3183 | /* Find the next non-newline token */ 3184 | 3185 | while(tok == '\n') 3186 | tok = token_(); 3187 | 3188 | /* If we encounter "}", ")" or "]" then */ 3189 | /* we know that all immediately preceding */ 3190 | /* "if" bodies have been terminated. */ 3191 | /* The corresponding "i" values are */ 3192 | /* popped off the context stack. */ 3193 | 3194 | if (tok == RBRACE || tok == ')' || tok == ']' ) { 3195 | while (*contextp == 'i') 3196 | ifpop(); 3197 | *contextp-- = 0; 3198 | setlastloc(); 3199 | return tok; 3200 | } 3201 | 3202 | /* When a "," is encountered, it terminates */ 3203 | /* just the immediately preceding "if" body */ 3204 | /* so we pop just a single "i" of the */ 3205 | /* context stack. */ 3206 | 3207 | if (tok == ',') { 3208 | ifpop(); 3209 | setlastloc(); 3210 | return tok; 3211 | } 3212 | 3213 | /* Tricky! If we find an "else" we must */ 3214 | /* ignore the preceding newline. Any other */ 3215 | /* token means that we must return the newline */ 3216 | /* to terminate the "if" and "push back" that */ 3217 | /* token so that we will obtain it on the next */ 3218 | /* call to token. In either case sensitivity */ 3219 | /* is lost, so we pop the "i" from the context */ 3220 | /* stack. */ 3221 | 3222 | if(tok == ELSE) { 3223 | EatLines = 1; 3224 | ifpop(); 3225 | setlastloc(); 3226 | return ELSE; 3227 | } 3228 | else { 3229 | ifpop(); 3230 | SavedToken = tok; 3231 | xxlinesave = yylloc.first_line; 3232 | xxcolsave = yylloc.first_column; 3233 | xxbytesave = yylloc.first_byte; 3234 | xxparsesave = yylloc.first_parsed; 3235 | SavedLval = yylval; 3236 | setlastloc(); 3237 | if (ParseState.keepSrcRefs && ParseState.keepParseData && 3238 | yytext[0]) 3239 | 3240 | /* unrecord the pushed back token if not null */ 3241 | ParseState.data_count--; 3242 | return '\n'; 3243 | } 3244 | } 3245 | else { 3246 | setlastloc(); 3247 | return '\n'; 3248 | } 3249 | } 3250 | 3251 | /* Additional context sensitivities */ 3252 | 3253 | switch(tok) { 3254 | 3255 | /* Any newlines immediately following the */ 3256 | /* the following tokens are discarded. The */ 3257 | /* expressions are clearly incomplete. */ 3258 | 3259 | case '+': 3260 | case '-': 3261 | case '*': 3262 | case '/': 3263 | case '^': 3264 | case LT: 3265 | case LE: 3266 | case GE: 3267 | case GT: 3268 | case EQ: 3269 | case NE: 3270 | case OR: 3271 | case AND: 3272 | case OR2: 3273 | case AND2: 3274 | case SPECIAL: 3275 | case FUNCTION: 3276 | case WHILE: 3277 | case REPEAT: 3278 | case FOR: 3279 | case IN: 3280 | case '?': 3281 | case '!': 3282 | case '=': 3283 | case ':': 3284 | case '~': 3285 | case '$': 3286 | case '@': 3287 | case LEFT_ASSIGN: 3288 | case RIGHT_ASSIGN: 3289 | case ANON: 3290 | case EQ_ASSIGN: 3291 | EatLines = 1; 3292 | break; 3293 | 3294 | /* Push any "if" statements found and */ 3295 | /* discard any immediately following newlines. */ 3296 | 3297 | case IF: 3298 | IfPush(); 3299 | EatLines = 1; 3300 | break; 3301 | 3302 | /* Terminate any immediately preceding "if" */ 3303 | /* statements and discard any immediately */ 3304 | /* following newlines. */ 3305 | 3306 | case ELSE: 3307 | ifpop(); 3308 | EatLines = 1; 3309 | break; 3310 | 3311 | /* These tokens terminate any immediately */ 3312 | /* preceding "if" statements. */ 3313 | 3314 | case ';': 3315 | case ',': 3316 | ifpop(); 3317 | break; 3318 | 3319 | /* Any newlines following these tokens can */ 3320 | /* indicate the end of an expression. */ 3321 | 3322 | case SYMBOL: 3323 | case STR_CONST: 3324 | case NUM_CONST: 3325 | case NULL_CONST: 3326 | case NEXT: 3327 | case BREAK: 3328 | EatLines = 0; 3329 | break; 3330 | 3331 | /* Handle brackets, braces and parentheses */ 3332 | 3333 | case LBB: 3334 | if(contextp - contextstack >= CONTEXTSTACK_SIZE - 1) 3335 | error(_("contextstack overflow at line %d"), ParseState.xxlineno); 3336 | *++contextp = '['; 3337 | *++contextp = '['; 3338 | break; 3339 | 3340 | case '[': 3341 | if(contextp - contextstack >= CONTEXTSTACK_SIZE) 3342 | error(_("contextstack overflow at line %d"), ParseState.xxlineno); 3343 | *++contextp = (char) tok; 3344 | break; 3345 | 3346 | case LBRACE: 3347 | if(contextp - contextstack >= CONTEXTSTACK_SIZE) 3348 | error(_("contextstack overflow at line %d"), ParseState.xxlineno); 3349 | *++contextp = (char) tok; 3350 | EatLines = 1; 3351 | break; 3352 | 3353 | case '(': 3354 | if(contextp - contextstack >= CONTEXTSTACK_SIZE) 3355 | error(_("contextstack overflow at line %d"), ParseState.xxlineno); 3356 | *++contextp = (char) tok; 3357 | break; 3358 | 3359 | case ']': 3360 | while (*contextp == 'i') 3361 | ifpop(); 3362 | *contextp-- = 0; 3363 | EatLines = 0; 3364 | break; 3365 | 3366 | case RBRACE: 3367 | while (*contextp == 'i') 3368 | ifpop(); 3369 | *contextp-- = 0; 3370 | break; 3371 | 3372 | case ')': 3373 | while (*contextp == 'i') 3374 | ifpop(); 3375 | *contextp-- = 0; 3376 | EatLines = 0; 3377 | break; 3378 | 3379 | } 3380 | setlastloc(); 3381 | return tok; 3382 | } 3383 | /** 3384 | * Records location information about a symbol. The information is 3385 | * used to fill the data 3386 | * 3387 | */ 3388 | static void record_( int first_parsed, int first_column, int last_parsed, int last_column, 3389 | int token, int id, char* text_in ){ 3390 | 3391 | if (!ParseState.keepSrcRefs || !ParseState.keepParseData 3392 | || id == NA_INTEGER) return; 3393 | 3394 | // don't care about zero sized things 3395 | if( !yytext[0] ) return ; 3396 | 3397 | if (ParseState.data_count == DATA_COUNT) 3398 | growData(); 3399 | 3400 | _FIRST_COLUMN( ParseState.data_count ) = first_column; 3401 | _FIRST_PARSED( ParseState.data_count ) = first_parsed; 3402 | _LAST_COLUMN( ParseState.data_count ) = last_column; 3403 | _LAST_PARSED( ParseState.data_count ) = last_parsed; 3404 | _TOKEN( ParseState.data_count ) = token; 3405 | _ID( ParseState.data_count ) = id ; 3406 | _PARENT(ParseState.data_count) = 0 ; 3407 | if ( text_in ) 3408 | SET_STRING_ELT(PS_TEXT, ParseState.data_count, mkChar(text_in)); 3409 | else 3410 | SET_STRING_ELT(PS_TEXT, ParseState.data_count, mkChar("")); 3411 | 3412 | if( id > ID_COUNT ) 3413 | growID(id) ; 3414 | 3415 | ID_ID( id ) = ParseState.data_count ; 3416 | 3417 | ParseState.data_count++ ; 3418 | } 3419 | 3420 | /** 3421 | * records parent as the parent of all its childs. This grows the 3422 | * parents list with a new vector. The first element of the new 3423 | * vector is the parent id, and other elements are childs id 3424 | * 3425 | * @param parent id of the parent expression 3426 | * @param childs array of location information for all child symbols 3427 | * @param nchilds number of childs 3428 | */ 3429 | static void recordParents( int parent, yyltype * childs, int nchilds){ 3430 | 3431 | if( parent > ID_COUNT ){ 3432 | growID(parent) ; 3433 | } 3434 | 3435 | /* some of the childs might be an empty token (like cr) 3436 | which we do not want to track */ 3437 | int ii; /* loop index */ 3438 | yyltype loc ; 3439 | for( ii=0; ii loc.last_byte) ) 3442 | continue ; 3443 | /* This shouldn't happen... */ 3444 | if (loc.id < 0 || loc.id > identifier) { 3445 | error(_("internal parser error at line %d"), ParseState.xxlineno); 3446 | } 3447 | ID_PARENT( loc.id ) = parent; 3448 | } 3449 | 3450 | } 3451 | 3452 | /** 3453 | * The token pointed by the location has the wrong token type, 3454 | * This updates the type 3455 | * 3456 | * @param loc location information for the token to track 3457 | */ 3458 | static void modif_token( yyltype* loc, int tok ){ 3459 | 3460 | int id = loc->id ; 3461 | 3462 | if (!ParseState.keepSrcRefs || !ParseState.keepParseData 3463 | || id < 0 || id > ID_COUNT) return; 3464 | 3465 | if( tok == SYMBOL_FUNCTION_CALL ){ 3466 | // looking for first child of id 3467 | int j = ID_ID( id ) ; 3468 | int parent = id ; 3469 | 3470 | if (j < 0 || j > ID_COUNT) 3471 | return; 3472 | 3473 | while( ID_PARENT( _ID(j) ) != parent ){ 3474 | j-- ; 3475 | if (j < 0) 3476 | return; 3477 | } 3478 | 3479 | if( _TOKEN(j) == SYMBOL ){ 3480 | _TOKEN(j) = SYMBOL_FUNCTION_CALL ; 3481 | } 3482 | 3483 | } else{ 3484 | _TOKEN( ID_ID(id) ) = tok ; 3485 | } 3486 | 3487 | } 3488 | 3489 | /* this local version of lengthgets() always copies and doesn't fill with NA */ 3490 | static SEXP lengthgets2(SEXP x, int len) { 3491 | SEXP result; 3492 | PROTECT(result = allocVector( TYPEOF(x), len )); 3493 | 3494 | len = (len < length(x)) ? len : length(x); 3495 | switch(TYPEOF(x)) { 3496 | case INTSXP: 3497 | for (int i = 0; i < len; i++) 3498 | INTEGER(result)[i] = INTEGER(x)[i]; 3499 | for (int i = len; i < length(result); i++) 3500 | INTEGER(result)[i] = 0; 3501 | break; 3502 | case STRSXP: 3503 | for (int i = 0; i < len; i++) 3504 | SET_STRING_ELT(result, i, STRING_ELT(x, i)); 3505 | break; 3506 | default: 3507 | UNIMPLEMENTED_TYPE("lengthgets2", x); 3508 | } 3509 | UNPROTECT(1); /* result */ 3510 | return result; 3511 | } 3512 | 3513 | static void finalizeData( ){ 3514 | 3515 | int nloc = ParseState.data_count ; 3516 | 3517 | int i, j, id ; 3518 | int parent ; 3519 | 3520 | /* store parents in the data */ 3521 | for( i=0; i= 0; i--) { 3586 | if (_TOKEN(i) == COMMENT) { 3587 | int orphan = 1; 3588 | int istartl = _FIRST_PARSED(i); 3589 | int istartc = _FIRST_COLUMN(i); 3590 | 3591 | /* look for first node j that does not end before the comment i */ 3592 | for(j = i + 1; j < nloc && _LAST_PARSED(j) <= istartl; j++); 3593 | 3594 | if (j < nloc) { 3595 | for(;;) { 3596 | int jstartl = _FIRST_PARSED(j); 3597 | int jstartc = _FIRST_COLUMN(j); 3598 | 3599 | if (jstartl < istartl || (jstartl == istartl 3600 | && jstartc <= istartc)) { 3601 | /* j starts before or at the comment */ 3602 | _PARENT(i) = _ID(j); 3603 | orphan = 0; 3604 | break; 3605 | } 3606 | /* find parent of j */ 3607 | int jparent = _PARENT(j); 3608 | if (jparent == 0) 3609 | break; /* orphan */ 3610 | j = ID_ID(jparent); 3611 | } 3612 | } 3613 | if (orphan) 3614 | _PARENT(i) = 0; 3615 | } 3616 | } 3617 | #else 3618 | /* the original algorithm, which is slow for large inputs */ 3619 | 3620 | int comment_line, comment_first_col; 3621 | int this_first_parsed, this_last_parsed, this_first_col ; 3622 | int orphan ; 3623 | 3624 | for( i=0; i new_count) 3743 | new_count = 2*new_count + 1; 3744 | 3745 | if (new_count <= ID_COUNT) 3746 | return; 3747 | 3748 | int new_size = (1 + new_count)*2; 3749 | PS_SET_IDS(lengthgets2(PS_IDS, new_size)); 3750 | } 3751 | --------------------------------------------------------------------------------