├── README.md
└── src
    └── main
        └── gram.y


/README.md:
--------------------------------------------------------------------------------
 1 | # R with (x) => x + 1
 2 | 
 3 | Modified R grammar file that adds anonymous functions like this:
 4 | 
 5 | ```r
 6 | f <- (x) => x + 1
 7 | f(2)
 8 | # 3
 9 | ```
10 | 
11 | ## Install
12 | 
13 | 1. Define the R version:
14 | 
15 |     ```
16 |     export R_VERSION=3.6.3
17 |     ```
18 | 
19 |     This is the R version I based the modifications on, so it might not work with other versions.
20 | 2. Follow the 'Download and extract R' instructions from https://docs.rstudio.com/resources/install-r-source/
21 | 3. Replace the downloaded version of `src/main/gram.y` with `src/main/gram.y` from this repo
22 | 4. Generate a new parser file:
23 | 
24 |     ```
25 |     bison -d src/main/gram.y -o src/main/gram.c
26 |     ```
27 | 
28 |     You should see this output:
29 | 
30 |     ```
31 |     src/main/gram.y: warning: 84 shift/reduce conflicts [-Wconflicts-sr]
32 |     ```
33 | 5. Follow the 'Build and install R' instructions from https://docs.rstudio.com/resources/install-r-source/
34 | 
35 |     When I did this, I added a couple of lines to the `./configure` command:
36 | 
37 |     ```
38 |     ./configure \
39 |         --prefix=/opt/R/${R_VERSION} \
40 |         --enable-memory-profiling \
41 |         --enable-R-shlib \
42 |         --with-blas \
43 |         --with-lapack \
44 |         --with-pcre1 \
45 |         --without-recommended-packages
46 |     ```
47 | 
48 | ## Usage
49 | 
50 | Run:
51 | 
52 | ```
53 | /opt/R/${R_VERSION}/bin/R
54 | ```
55 | 
56 | Play:
57 | 
58 | ```r
59 | f <- (x) => x + 1
60 | g <- (x, y) => x + y
61 | h <- (x = 1) => x + 1
62 | i <- () => 1
63 | f(2)
64 | # 3
65 | g(2, 3)
66 | # 5
67 | h()
68 | # 2
69 | i()
70 | # 1
71 | ```
72 | 
73 | Check that things haven't broken too much:
74 | 
75 | ```r
76 | # Normal functions
77 | j <- function(x) x + 1
78 | j(2)
79 | # 3
80 | # Normal expressions in parentheses
81 | (1)
82 | # 1
83 | # Normal assignment in parentheses
84 | (y = 5)
85 | y
86 | # 5
87 | ```
88 | 
89 | ## Issues
90 | 
91 | The R grammar is complex and there's a fair chance that these modifications have broken something. Feel free to raise an issue if you find any errors.
92 | 


--------------------------------------------------------------------------------
/src/main/gram.y:
--------------------------------------------------------------------------------
   1 | %{
   2 | /*
   3 |  *  R : A Computer Language for Statistical Data Analysis
   4 |  *  Copyright (C) 1995, 1996, 1997  Robert Gentleman and Ross Ihaka
   5 |  *  Copyright (C) 1997--2019  The R Core Team
   6 |  *  Copyright (C) 2009--2011  Romain Francois
   7 |  *
   8 |  *  This program is free software; you can redistribute it and/or modify
   9 |  *  it under the terms of the GNU General Public License as published by
  10 |  *  the Free Software Foundation; either version 2 of the License, or
  11 |  *  (at your option) any later version.
  12 |  *
  13 |  *  This program is distributed in the hope that it will be useful,
  14 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 |  *  GNU General Public License for more details.
  17 |  *
  18 |  *  You should have received a copy of the GNU General Public License
  19 |  *  along with this program; if not, a copy is available at
  20 |  *  https://www.R-project.org/Licenses/
  21 |  */
  22 | 
  23 | #ifdef HAVE_CONFIG_H
  24 | #include <config.h>
  25 | #endif
  26 | 
  27 | #define R_USE_SIGNALS 1
  28 | #include "IOStuff.h"		/*-> Defn.h */
  29 | #include "Fileio.h"
  30 | #include "Parse.h"
  31 | #include <R_ext/Print.h>
  32 | 
  33 | #if !defined(__STDC_ISO_10646__) && (defined(__APPLE__) || defined(__FreeBSD__))
  34 | /* This may not be 100% true (see the comment in rlocale.h),
  35 |    but it seems true in normal locales */
  36 | # define __STDC_ISO_10646__
  37 | #endif
  38 | 
  39 | /* #define YYDEBUG 1 */
  40 | #define YYERROR_VERBOSE 1
  41 | #define PARSE_ERROR_SIZE 256	    /* Parse error messages saved here */
  42 | #define PARSE_CONTEXT_SIZE 256	    /* Recent parse context kept in a circular buffer */
  43 | 
  44 | static Rboolean busy = FALSE;
  45 | static SEXP R_NullSymbol = NULL;
  46 | 
  47 | static int identifier ;
  48 | static void incrementId(void);
  49 | static void initData(void);
  50 | static void initId(void);
  51 | static void record_( int, int, int, int, int, int, char* ) ;
  52 | 
  53 | static void yyerror(const char *);
  54 | static int yylex();
  55 | int yyparse(void);
  56 | 
  57 | static FILE *fp_parse;
  58 | static int (*ptr_getc)(void);
  59 | 
  60 | static int	SavedToken;
  61 | static SEXP	SavedLval;
  62 | 
  63 | #define yyconst const
  64 | 
  65 | typedef struct yyltype
  66 | {
  67 |   int first_line;
  68 |   int first_column;
  69 |   int first_byte;
  70 | 
  71 |   int last_line;
  72 |   int last_column;
  73 |   int last_byte;
  74 |   
  75 |   int first_parsed;
  76 |   int last_parsed;
  77 |   
  78 |   int id;
  79 | } yyltype;
  80 | 
  81 | 
  82 | #define INIT_DATA_COUNT 16384    	/* init parser data to this size */
  83 | #define MAX_DATA_COUNT   65536		/* release it at the end if it is this size or larger*/
  84 | 
  85 | #define DATA_COUNT  (length( PS_DATA ) / DATA_ROWS)
  86 | #define ID_COUNT    ((length( PS_IDS ) / 2) - 1)
  87 | 
  88 | static void finalizeData( ) ;
  89 | static void growData( ) ;
  90 | static void growID( int ) ;
  91 | 
  92 | #define DATA_ROWS 8
  93 | 
  94 | #define _FIRST_PARSED( i ) INTEGER( PS_DATA )[ DATA_ROWS*(i)     ]
  95 | #define _FIRST_COLUMN( i ) INTEGER( PS_DATA )[ DATA_ROWS*(i) + 1 ]
  96 | #define _LAST_PARSED( i )  INTEGER( PS_DATA )[ DATA_ROWS*(i) + 2 ]
  97 | #define _LAST_COLUMN( i )  INTEGER( PS_DATA )[ DATA_ROWS*(i) + 3 ]
  98 | #define _TERMINAL( i )     INTEGER( PS_DATA )[ DATA_ROWS*(i) + 4 ]
  99 | #define _TOKEN( i )        INTEGER( PS_DATA )[ DATA_ROWS*(i) + 5 ]
 100 | #define _ID( i )           INTEGER( PS_DATA )[ DATA_ROWS*(i) + 6 ]
 101 | #define _PARENT(i)         INTEGER( PS_DATA )[ DATA_ROWS*(i) + 7 ]
 102 | 
 103 | #define ID_ID( i )      INTEGER(PS_IDS)[ 2*(i) ]
 104 | #define ID_PARENT( i )  INTEGER(PS_IDS)[ 2*(i) + 1 ]
 105 | 
 106 | static void modif_token( yyltype*, int ) ;
 107 | static void recordParents( int, yyltype*, int) ;
 108 | 
 109 | static int _current_token ;
 110 | 
 111 | /**
 112 |  * Records an expression (non terminal symbol 'expr') and gives it an id
 113 |  *
 114 |  * @param expr expression we want to record and flag with the next id
 115 |  * @param loc the location of the expression
 116 |  */   
 117 | static void setId( SEXP expr, yyltype loc){
 118 |     record_( 
 119 | 	    (loc).first_parsed, (loc).first_column, (loc).last_parsed, (loc).last_column, 
 120 | 	    _current_token, (loc).id, 0 ) ;
 121 | }
 122 | 
 123 | # define YYLTYPE yyltype
 124 | # define YYLLOC_DEFAULT(Current, Rhs, N)				\
 125 |     do	{ 								\
 126 | 	if (N){								\
 127 | 	    (Current).first_line   = YYRHSLOC (Rhs, 1).first_line;	\
 128 | 	    (Current).first_column = YYRHSLOC (Rhs, 1).first_column;	\
 129 | 	    (Current).first_byte   = YYRHSLOC (Rhs, 1).first_byte;	\
 130 | 	    (Current).last_line    = YYRHSLOC (Rhs, N).last_line;	\
 131 | 	    (Current).last_column  = YYRHSLOC (Rhs, N).last_column;	\
 132 | 	    (Current).last_byte    = YYRHSLOC (Rhs, N).last_byte;	\
 133 | 	    (Current).first_parsed = YYRHSLOC (Rhs, 1).first_parsed;    \
 134 | 	    (Current).last_parsed  = YYRHSLOC (Rhs, N).last_parsed;	\
 135 | 	    incrementId( ) ; 						\
 136 | 	    (Current).id = identifier ; 				\
 137 | 	    _current_token = yyr1[yyn] ; 				\
 138 | 	    if (ParseState.keepSrcRefs && ParseState.keepParseData) {	\
 139 | 	        yyltype childs[N];					\
 140 | 	        int ii = 0; 						\
 141 | 	        for(ii=0; ii<N; ii++){					\
 142 | 		      childs[ii] = YYRHSLOC (Rhs, (ii+1) ) ; 		\
 143 | 	        } 							\
 144 | 	        recordParents( identifier, childs, N) ; 		\
 145 | 	    }								\
 146 | 	} else	{							\
 147 | 	  (Current).first_line   = (Current).last_line   =		\
 148 | 	    YYRHSLOC (Rhs, 0).last_line;				\
 149 | 	  (Current).first_parsed   = (Current).last_parsed   =		\
 150 | 	    YYRHSLOC (Rhs, 0).last_parsed;				\
 151 | 	  (Current).first_column = YYRHSLOC (Rhs, 0).last_column;	\
 152 | 	  (Current).last_column = (Current).first_column - 1;		\
 153 | 	  (Current).first_byte = YYRHSLOC (Rhs, 0).last_byte;		\
 154 | 	  (Current).last_byte = (Current).first_byte - 1;		\
 155 | 	  (Current).id = NA_INTEGER;                                    \
 156 | 	} 								\
 157 |     } while (0)
 158 | 
 159 | 		
 160 | # define YY_LOCATION_PRINT(File,Loc)					\
 161 |  fprintf ( File, "%d.%d.%d-%d.%d.%d (%d)",				\
 162 |  	(Loc).first_line, (Loc).first_column,	(Loc).first_byte, 	\
 163 |  	(Loc).last_line,  (Loc).last_column, 	(Loc).last_byte, 	\
 164 | 	(Loc).id )
 165 | 
 166 | /* Useful defines so editors don't get confused ... */
 167 | 
 168 | #define LBRACE	'{'
 169 | #define RBRACE	'}'
 170 | 
 171 | /* Functions used in the parsing process */
 172 | 
 173 | static void	CheckFormalArgs(SEXP, SEXP, YYLTYPE *);
 174 | static SEXP	FirstArg(SEXP, SEXP); /* create list with one element */
 175 | static void 	GrowList(SEXP, SEXP); /* add element to list end */
 176 | 
 177 | static void	SetSingleSrcRef(SEXP);
 178 | static void	AppendToSrcRefs(SEXP);
 179 | static void	PrependToSrcRefs(SEXP);
 180 | static SEXP	SrcRefsToVectorList();
 181 | 
 182 | static void	IfPush(void);
 183 | static int	KeywordLookup(const char *);
 184 | static SEXP	NewList(void);
 185 | static void	NextArg(SEXP, SEXP, SEXP); /* add named element to list end */
 186 | static SEXP	TagArg(SEXP, SEXP, YYLTYPE *);
 187 | static int 	processLineDirective();
 188 | 
 189 | /* These routines allocate constants */
 190 | 
 191 | static SEXP	mkComplex(const char *);
 192 | SEXP		mkFalse(void);
 193 | static SEXP     mkFloat(const char *);
 194 | static SEXP 	mkInt(const char *); 
 195 | static SEXP	mkNA(void);
 196 | SEXP		mkTrue(void);
 197 | 
 198 | /* Internal lexer / parser state variables */
 199 | 
 200 | static int	EatLines = 0;
 201 | static int	GenerateCode = 0;
 202 | static int	EndOfFile = 0;
 203 | static int	xxgetc();
 204 | static int	xxungetc(int);
 205 | static int	xxcharcount, xxcharsave;
 206 | static int	xxlinesave, xxbytesave, xxcolsave, xxparsesave;
 207 | 
 208 | static SrcRefState ParseState;
 209 | 
 210 | #define PS_SET_SRCREFS(x)   SET_VECTOR_ELT(ParseState.sexps, 0, (x))
 211 | #define PS_SET_SRCFILE(x)   SET_VECTOR_ELT(ParseState.sexps, 1, (x))
 212 | #define PS_SET_ORIGINAL(x)  SET_VECTOR_ELT(ParseState.sexps, 2, (x))
 213 | 
 214 | /* direct pointer to data is kept for performance of finalizeData() */
 215 | #define PS_SET_DATA(x)      do {                \
 216 |     SEXP __x__ = (x);                           \
 217 |     SET_VECTOR_ELT(ParseState.sexps, 3, __x__); \
 218 |     ParseState.data = __x__;                    \
 219 | } while(0);
 220 | 
 221 | #define PS_SET_TEXT(x)      SET_VECTOR_ELT(ParseState.sexps, 4, (x))
 222 | #define PS_SET_IDS(x)       SET_VECTOR_ELT(ParseState.sexps, 5, (x))
 223 | #define PS_SET_SVS(x)       SET_VECTOR_ELT(ParseState.sexps, 6, (x))
 224 | 
 225 | #define PS_SRCREFS          VECTOR_ELT(ParseState.sexps, 0)
 226 | #define PS_SRCFILE          VECTOR_ELT(ParseState.sexps, 1)
 227 | #define PS_ORIGINAL         VECTOR_ELT(ParseState.sexps, 2)
 228 | #define PS_DATA             ParseState.data
 229 | #define PS_TEXT             VECTOR_ELT(ParseState.sexps, 4)
 230 | #define PS_IDS              VECTOR_ELT(ParseState.sexps, 5)
 231 | #define PS_SVS              VECTOR_ELT(ParseState.sexps, 6)
 232 | 
 233 | /* Memory protection in the parser
 234 | 
 235 |    The generated code of the parser keeps semantic values (SEXPs) on its
 236 |    semantic values stack. Values are added to the stack during shift and
 237 |    reduce operations and are removed during reduce operations or error
 238 |    handling. Values are created by the lexer before they are added to the
 239 |    stack. Values are also held in a local SEXP variable once removed from
 240 |    the stack but still needed. The stack is automatically expanded on demand.
 241 | 
 242 |    For memory protection, it would be natural to have that stack on the R heap
 243 |    and to use PROTECT/UNPROTECT to protect values in local SEXP variables.
 244 |    Unfortunately, bison does not seem to be customizable enough to allow this.
 245 | 
 246 |    Hence, semantic values, when created by the lexer or reduce operations, are
 247 |    placed on parser state precious multi-set via PRESERVE_SV. They are removed
 248 |    from the multi-set in reduce operations using RELEASE_SV, because by design
 249 |    of the bison parsers such values are subsequently removed from the stack.
 250 |    They are also automatically removed when the parsing finishes, including
 251 |    parser error (also on R error, via the context on-end action).
 252 | 
 253 |    Previously semantic values were protected via PROTECT/UNPROTECT_PTR with
 254 |    similar semantics but using protect stack shared with PROTECT/UNPROTECT.
 255 |    Using a separate precious multi-set is safe even with interleaving of the
 256 |    two protection schemes.
 257 | */
 258 | 
 259 | #define INIT_SVS()     PS_SET_SVS(R_NewPreciousMSet(200))
 260 | #define PRESERVE_SV(x) R_PreserveInMSet((x), PS_SVS)
 261 | #define RELEASE_SV(x)  R_ReleaseFromMSet((x), PS_SVS)
 262 | #define CLEAR_SVS()    R_ReleaseMSet(PS_SVS, 500)
 263 | 
 264 | /* Memory leak
 265 | 
 266 |    yyparse(), as generated by bison, allocates extra space for the parser
 267 |    stack using malloc(). Unfortunately this means that there is a memory
 268 |    leak in case of an R error (long-jump). In principle, we could define
 269 |    yyoverflow() to relocate the parser stacks for bison and allocate say on
 270 |    the R heap, but yyoverflow() is undocumented and somewhat complicated
 271 |    (we would have to replicate some macros from the generated parser here).
 272 |    The same problem exists at least in the Rd and LaTeX parsers in tools.
 273 | */
 274 | 
 275 | #include <rlocale.h>
 276 | #ifdef HAVE_LANGINFO_CODESET
 277 | # include <langinfo.h>
 278 | #endif
 279 | 
 280 | static int mbcs_get_next(int c, wchar_t *wc)
 281 | {
 282 |     int i, res, clen = 1; char s[9];
 283 |     mbstate_t mb_st;
 284 | 
 285 |     s[0] = (char) c;
 286 |     /* This assumes (probably OK) that all MBCS embed ASCII as single-byte
 287 |        lead bytes, including control chars */
 288 |     if((unsigned int) c < 0x80) {
 289 | 	*wc = (wchar_t) c;
 290 | 	return 1;
 291 |     }
 292 |     if(utf8locale) {
 293 | 	clen = utf8clen((char) c);
 294 | 	for(i = 1; i < clen; i++) {
 295 | 	    c = xxgetc();
 296 | 	    if(c == R_EOF) error(_("EOF whilst reading MBCS char at line %d"), ParseState.xxlineno);
 297 | 	    s[i] = (char) c;
 298 | 	}
 299 | 	s[clen] ='\0'; /* x86 Solaris requires this */
 300 | 	res = (int) mbrtowc(wc, s, clen, NULL);
 301 | 	if(res == -1) error(_("invalid multibyte character in parser at line %d"), ParseState.xxlineno);
 302 |     } else {
 303 | 	/* This is not necessarily correct for stateful MBCS */
 304 | 	while(clen <= MB_CUR_MAX) {
 305 | 	    mbs_init(&mb_st);
 306 | 	    res = (int) mbrtowc(wc, s, clen, &mb_st);
 307 | 	    if(res >= 0) break;
 308 | 	    if(res == -1)
 309 | 		error(_("invalid multibyte character in parser at line %d"), ParseState.xxlineno);
 310 | 	    /* so res == -2 */
 311 | 	    c = xxgetc();
 312 | 	    if(c == R_EOF) error(_("EOF whilst reading MBCS char at line %d"), ParseState.xxlineno);
 313 | 	    s[clen++] = (char) c;
 314 | 	} /* we've tried enough, so must be complete or invalid by now */
 315 |     }
 316 |     for(i = clen - 1; i > 0; i--) xxungetc(s[i]);
 317 |     return clen;
 318 | }
 319 | 
 320 | /* Soon to be defunct entry points */
 321 | 
 322 | void		R_SetInput(int);
 323 | int		R_fgetc(FILE*);
 324 | 
 325 | /* Routines used to build the parse tree */
 326 | 
 327 | static SEXP	xxnullformal(void);
 328 | static SEXP	xxfirstformal0(SEXP);
 329 | static SEXP	xxfirstformal1(SEXP, SEXP);
 330 | static SEXP	xxaddformal0(SEXP, SEXP, YYLTYPE *);
 331 | static SEXP	xxaddformal1(SEXP, SEXP, SEXP, YYLTYPE *);
 332 | static SEXP	xxexprlist0();
 333 | static SEXP	xxexprlist1(SEXP, YYLTYPE *);
 334 | static SEXP	xxexprlist2(SEXP, SEXP, YYLTYPE *);
 335 | static SEXP	xxsub0(void);
 336 | static SEXP	xxsub1(SEXP, YYLTYPE *);
 337 | static SEXP	xxsymsub0(SEXP, YYLTYPE *);
 338 | static SEXP	xxsymsub1(SEXP, SEXP, YYLTYPE *);
 339 | static SEXP	xxnullsub0(YYLTYPE *);
 340 | static SEXP	xxnullsub1(SEXP, YYLTYPE *);
 341 | static SEXP	xxsublist1(SEXP);
 342 | static SEXP	xxsublist2(SEXP, SEXP);
 343 | static SEXP	xxcond(SEXP);
 344 | static SEXP	xxifcond(SEXP);
 345 | static SEXP	xxif(SEXP, SEXP, SEXP);
 346 | static SEXP	xxifelse(SEXP, SEXP, SEXP, SEXP);
 347 | static SEXP	xxforcond(SEXP, SEXP);
 348 | static SEXP	xxfor(SEXP, SEXP, SEXP);
 349 | static SEXP	xxwhile(SEXP, SEXP, SEXP);
 350 | static SEXP	xxrepeat(SEXP, SEXP);
 351 | static SEXP	xxnxtbrk(SEXP);
 352 | static SEXP	xxfuncall(SEXP, SEXP);
 353 | static SEXP	xxdefun(SEXP, SEXP, SEXP, YYLTYPE *);
 354 | static SEXP	xxdefunanon(SEXP, SEXP, YYLTYPE *);
 355 | static SEXP	xxunary(SEXP, SEXP);
 356 | static SEXP	xxbinary(SEXP, SEXP, SEXP);
 357 | static SEXP	xxparen(SEXP, SEXP);
 358 | static SEXP	xxsubscript(SEXP, SEXP, SEXP);
 359 | static SEXP	xxexprlist(SEXP, YYLTYPE *, SEXP);
 360 | static int	xxvalue(SEXP, int, YYLTYPE *);
 361 | 
 362 | #define YYSTYPE		SEXP
 363 | 
 364 | %}
 365 | 
 366 | %token-table
 367 | 
 368 | %token		END_OF_INPUT ERROR
 369 | %token		STR_CONST NUM_CONST NULL_CONST SYMBOL FUNCTION ANON
 370 | %token		INCOMPLETE_STRING
 371 | %token		LEFT_ASSIGN EQ_ASSIGN RIGHT_ASSIGN LBB
 372 | %token		FOR IN IF ELSE WHILE NEXT BREAK REPEAT
 373 | %token		GT GE LT LE EQ NE AND OR AND2 OR2
 374 | %token		NS_GET NS_GET_INT
 375 | %token		COMMENT LINE_DIRECTIVE
 376 | %token		SYMBOL_FORMALS
 377 | %token		EQ_FORMALS
 378 | %token		EQ_SUB SYMBOL_SUB
 379 | %token		SYMBOL_FUNCTION_CALL
 380 | %token		SYMBOL_PACKAGE
 381 | /* no longer used: %token COLON_ASSIGN */
 382 | %token		SLOT
 383 | 
 384 | /* This is the precedence table, low to high */
 385 | %left		'?'
 386 | %left		LOW WHILE FOR REPEAT
 387 | %nonassoc	ANON
 388 | %right		IF
 389 | %left		ELSE
 390 | %right		LEFT_ASSIGN
 391 | %right		EQ_ASSIGN
 392 | %left		RIGHT_ASSIGN
 393 | %left		'~' TILDE
 394 | %left		OR OR2
 395 | %left		AND AND2
 396 | %left		UNOT NOT
 397 | %nonassoc   	GT GE LT LE EQ NE
 398 | %left		'+' '-'
 399 | %left		'*' '/'
 400 | %left		SPECIAL
 401 | %left		':'
 402 | %left		UMINUS UPLUS
 403 | %right		'^'
 404 | %left		'$' '@'
 405 | %left		NS_GET NS_GET_INT
 406 | %nonassoc	'(' '[' LBB
 407 | 
 408 | %%
 409 | 
 410 | prog	:	END_OF_INPUT			{ YYACCEPT; }
 411 | 	|	'\n'				{ yyresult = xxvalue(NULL,2,NULL);	goto yyreturn; }
 412 | 	|	expr_or_assign '\n'			{ yyresult = xxvalue($1,3,&@1);	goto yyreturn; }
 413 | 	|	expr_or_assign ';'			{ yyresult = xxvalue($1,4,&@1);	goto yyreturn; }
 414 | 	|	error	 			{ YYABORT; }
 415 | 	;
 416 | 
 417 | expr_or_assign  :    expr                       { $$ = $1; }
 418 |                 |    equal_assign               { $$ = $1; }
 419 |                 ;
 420 | 
 421 | expr_not_sym_or_assign	:	expr_not_sym	{ $$ = $1; }
 422 | 			|	equal_assign	{ $$ = $1; }
 423 | 			;
 424 | 
 425 | equal_assign    :    expr EQ_ASSIGN expr_or_assign              { $$ = xxbinary($2,$1,$3); setId( $$, @$); }
 426 |                 ;
 427 | 
 428 | expr	:	expr_not_sym			{ $$ = $1; }
 429 | 	|	SYMBOL				{ $$ = $1; }
 430 | 	;
 431 | 
 432 | expr_not_sym	: 	NUM_CONST		{ $$ = $1;	setId( $$, @$); }
 433 | 	|	STR_CONST			{ $$ = $1;	setId( $$, @$); }
 434 | 	|	NULL_CONST			{ $$ = $1;	setId( $$, @$); }
 435 | 	|	'(' SYMBOL ')' ANON expr_or_assign
 436 | 						{ $$ = xxfirstformal0($2); 	modif_token( &@2, SYMBOL_FORMALS ) ;
 437 | 						  $$ = xxdefunanon($$,$5,&@$); 	setId( $$, @$); }
 438 | 	|	'(' SYMBOL EQ_ASSIGN expr ')' ANON expr_or_assign
 439 | 						{ $$ = xxfirstformal1($2,$4); 	modif_token( &@2, SYMBOL_FORMALS ) ; modif_token( &@3, EQ_FORMALS ) ;
 440 | 						  $$ = xxdefunanon($$,$7,&@$); 	setId( $$, @$); }
 441 | 	|	'(' formlist ')' ANON expr_or_assign
 442 | 						{ $$ = xxdefunanon($2,$5,&@$); 	setId( $$, @$); }
 443 | 	|	'{' exprlist '}'		{ $$ = xxexprlist($1,&@1,$2); setId( $$, @$); }
 444 | 	|	'(' SYMBOL ')'			{ $$ = xxparen($1,$2);	setId( $$, @$); }
 445 | 	|	'(' SYMBOL EQ_ASSIGN expr  ')'	{ $$ = xxbinary($3,$2,$4);
 446 | 						  $$ = xxparen($1,$$);	setId( $$, @$); }
 447 | 	|	'(' expr_not_sym_or_assign ')'	{ $$ = xxparen($1,$2);	setId( $$, @$); }
 448 | 
 449 | 	|	'-' expr %prec UMINUS		{ $$ = xxunary($1,$2);	setId( $$, @$); }
 450 | 	|	'+' expr %prec UMINUS		{ $$ = xxunary($1,$2);	setId( $$, @$); }
 451 | 	|	'!' expr %prec UNOT		{ $$ = xxunary($1,$2);	setId( $$, @$); }
 452 | 	|	'~' expr %prec TILDE		{ $$ = xxunary($1,$2);	setId( $$, @$); }
 453 | 	|	'?' expr			{ $$ = xxunary($1,$2);	setId( $$, @$); }
 454 | 
 455 | 	|	expr ':'  expr			{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 456 | 	|	expr '+'  expr			{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 457 | 	|	expr '-' expr			{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 458 | 	|	expr '*' expr			{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 459 | 	|	expr '/' expr			{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 460 | 	|	expr '^' expr 			{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 461 | 	|	expr SPECIAL expr		{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 462 | 	|	expr '%' expr			{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 463 | 	|	expr '~' expr			{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 464 | 	|	expr '?' expr			{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 465 | 	|	expr LT expr			{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 466 | 	|	expr LE expr			{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 467 | 	|	expr EQ expr			{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 468 | 	|	expr NE expr			{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 469 | 	|	expr GE expr			{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 470 | 	|	expr GT expr			{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 471 | 	|	expr AND expr			{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 472 | 	|	expr OR expr			{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 473 | 	|	expr AND2 expr			{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 474 | 	|	expr OR2 expr			{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 475 | 
 476 | 	|	expr LEFT_ASSIGN expr 		{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 477 | 	|	expr RIGHT_ASSIGN expr 		{ $$ = xxbinary($2,$3,$1);	setId( $$, @$); }
 478 | 	|	FUNCTION '(' formlist ')' cr expr_or_assign %prec LOW
 479 | 						{ $$ = xxdefun($1,$3,$6,&@$); 	setId( $$, @$); }
 480 | 	|	expr '(' sublist ')'		{ $$ = xxfuncall($1,$3);  setId( $$, @$); modif_token( &@1, SYMBOL_FUNCTION_CALL ) ; }
 481 | 	|	IF ifcond expr_or_assign 	{ $$ = xxif($1,$2,$3);	setId( $$, @$); }
 482 | 	|	IF ifcond expr_or_assign ELSE expr_or_assign	{ $$ = xxifelse($1,$2,$3,$5);	setId( $$, @$); }
 483 | 	|	FOR forcond expr_or_assign %prec FOR 	{ $$ = xxfor($1,$2,$3);	setId( $$, @$); }
 484 | 	|	WHILE cond expr_or_assign	{ $$ = xxwhile($1,$2,$3);	setId( $$, @$); }
 485 | 	|	REPEAT expr_or_assign			{ $$ = xxrepeat($1,$2);	setId( $$, @$); }
 486 | 	|	expr LBB sublist ']' ']'	{ $$ = xxsubscript($1,$2,$3);	setId( $$, @$); }
 487 | 	|	expr '[' sublist ']'		{ $$ = xxsubscript($1,$2,$3);	setId( $$, @$); }
 488 | 	|	SYMBOL NS_GET SYMBOL		{ $$ = xxbinary($2,$1,$3);      setId( $$, @$); modif_token( &@1, SYMBOL_PACKAGE ) ; }
 489 | 	|	SYMBOL NS_GET STR_CONST		{ $$ = xxbinary($2,$1,$3);      setId( $$, @$); modif_token( &@1, SYMBOL_PACKAGE ) ; }
 490 | 	|	STR_CONST NS_GET SYMBOL		{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 491 | 	|	STR_CONST NS_GET STR_CONST	{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 492 | 	|	SYMBOL NS_GET_INT SYMBOL	{ $$ = xxbinary($2,$1,$3);      setId( $$, @$); modif_token( &@1, SYMBOL_PACKAGE ) ;}
 493 | 	|	SYMBOL NS_GET_INT STR_CONST	{ $$ = xxbinary($2,$1,$3);      setId( $$, @$); modif_token( &@1, SYMBOL_PACKAGE ) ;}
 494 | 	|	STR_CONST NS_GET_INT SYMBOL	{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 495 | 	|	STR_CONST NS_GET_INT STR_CONST	{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 496 | 	|	expr '$' SYMBOL			{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 497 | 	|	expr '$' STR_CONST		{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 498 | 	|	expr '@' SYMBOL			{ $$ = xxbinary($2,$1,$3);      setId( $$, @$); modif_token( &@3, SLOT ) ; }
 499 | 	|	expr '@' STR_CONST		{ $$ = xxbinary($2,$1,$3);	setId( $$, @$); }
 500 | 	|	NEXT				{ $$ = xxnxtbrk($1);	setId( $$, @$); }
 501 | 	|	BREAK				{ $$ = xxnxtbrk($1);	setId( $$, @$); }
 502 | 	;
 503 | 
 504 | 
 505 | cond	:	'(' expr ')'			{ $$ = xxcond($2);   }
 506 | 	;
 507 | 
 508 | ifcond	:	'(' expr ')'			{ $$ = xxifcond($2); }
 509 | 	;
 510 | 
 511 | forcond :	'(' SYMBOL IN expr ')' 		{ $$ = xxforcond($2,$4);	setId( $$, @$); }
 512 | 	;
 513 | 
 514 | 
 515 | exprlist:					{ $$ = xxexprlist0();	setId( $$, @$); }
 516 | 	|	expr_or_assign			{ $$ = xxexprlist1($1, &@1); }
 517 | 	|	exprlist ';' expr_or_assign	{ $$ = xxexprlist2($1, $3, &@3); }
 518 | 	|	exprlist ';'			{ $$ = $1;		setId( $$, @$); }
 519 | 	|	exprlist '\n' expr_or_assign	{ $$ = xxexprlist2($1, $3, &@3); }
 520 | 	|	exprlist '\n'			{ $$ = $1;}
 521 | 	;
 522 | 
 523 | sublist	:	sub				{ $$ = xxsublist1($1);	  }
 524 | 	|	sublist cr ',' sub		{ $$ = xxsublist2($1,$4); }
 525 | 	;
 526 | 
 527 | sub	:					{ $$ = xxsub0();	 }
 528 | 	|	expr				{ $$ = xxsub1($1, &@1);  }
 529 | 	|	SYMBOL EQ_ASSIGN 		{ $$ = xxsymsub0($1, &@1); 	modif_token( &@2, EQ_SUB ) ; modif_token( &@1, SYMBOL_SUB ) ; }
 530 | 	|	SYMBOL EQ_ASSIGN expr		{ $$ = xxsymsub1($1,$3, &@1); 	modif_token( &@2, EQ_SUB ) ; modif_token( &@1, SYMBOL_SUB ) ; }
 531 | 	|	STR_CONST EQ_ASSIGN 		{ $$ = xxsymsub0($1, &@1); 	modif_token( &@2, EQ_SUB ) ; }
 532 | 	|	STR_CONST EQ_ASSIGN expr	{ $$ = xxsymsub1($1,$3, &@1); 	modif_token( &@2, EQ_SUB ) ; }
 533 | 	|	NULL_CONST EQ_ASSIGN 		{ $$ = xxnullsub0(&@1); 	modif_token( &@2, EQ_SUB ) ; }
 534 | 	|	NULL_CONST EQ_ASSIGN expr	{ $$ = xxnullsub1($3, &@1); 	modif_token( &@2, EQ_SUB ) ; }
 535 | 	;
 536 | 
 537 | formlist:					{ $$ = xxnullformal(); }
 538 | 	|	SYMBOL				{ $$ = xxfirstformal0($1); 	modif_token( &@1, SYMBOL_FORMALS ) ; }
 539 | 	|	SYMBOL EQ_ASSIGN expr		{ $$ = xxfirstformal1($1,$3); 	modif_token( &@1, SYMBOL_FORMALS ) ; modif_token( &@2, EQ_FORMALS ) ; }
 540 | 	|	formlist ',' SYMBOL		{ $$ = xxaddformal0($1,$3, &@3);   modif_token( &@3, SYMBOL_FORMALS ) ; }
 541 | 	|	formlist ',' SYMBOL EQ_ASSIGN expr	
 542 | 						{ $$ = xxaddformal1($1,$3,$5,&@3); modif_token( &@3, SYMBOL_FORMALS ) ; modif_token( &@4, EQ_FORMALS ) ;}
 543 | 	;
 544 | 
 545 | cr	:					{ EatLines = 1; }
 546 | 	;
 547 | %%
 548 | 
 549 | 
 550 | /*----------------------------------------------------------------------------*/
 551 | 
 552 | static int (*ptr_getc)(void);
 553 | 
 554 | /* Private pushback, since file ungetc only guarantees one byte.
 555 |    We need up to one MBCS-worth */
 556 | #define DECLARE_YYTEXT_BUFP(bp) char *bp = yytext ;
 557 | #define YYTEXT_PUSH(c, bp) do { \
 558 |     if ((bp) - yytext >= sizeof(yytext) - 1){ \
 559 | 		error(_("input buffer overflow at line %d"), ParseState.xxlineno); \
 560 | 	} \
 561 |     *(bp)++ = ((char)c);			\
 562 | } while(0) ;
 563 | 
 564 | #define PUSHBACK_BUFSIZE 16
 565 | static int pushback[PUSHBACK_BUFSIZE];
 566 | static unsigned int npush = 0;
 567 | 
 568 | static int prevpos = 0;
 569 | static int prevlines[PUSHBACK_BUFSIZE];
 570 | static int prevcols[PUSHBACK_BUFSIZE];
 571 | static int prevbytes[PUSHBACK_BUFSIZE];
 572 | static int prevparse[PUSHBACK_BUFSIZE];
 573 | 
 574 | static int xxgetc(void)
 575 | {
 576 |     int c;
 577 | 
 578 |     if(npush) c = pushback[--npush]; else  c = ptr_getc();
 579 | 
 580 |     prevpos = (prevpos + 1) % PUSHBACK_BUFSIZE;
 581 |     prevbytes[prevpos] = ParseState.xxbyteno;
 582 |     prevlines[prevpos] = ParseState.xxlineno;  
 583 |     prevparse[prevpos] = ParseState.xxparseno;
 584 |     prevcols[prevpos] = ParseState.xxcolno;
 585 |     	
 586 |     if (c == EOF) {
 587 | 	EndOfFile = 1;
 588 | 	return R_EOF;
 589 |     }
 590 |     R_ParseContextLast = (R_ParseContextLast + 1) % PARSE_CONTEXT_SIZE;
 591 |     R_ParseContext[R_ParseContextLast] = (char) c;
 592 | 
 593 |     if (c == '\n') {
 594 | 	ParseState.xxlineno += 1;
 595 | 	ParseState.xxcolno = 0;
 596 |     	ParseState.xxbyteno = 0;
 597 |     	ParseState.xxparseno += 1;
 598 |     } else {
 599 |         /* We only advance the column for the 1st byte in UTF-8, so handle later bytes specially */
 600 | 	if (!known_to_be_utf8 || (unsigned char)c < 0x80 || 0xC0 <= (unsigned char)c)
 601 |             ParseState.xxcolno++;
 602 |     	ParseState.xxbyteno++;
 603 |     }
 604 | 
 605 |     if (c == '\t') ParseState.xxcolno = ((ParseState.xxcolno + 7) & ~7);
 606 |     
 607 |     R_ParseContextLine = ParseState.xxlineno;    
 608 | 
 609 |     xxcharcount++;
 610 |     return c;
 611 | }
 612 | 
 613 | static int xxungetc(int c)
 614 | {
 615 |     /* this assumes that c was the result of xxgetc; if not, some edits will be needed */
 616 |     ParseState.xxlineno = prevlines[prevpos];
 617 |     ParseState.xxbyteno = prevbytes[prevpos];
 618 |     ParseState.xxcolno  = prevcols[prevpos];
 619 |     ParseState.xxparseno = prevparse[prevpos];
 620 |     
 621 |     prevpos = (prevpos + PUSHBACK_BUFSIZE - 1) % PUSHBACK_BUFSIZE;
 622 | 
 623 |     R_ParseContextLine = ParseState.xxlineno;
 624 | 
 625 |     xxcharcount--;
 626 |     R_ParseContext[R_ParseContextLast] = '\0';
 627 |     /* precaution as to how % is implemented for < 0 numbers */
 628 |     R_ParseContextLast = (R_ParseContextLast + PARSE_CONTEXT_SIZE -1) % PARSE_CONTEXT_SIZE;
 629 |     if(npush >= PUSHBACK_BUFSIZE) return EOF;
 630 |     pushback[npush++] = c;
 631 |     return c;
 632 | }
 633 | 
 634 | /*
 635 |  * Increments/inits the token/grouping counter
 636 |  */
 637 | static void incrementId(void){
 638 | 	identifier++; 
 639 | }
 640 | 
 641 | static void initId(void){
 642 | 	identifier = 0 ;
 643 | }
 644 | 
 645 | static SEXP makeSrcref(YYLTYPE *lloc, SEXP srcfile)
 646 | {
 647 |     SEXP val;
 648 | 
 649 |     PROTECT(val = allocVector(INTSXP, 8));
 650 |     INTEGER(val)[0] = lloc->first_line;
 651 |     INTEGER(val)[1] = lloc->first_byte;
 652 |     INTEGER(val)[2] = lloc->last_line;
 653 |     INTEGER(val)[3] = lloc->last_byte;
 654 |     INTEGER(val)[4] = lloc->first_column;
 655 |     INTEGER(val)[5] = lloc->last_column;
 656 |     INTEGER(val)[6] = lloc->first_parsed;
 657 |     INTEGER(val)[7] = lloc->last_parsed;
 658 |     setAttrib(val, R_SrcfileSymbol, srcfile);
 659 |     setAttrib(val, R_ClassSymbol, mkString("srcref"));
 660 |     UNPROTECT(1); /* val */
 661 |     return val;
 662 | }
 663 | 
 664 | static void attachSrcrefs(SEXP val)
 665 | {
 666 |     SEXP srval;
 667 | 
 668 |     PROTECT(srval = SrcRefsToVectorList());
 669 |     
 670 |     setAttrib(val, R_SrcrefSymbol, srval);
 671 |     setAttrib(val, R_SrcfileSymbol, PS_SRCFILE);
 672 |     {
 673 | 	YYLTYPE wholeFile;
 674 | 	wholeFile.first_line = 1;
 675 | 	wholeFile.first_byte = 0;
 676 | 	wholeFile.first_column = 0;
 677 | 	wholeFile.last_line = ParseState.xxlineno;
 678 | 	wholeFile.last_byte = ParseState.xxbyteno;
 679 | 	wholeFile.last_column = ParseState.xxcolno;
 680 | 	wholeFile.first_parsed = 1;
 681 | 	wholeFile.last_parsed = ParseState.xxparseno;
 682 | 	setAttrib(val, R_WholeSrcrefSymbol, makeSrcref(&wholeFile, PS_SRCFILE));
 683 |     }
 684 |     PS_SET_SRCREFS(R_NilValue);
 685 |     ParseState.didAttach = TRUE;
 686 |     UNPROTECT(1); /* srval */
 687 | }
 688 | 
 689 | static int xxvalue(SEXP v, int k, YYLTYPE *lloc)
 690 | {
 691 |     if (k > 2) {
 692 | 	if (ParseState.keepSrcRefs) {
 693 | 	    SEXP s = PROTECT(makeSrcref(lloc, PS_SRCFILE));
 694 | 	    AppendToSrcRefs(s);
 695 | 	    UNPROTECT(1); /* s */
 696 | 	}
 697 | 	RELEASE_SV(v);
 698 |     }
 699 |     R_CurrentExpr = v;
 700 |     return k;
 701 | }
 702 | 
 703 | static SEXP xxnullformal()
 704 | {
 705 |     SEXP ans;
 706 |     PRESERVE_SV(ans = R_NilValue);
 707 |     return ans;
 708 | }
 709 | 
 710 | static SEXP xxfirstformal0(SEXP sym)
 711 | {
 712 |     SEXP ans;
 713 |     if (GenerateCode)
 714 | 	PRESERVE_SV(ans = FirstArg(R_MissingArg, sym));
 715 |     else
 716 | 	PRESERVE_SV(ans = R_NilValue);
 717 |     RELEASE_SV(sym);
 718 |     return ans;
 719 | }
 720 | 
 721 | static SEXP xxfirstformal1(SEXP sym, SEXP expr)
 722 | {
 723 |     SEXP ans;
 724 |     if (GenerateCode)
 725 | 	PRESERVE_SV(ans = FirstArg(expr, sym));
 726 |     else
 727 | 	PRESERVE_SV(ans = R_NilValue);
 728 |     RELEASE_SV(expr);
 729 |     RELEASE_SV(sym);
 730 |     return ans;
 731 | }
 732 | 
 733 | static SEXP xxaddformal0(SEXP formlist, SEXP sym, YYLTYPE *lloc)
 734 | {
 735 |     SEXP ans;
 736 |     if (GenerateCode) {
 737 | 	CheckFormalArgs(formlist, sym, lloc);
 738 | 	NextArg(formlist, R_MissingArg, sym);
 739 | 	ans = formlist;
 740 |     } else {
 741 | 	RELEASE_SV(formlist);
 742 | 	PRESERVE_SV(ans = R_NilValue);
 743 |     }
 744 |     RELEASE_SV(sym);
 745 |     return ans;
 746 | }
 747 | 
 748 | static SEXP xxaddformal1(SEXP formlist, SEXP sym, SEXP expr, YYLTYPE *lloc)
 749 | {
 750 |     SEXP ans;
 751 |     if (GenerateCode) {
 752 | 	CheckFormalArgs(formlist, sym, lloc);
 753 | 	NextArg(formlist, expr, sym);
 754 | 	ans = formlist;
 755 |     } else {
 756 | 	RELEASE_SV(formlist);
 757 | 	PRESERVE_SV(ans = R_NilValue);
 758 |     }
 759 |     RELEASE_SV(expr);
 760 |     RELEASE_SV(sym);
 761 |     return ans;
 762 | }
 763 | 
 764 | static SEXP xxexprlist0(void)
 765 | {
 766 |     SEXP ans;
 767 |     if (GenerateCode) {
 768 | 	PRESERVE_SV(ans = NewList());
 769 | 	if (ParseState.keepSrcRefs) {
 770 | 	    setAttrib(ans, R_SrcrefSymbol, PS_SRCREFS);
 771 | 	    PS_SET_SRCREFS(R_NilValue);
 772 | 	}
 773 |     }
 774 |     else
 775 | 	PRESERVE_SV(ans = R_NilValue);
 776 |     return ans;
 777 | }
 778 | 
 779 | static SEXP xxexprlist1(SEXP expr, YYLTYPE *lloc)
 780 | {
 781 |     SEXP ans;
 782 |     if (GenerateCode) {
 783 | 	PRESERVE_SV(ans = NewList());
 784 | 	if (ParseState.keepSrcRefs) {
 785 | 	    setAttrib(ans, R_SrcrefSymbol, PS_SRCREFS);
 786 | 	    SEXP s = PROTECT(makeSrcref(lloc, PS_SRCFILE));
 787 | 	    SetSingleSrcRef(s);
 788 | 	    UNPROTECT(1); /* s */
 789 | 	}
 790 | 	GrowList(ans, expr);
 791 |     }
 792 |     else
 793 | 	PRESERVE_SV(ans = R_NilValue);
 794 |     RELEASE_SV(expr);
 795 |     return ans;
 796 | }
 797 | 
 798 | static SEXP xxexprlist2(SEXP exprlist, SEXP expr, YYLTYPE *lloc)
 799 | {
 800 |     SEXP ans;
 801 |     if (GenerateCode) {
 802 | 	if (ParseState.keepSrcRefs) {
 803 | 	    SEXP s = PROTECT(makeSrcref(lloc, PS_SRCFILE));
 804 | 	    AppendToSrcRefs(s);
 805 | 	    UNPROTECT(1); /* s */
 806 | 	}
 807 | 	GrowList(exprlist, expr);
 808 | 	ans = exprlist;
 809 |     } else {
 810 | 	RELEASE_SV(exprlist);
 811 | 	PRESERVE_SV(ans = R_NilValue);
 812 |     }
 813 |     RELEASE_SV(expr);
 814 |     return ans;
 815 | }
 816 | 
 817 | static SEXP xxsub0(void)
 818 | {
 819 |     SEXP ans;
 820 |     if (GenerateCode)
 821 | 	PRESERVE_SV(ans = lang2(R_MissingArg,R_NilValue));
 822 |     else
 823 | 	PRESERVE_SV(ans = R_NilValue);
 824 |     return ans;
 825 | }
 826 | 
 827 | static SEXP xxsub1(SEXP expr, YYLTYPE *lloc)
 828 | {
 829 |     SEXP ans;
 830 |     if (GenerateCode)
 831 | 	PRESERVE_SV(ans = TagArg(expr, R_NilValue, lloc));
 832 |     else
 833 | 	PRESERVE_SV(ans = R_NilValue);
 834 |     RELEASE_SV(expr);
 835 |     return ans;
 836 | }
 837 | 
 838 | static SEXP xxsymsub0(SEXP sym, YYLTYPE *lloc)
 839 | {
 840 |     SEXP ans;
 841 |     if (GenerateCode)
 842 | 	PRESERVE_SV(ans = TagArg(R_MissingArg, sym, lloc));
 843 |     else
 844 | 	PRESERVE_SV(ans = R_NilValue);
 845 |     RELEASE_SV(sym);
 846 |     return ans;
 847 | }
 848 | 
 849 | static SEXP xxsymsub1(SEXP sym, SEXP expr, YYLTYPE *lloc)
 850 | {
 851 |     SEXP ans;
 852 |     if (GenerateCode)
 853 | 	PRESERVE_SV(ans = TagArg(expr, sym, lloc));
 854 |     else
 855 | 	PRESERVE_SV(ans = R_NilValue);
 856 |     RELEASE_SV(expr);
 857 |     RELEASE_SV(sym);
 858 |     return ans;
 859 | }
 860 | 
 861 | static SEXP xxnullsub0(YYLTYPE *lloc)
 862 | {
 863 |     SEXP ans;
 864 |     if (GenerateCode)
 865 | 	PRESERVE_SV(ans = TagArg(R_MissingArg, R_NullSymbol, lloc));
 866 |     else
 867 | 	PRESERVE_SV(ans = R_NilValue);
 868 |     RELEASE_SV(R_NilValue);
 869 |     return ans;
 870 | }
 871 | 
 872 | static SEXP xxnullsub1(SEXP expr, YYLTYPE *lloc)
 873 | {
 874 |     SEXP ans;
 875 |     if (GenerateCode)
 876 | 	PRESERVE_SV(ans = TagArg(expr, R_NullSymbol, lloc));
 877 |     else
 878 | 	PRESERVE_SV(ans = R_NilValue);
 879 |     RELEASE_SV(R_NilValue);
 880 |     RELEASE_SV(expr);
 881 |     return ans;
 882 | }
 883 | 
 884 | 
 885 | static SEXP xxsublist1(SEXP sub)
 886 | {
 887 |     SEXP ans;
 888 |     if (GenerateCode)
 889 | 	PRESERVE_SV(ans = FirstArg(CAR(sub),CADR(sub)));
 890 |     else
 891 | 	PRESERVE_SV(ans = R_NilValue);
 892 |     RELEASE_SV(sub);
 893 |     return ans;
 894 | }
 895 | 
 896 | static SEXP xxsublist2(SEXP sublist, SEXP sub)
 897 | {
 898 |     SEXP ans;
 899 |     if (GenerateCode) {
 900 | 	NextArg(sublist, CAR(sub), CADR(sub));
 901 | 	ans = sublist;
 902 |     } else {
 903 | 	RELEASE_SV(sublist);
 904 | 	PRESERVE_SV(ans = R_NilValue);
 905 |     }
 906 |     RELEASE_SV(sub);
 907 |     return ans;
 908 | }
 909 | 
 910 | static SEXP xxcond(SEXP expr)
 911 | {
 912 |     EatLines = 1;
 913 |     return expr;
 914 | }
 915 | 
 916 | static SEXP xxifcond(SEXP expr)
 917 | {
 918 |     EatLines = 1;
 919 |     return expr;
 920 | }
 921 | 
 922 | static SEXP xxif(SEXP ifsym, SEXP cond, SEXP expr)
 923 | {
 924 |     SEXP ans;
 925 |     if (GenerateCode)
 926 | 	PRESERVE_SV(ans = lang3(ifsym, cond, expr));
 927 |     else
 928 | 	PRESERVE_SV(ans = R_NilValue);
 929 |     RELEASE_SV(expr);
 930 |     RELEASE_SV(cond);
 931 |     return ans;
 932 | }
 933 | 
 934 | static SEXP xxifelse(SEXP ifsym, SEXP cond, SEXP ifexpr, SEXP elseexpr)
 935 | {
 936 |     SEXP ans;
 937 |     if (GenerateCode)
 938 | 	PRESERVE_SV(ans = lang4(ifsym, cond, ifexpr, elseexpr));
 939 |     else
 940 | 	PRESERVE_SV(ans = R_NilValue);
 941 |     RELEASE_SV(elseexpr);
 942 |     RELEASE_SV(ifexpr);
 943 |     RELEASE_SV(cond);
 944 |     return ans;
 945 | }
 946 | 
 947 | static SEXP xxforcond(SEXP sym, SEXP expr)
 948 | {
 949 |     SEXP ans;
 950 |     EatLines = 1;
 951 |     if (GenerateCode)
 952 | 	PRESERVE_SV(ans = LCONS(sym, expr));
 953 |     else
 954 | 	PRESERVE_SV(ans = R_NilValue);
 955 |     RELEASE_SV(expr);
 956 |     RELEASE_SV(sym);
 957 |     return ans;
 958 | }
 959 | 
 960 | static SEXP xxfor(SEXP forsym, SEXP forcond, SEXP body)
 961 | {
 962 |     SEXP ans;
 963 |     if (GenerateCode)
 964 | 	PRESERVE_SV(ans = lang4(forsym, CAR(forcond), CDR(forcond), body));
 965 |     else
 966 | 	PRESERVE_SV(ans = R_NilValue);
 967 |     RELEASE_SV(body);
 968 |     RELEASE_SV(forcond);
 969 |     return ans;
 970 | }
 971 | 
 972 | static SEXP xxwhile(SEXP whilesym, SEXP cond, SEXP body)
 973 | {
 974 |     SEXP ans;
 975 |     if (GenerateCode)
 976 | 	PRESERVE_SV(ans = lang3(whilesym, cond, body));
 977 |     else
 978 | 	PRESERVE_SV(ans = R_NilValue);
 979 |     RELEASE_SV(body);
 980 |     RELEASE_SV(cond);
 981 |     return ans;
 982 | }
 983 | 
 984 | static SEXP xxrepeat(SEXP repeatsym, SEXP body)
 985 | {
 986 |     SEXP ans;
 987 |     if (GenerateCode)
 988 | 	PRESERVE_SV(ans = lang2(repeatsym, body));
 989 |     else
 990 | 	PRESERVE_SV(ans = R_NilValue);
 991 |     RELEASE_SV(body);
 992 |     return ans;
 993 | }
 994 | 
 995 | static SEXP xxnxtbrk(SEXP keyword)
 996 | {
 997 |     if (GenerateCode)
 998 | 	PRESERVE_SV(keyword = lang1(keyword));
 999 |     else
1000 | 	PRESERVE_SV(keyword = R_NilValue);
1001 |     return keyword;
1002 | }
1003 | 
1004 | static SEXP xxfuncall(SEXP expr, SEXP args)
1005 | {
1006 |     SEXP ans, sav_expr = expr;
1007 |     if (GenerateCode) {
1008 | 	if (isString(expr))
1009 | 	    expr = installTrChar(STRING_ELT(expr, 0));
1010 | 	PROTECT(expr);
1011 | 	if (length(CDR(args)) == 1 && CADR(args) == R_MissingArg && TAG(CDR(args)) == R_NilValue )
1012 | 	    ans = lang1(expr);
1013 | 	else
1014 | 	    ans = LCONS(expr, CDR(args));
1015 | 	UNPROTECT(1); /* expr */
1016 | 	PRESERVE_SV(ans);
1017 |     } else
1018 | 	PRESERVE_SV(ans = R_NilValue);
1019 | 
1020 |     RELEASE_SV(args);
1021 |     RELEASE_SV(sav_expr);
1022 |     return ans;
1023 | }
1024 | 
1025 | static SEXP mkString2(const char *s, size_t len, Rboolean escaped)
1026 | {
1027 |     SEXP t;
1028 |     cetype_t enc = CE_NATIVE;
1029 | 
1030 |     if(known_to_be_latin1) enc = CE_LATIN1;
1031 |     else if(!escaped && known_to_be_utf8) enc = CE_UTF8;
1032 | 
1033 |     PROTECT(t = allocVector(STRSXP, 1));
1034 |     SET_STRING_ELT(t, 0, mkCharLenCE(s, (int) len, enc));
1035 |     UNPROTECT(1); /* t */
1036 |     return t;
1037 | }
1038 | 
1039 | static SEXP xxdefun(SEXP fname, SEXP formals, SEXP body, YYLTYPE *lloc)
1040 | {
1041 |     SEXP ans, srcref;
1042 | 
1043 |     if (GenerateCode) {
1044 |     	if (ParseState.keepSrcRefs) {
1045 | 	    srcref = makeSrcref(lloc, PS_SRCFILE);
1046 |     	    ParseState.didAttach = TRUE;
1047 |     	} else
1048 |     	    srcref = R_NilValue;
1049 | 	PRESERVE_SV(ans = lang4(fname, CDR(formals), body, srcref));
1050 |     } else
1051 | 	PRESERVE_SV(ans = R_NilValue);
1052 |     RELEASE_SV(body);
1053 |     RELEASE_SV(formals);
1054 |     return ans;
1055 | }
1056 | 
1057 | static SEXP xxdefunanon(SEXP formals, SEXP body, YYLTYPE *lloc)
1058 | {
1059 | 	SEXP fname = install("function");
1060 | 
1061 | 	return xxdefun(fname, formals, body, lloc);
1062 | }
1063 | 
1064 | static SEXP xxunary(SEXP op, SEXP arg)
1065 | {
1066 |     SEXP ans;
1067 |     if (GenerateCode)
1068 | 	PRESERVE_SV(ans = lang2(op, arg));
1069 |     else
1070 | 	PRESERVE_SV(ans = R_NilValue);
1071 |     RELEASE_SV(arg);
1072 |     return ans;
1073 | }
1074 | 
1075 | static SEXP xxbinary(SEXP n1, SEXP n2, SEXP n3)
1076 | {
1077 |     SEXP ans;
1078 |     if (GenerateCode)
1079 | 	PRESERVE_SV(ans = lang3(n1, n2, n3));
1080 |     else
1081 | 	PRESERVE_SV(ans = R_NilValue);
1082 |     RELEASE_SV(n2);
1083 |     RELEASE_SV(n3);
1084 |     return ans;
1085 | }
1086 | 
1087 | static SEXP xxparen(SEXP n1, SEXP n2)
1088 | {
1089 |     SEXP ans;
1090 |     if (GenerateCode)
1091 | 	PRESERVE_SV(ans = lang2(n1, n2));
1092 |     else
1093 | 	PRESERVE_SV(ans = R_NilValue);
1094 |     RELEASE_SV(n2);
1095 |     return ans;
1096 | }
1097 | 
1098 | 
1099 | /* This should probably use CONS rather than LCONS, but
1100 |    it shouldn't matter and we would rather not meddle
1101 |    See PR#7055 */
1102 | 
1103 | static SEXP xxsubscript(SEXP a1, SEXP a2, SEXP a3)
1104 | {
1105 |     SEXP ans;
1106 |     if (GenerateCode)
1107 | 	PRESERVE_SV(ans = LCONS(a2, CONS(a1, CDR(a3))));
1108 |     else
1109 | 	PRESERVE_SV(ans = R_NilValue);
1110 |     RELEASE_SV(a3);
1111 |     RELEASE_SV(a1);
1112 |     return ans;
1113 | }
1114 | 
1115 | static SEXP xxexprlist(SEXP a1, YYLTYPE *lloc, SEXP a2)
1116 | {
1117 |     SEXP ans;
1118 |     SEXP prevSrcrefs;
1119 | 
1120 |     EatLines = 0;
1121 |     if (GenerateCode) {
1122 | 	SET_TYPEOF(a2, LANGSXP);
1123 | 	SETCAR(a2, a1);
1124 | 	if (ParseState.keepSrcRefs) {
1125 | 	    PROTECT(prevSrcrefs = getAttrib(a2, R_SrcrefSymbol));
1126 | 	    SEXP s = PROTECT(makeSrcref(lloc, PS_SRCFILE));
1127 | 	    PrependToSrcRefs(s);
1128 | 	    attachSrcrefs(a2);
1129 | 	    UNPROTECT(2); /* prevSrcrefs, s */
1130 | 	    /* SrcRefs got NAMED by being an attribute, preventively
1131 | 	       getAttrib(), but it has not in fact been referenced. Set NAMED
1132 | 	       to 0 to avoid overhead in further setAttrib calls due to cycle
1133 | 	       detection. */
1134 | 	    SET_NAMED(prevSrcrefs, 0);
1135 | 	    PS_SET_SRCREFS(prevSrcrefs);
1136 | 	}
1137 | 	PRESERVE_SV(ans = a2);
1138 |     }
1139 |     else
1140 | 	PRESERVE_SV(ans = R_NilValue);
1141 |     RELEASE_SV(a2);
1142 |     return ans;
1143 | }
1144 | 
1145 | /*--------------------------------------------------------------------------*/
1146 | 
1147 | static SEXP TagArg(SEXP arg, SEXP tag, YYLTYPE *lloc)
1148 | {
1149 |     switch (TYPEOF(tag)) {
1150 |     case STRSXP:
1151 | 	tag = installTrChar(STRING_ELT(tag, 0));
1152 |     case NILSXP:
1153 |     case SYMSXP:
1154 | 	return lang2(arg, tag);
1155 |     default:
1156 | 	error(_("incorrect tag type at line %d"), lloc->first_line); return R_NilValue/* -Wall */;
1157 |     }
1158 | }
1159 | 
1160 | 
1161 | /* Stretchy List Structures : Lists are created and grown using a special */
1162 | /* dotted pair.  The CAR of the list points to the last cons-cell in the */
1163 | /* list and the CDR points to the first.  The list can be extracted from */
1164 | /* the pair by taking its CDR, while the CAR gives fast access to the end */
1165 | /* of the list. */
1166 | 
1167 | /* These functions must be called with arguments protected */
1168 | 
1169 | /* Create a stretchy-list dotted pair */
1170 | static SEXP NewList(void)
1171 | {
1172 |     SEXP s = CONS(R_NilValue, R_NilValue);
1173 |     SETCAR(s, s);
1174 |     return s;
1175 | }
1176 | 
1177 | /* Add a new element at the end of a stretchy list */
1178 | static void GrowList(SEXP l, SEXP s)
1179 | {
1180 |     SEXP tmp;
1181 |     tmp = CONS(s, R_NilValue);
1182 |     SETCDR(CAR(l), tmp);
1183 |     SETCAR(l, tmp);
1184 | }
1185 | 
1186 | /* Create a stretchy list with a single named element */
1187 | static SEXP FirstArg(SEXP s, SEXP tag)
1188 | {
1189 |     SEXP tmp;
1190 |     PROTECT(tmp = NewList());
1191 |     GrowList(tmp, s);
1192 |     SET_TAG(CAR(tmp), tag);
1193 |     UNPROTECT(1); /* tmp */
1194 |     return tmp;
1195 | }
1196 | 
1197 | /* Add named element to the end of a stretchy list */
1198 | static void NextArg(SEXP l, SEXP s, SEXP tag)
1199 | {
1200 |     GrowList(l, s);
1201 |     SET_TAG(CAR(l), tag);
1202 | }
1203 | 
1204 | /* SrcRefs (PS_SRCREFS) are represented as R_NilValue (empty) or by 
1205 |    a stretchy list (which includes another representation for empty)
1206 |    for fast append operation. */
1207 | 
1208 | static void SetSingleSrcRef(SEXP r)
1209 | {
1210 |     SEXP l;
1211 | 
1212 |     PROTECT(l = NewList());
1213 |     GrowList(l, r);
1214 |     PS_SET_SRCREFS(l);
1215 |     UNPROTECT(1); /* l */
1216 | }
1217 | 
1218 | static void AppendToSrcRefs(SEXP r)
1219 | {
1220 |     SEXP l = PS_SRCREFS;
1221 |     if (l == R_NilValue)
1222 | 	SetSingleSrcRef(r);
1223 |     else
1224 | 	GrowList(l, r);
1225 | }
1226 | 
1227 | static void PrependToSrcRefs(SEXP r)
1228 | {
1229 |     SEXP l = PS_SRCREFS;
1230 |     if (l == R_NilValue)
1231 | 	SetSingleSrcRef(r);
1232 |     else if (CDR(l) == R_NilValue)
1233 | 	/* adding to empty stretchy list */
1234 | 	GrowList(l, r);
1235 |     else {
1236 | 	SEXP tmp = CONS(r, CDR(l));
1237 | 	SETCDR(l, tmp);
1238 |     }
1239 | }
1240 | 
1241 | static SEXP SrcRefsToVectorList() {
1242 |     SEXP l = PS_SRCREFS;
1243 |     if (l == R_NilValue)
1244 | 	return PairToVectorList(l);
1245 |     else
1246 | 	return PairToVectorList(CDR(l));
1247 | }
1248 | 
1249 | /*--------------------------------------------------------------------------*/
1250 | 
1251 | /*
1252 |  *  Parsing Entry Points:
1253 |  *
1254 |  *  The Following entry points provide language parsing facilities.
1255 |  *  Note that there are separate entry points for parsing IoBuffers
1256 |  *  (i.e. interactve use), files and R character strings.
1257 |  *
1258 |  *  The entry points provide the same functionality, they just
1259 |  *  set things up in slightly different ways.
1260 |  *
1261 |  *  The following routines parse a single expression:
1262 |  *
1263 |  *
1264 |  *	SEXP R_Parse1File(FILE *fp, int gencode, ParseStatus *status, Rboolean first)
1265 |  *   (used for R_ReplFile in main.c)
1266 |  *
1267 |  *	SEXP R_Parse1Buffer(IoBuffer *buffer, int gencode, ParseStatus *status, Rboolean first)
1268 |  *   (used for ReplIteration and R_ReplDLLdo1 in main.c)
1269 |  *
1270 |  *  The success of the parse is indicated as folllows:
1271 |  *
1272 |  *
1273 |  *	status = PARSE_NULL       - there was no statement to parse
1274 |  *		 PARSE_OK	  - complete statement
1275 |  *		 PARSE_INCOMPLETE - incomplete statement
1276 |  *		 PARSE_ERROR      - syntax error
1277 |  *		 PARSE_EOF	  - end of file
1278 |  *
1279 |  *
1280 |  *  The following routines parse several expressions and return
1281 |  *  their values in a single expression vector.
1282 |  *
1283 |  *	SEXP R_ParseFile(FILE *fp, int n, ParseStatus *status, SEXP srcfile)
1284 |  *    (used for do_edit in file edit.c)
1285 |  *
1286 |  *	SEXP R_ParseVector(SEXP *text, int n, ParseStatus *status, SEXP srcfile)
1287 |  *    (public, and used by parse(text=) in file source.c)
1288 |  *
1289 |  *	SEXP R_ParseBuffer(IoBuffer *buffer, int n, ParseStatus *status, SEXP prompt, SEXP srcfile)
1290 |  *    (used by parse(file="") in file source.c)
1291 |  *
1292 |  *      SEXP R_ParseConn(Rconnection con, int n, ParseStatus *status, SEXP srcfile)
1293 |  *    (used by parse(file=) in file source.c)
1294 |  *
1295 |  *  Here, status is 1 for a successful parse and 0 if parsing failed
1296 |  *  for some reason.
1297 |  */
1298 | 
1299 | #define CONTEXTSTACK_SIZE 50
1300 | static int	SavedToken;
1301 | static SEXP	SavedLval;
1302 | static char	contextstack[CONTEXTSTACK_SIZE], *contextp;
1303 | 
1304 | static void PutSrcRefState(SrcRefState *state);
1305 | static void UseSrcRefState(SrcRefState *state);
1306 | 
1307 | /* This is called once when R starts up. */
1308 | attribute_hidden
1309 | void InitParser(void)
1310 | {
1311 |     ParseState.sexps = allocVector(VECSXP, 7); /* initialized to R_NilValue */
1312 |     ParseState.data = R_NilValue;
1313 |     INIT_SVS();
1314 |     R_PreserveObject(ParseState.sexps); /* never released in an R session */
1315 |     R_NullSymbol = install("NULL");
1316 | }
1317 | 
1318 | static void FinalizeSrcRefStateOnError(void *dummy)
1319 | {
1320 |     R_FinalizeSrcRefState();
1321 | }
1322 | 
1323 | /* This is called each time a new parse sequence begins */
1324 | attribute_hidden
1325 | void R_InitSrcRefState(RCNTXT* cptr)
1326 | {
1327 |     if (busy) {
1328 |     	SrcRefState *prev = malloc(sizeof(SrcRefState));
1329 | 	if (prev == NULL)
1330 | 	    error(_("allocation of source reference state failed"));
1331 |     	PutSrcRefState(prev);
1332 | 	ParseState.prevState = prev;
1333 | 	ParseState.sexps = allocVector(VECSXP, 7);
1334 | 	ParseState.data = R_NilValue;
1335 | 	INIT_SVS();
1336 | 	R_PreserveObject(ParseState.sexps);
1337 | 	/* ParseState.sexps released in R_FinalizeSrcRefState */
1338 |     } else
1339 | 	/* re-use data, text, ids arrays */
1340 |         ParseState.prevState = NULL;
1341 |     /* set up context _after_ PutSrcRefState */
1342 |     begincontext(cptr, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
1343 |                  R_NilValue, R_NilValue);
1344 |     cptr->cend = &FinalizeSrcRefStateOnError;
1345 |     cptr->cenddata = NULL;
1346 |     ParseState.keepSrcRefs = FALSE;
1347 |     ParseState.keepParseData = TRUE;
1348 |     ParseState.didAttach = FALSE;
1349 |     PS_SET_SRCFILE(R_NilValue);
1350 |     PS_SET_ORIGINAL(R_NilValue);
1351 |     ParseState.data_count = 0;
1352 |     ParseState.xxlineno = 1;
1353 |     ParseState.xxcolno = 0;
1354 |     ParseState.xxbyteno = 0;
1355 |     ParseState.xxparseno = 1;
1356 |     busy = TRUE;
1357 | }
1358 | 
1359 | attribute_hidden
1360 | void R_FinalizeSrcRefState(void)
1361 | {
1362 |     PS_SET_SRCFILE(R_NilValue);
1363 |     PS_SET_ORIGINAL(R_NilValue);
1364 |     CLEAR_SVS();
1365 | 
1366 |     /* Free the data, text and ids if we are restoring a previous state,
1367 |        or if they have grown too large */
1368 |     if (PS_DATA != R_NilValue) {
1369 |     	if (ParseState.prevState || DATA_COUNT > MAX_DATA_COUNT) {
1370 | 	    PS_SET_DATA(R_NilValue);
1371 | 	    PS_SET_TEXT(R_NilValue);
1372 | 	} else /* Remove all the strings from the text vector so they don't take up memory, and clean up data */
1373 | 	    for (int i=0; i < ParseState.data_count; i++) {
1374 | 		SET_STRING_ELT(PS_TEXT, i, R_BlankString);
1375 | 		_PARENT(i) = 0;
1376 | 	    }
1377 |     } 
1378 |     if (PS_IDS != R_NilValue) {
1379 | 	if (ParseState.prevState || ID_COUNT > MAX_DATA_COUNT) {
1380 | 	    PS_SET_IDS(R_NilValue);
1381 |         } else {/* Remove the parent records */
1382 |             if (identifier > ID_COUNT) identifier = ID_COUNT;
1383 |             for (int i=0; i < identifier; i++) {
1384 | 		ID_ID(i) = 0;
1385 | 	        ID_PARENT(i) = 0;
1386 | 	    }
1387 | 	}
1388 |     }
1389 |     ParseState.data_count = NA_INTEGER;
1390 |     if (ParseState.prevState) {
1391 | 	R_ReleaseObject(ParseState.sexps);
1392 |         SrcRefState *prev = ParseState.prevState;
1393 |     	UseSrcRefState(prev);
1394 |     	free(prev);
1395 |     } else
1396 |         busy = FALSE;
1397 | }
1398 | 
1399 | static void UseSrcRefState(SrcRefState *state)
1400 | {
1401 |     ParseState.keepSrcRefs = state->keepSrcRefs;
1402 |     ParseState.keepParseData = state->keepParseData;
1403 |     ParseState.sexps = state->sexps;
1404 |     ParseState.data = state->data;
1405 |     ParseState.data_count = state->data_count;
1406 |     ParseState.xxlineno = state->xxlineno;
1407 |     ParseState.xxcolno = state->xxcolno;
1408 |     ParseState.xxbyteno = state->xxbyteno;
1409 |     ParseState.xxparseno = state->xxparseno;
1410 |     ParseState.prevState = state->prevState;
1411 |     busy = TRUE;
1412 | }
1413 | 
1414 | static void PutSrcRefState(SrcRefState *state)
1415 | {
1416 |     state->keepSrcRefs = ParseState.keepSrcRefs;
1417 |     state->keepParseData = ParseState.keepParseData;
1418 |     state->sexps = ParseState.sexps;
1419 |     state->data = ParseState.data;
1420 |     state->data_count = ParseState.data_count;
1421 |     state->xxlineno = ParseState.xxlineno;
1422 |     state->xxcolno = ParseState.xxcolno;
1423 |     state->xxbyteno = ParseState.xxbyteno;
1424 |     state->xxparseno = ParseState.xxparseno;
1425 |     state->prevState = ParseState.prevState;
1426 | }
1427 | 
1428 | static void ParseInit(void)
1429 | {
1430 |     contextp = contextstack;
1431 |     *contextp = ' ';
1432 |     SavedToken = 0;
1433 |     SavedLval = R_NilValue;
1434 |     EatLines = 0;
1435 |     EndOfFile = 0;
1436 |     xxcharcount = 0;
1437 |     npush = 0;
1438 | }
1439 | 
1440 | static void initData(void)
1441 | {
1442 |     ParseState.data_count = 0 ;
1443 | }
1444 | 
1445 | 
1446 | static void ParseContextInit(void)
1447 | {
1448 |     R_ParseContextLast = 0;
1449 |     R_ParseContext[0] = '\0';
1450 |     
1451 |     /* starts the identifier counter*/
1452 |     initId();
1453 |     initData();
1454 | }
1455 | 
1456 | static SEXP R_Parse1(ParseStatus *status)
1457 | {
1458 |     switch(yyparse()) {
1459 |     case 0:                     /* End of file */
1460 | 	*status = PARSE_EOF;
1461 | 	if (EndOfFile == 2) *status = PARSE_INCOMPLETE;
1462 | 	break;
1463 |     case 1:                     /* Syntax error / incomplete */
1464 | 	*status = PARSE_ERROR;
1465 | 	if (EndOfFile) *status = PARSE_INCOMPLETE;
1466 | 	break;
1467 |     case 2:                     /* Empty Line */
1468 | 	*status = PARSE_NULL;
1469 | 	break;
1470 |     case 3:                     /* Valid expr '\n' terminated */
1471 |     case 4:                     /* Valid expr ';' terminated */
1472 | 	*status = PARSE_OK;
1473 | 	break;
1474 |     }
1475 |     return R_CurrentExpr;
1476 | }
1477 | 
1478 | static FILE *fp_parse;
1479 | 
1480 | static int file_getc(void)
1481 | {
1482 |     return R_fgetc(fp_parse);
1483 | }
1484 | 
1485 | /* used in main.c */
1486 | attribute_hidden
1487 | SEXP R_Parse1File(FILE *fp, int gencode, ParseStatus *status)
1488 | {
1489 |     ParseInit();
1490 |     ParseContextInit();
1491 |     GenerateCode = gencode;
1492 |     fp_parse = fp;
1493 |     ptr_getc = file_getc;
1494 |     R_Parse1(status);
1495 |     CLEAR_SVS();
1496 |     return R_CurrentExpr;
1497 | }
1498 | 
1499 | static IoBuffer *iob;
1500 | 
1501 | static int buffer_getc(void)
1502 | {
1503 |     return R_IoBufferGetc(iob);
1504 | }
1505 | 
1506 | /* Used only in main.c */
1507 | attribute_hidden
1508 | SEXP R_Parse1Buffer(IoBuffer *buffer, int gencode, ParseStatus *status)
1509 | {
1510 |     Rboolean keepSource = FALSE; 
1511 |     RCNTXT cntxt;
1512 | 
1513 |     R_InitSrcRefState(&cntxt);
1514 |     if (gencode) {
1515 |     	keepSource = asLogical(GetOption1(install("keep.source")));
1516 |     	if (keepSource) {
1517 |     	    ParseState.keepSrcRefs = TRUE;
1518 | 	    ParseState.keepParseData =
1519 | 		asLogical(GetOption1(install("keep.parse.data")));
1520 | 	    PS_SET_SRCFILE(NewEnvironment(R_NilValue, R_NilValue, R_EmptyEnv));
1521 | 	    PS_SET_ORIGINAL(PS_SRCFILE);
1522 | 	    PS_SET_SRCREFS(R_NilValue);
1523 | 	}
1524 |     }
1525 |     ParseInit();
1526 |     ParseContextInit();
1527 |     GenerateCode = gencode;
1528 |     iob = buffer;
1529 |     ptr_getc = buffer_getc;
1530 |     R_Parse1(status);
1531 |     if (gencode && keepSource) {
1532 |     	if (ParseState.didAttach) {
1533 |    	    int buflen = R_IoBufferReadOffset(buffer);
1534 |    	    char buf[buflen+1];
1535 |    	    SEXP class;
1536 |    	    R_IoBufferReadReset(buffer);
1537 |    	    for (int i=0; i<buflen; i++)
1538 |    	    	buf[i] = (char) R_IoBufferGetc(buffer);
1539 | 
1540 |    	    buf[buflen] = 0;
1541 | 	    SEXP s_filename = install("filename");
1542 | 	    defineVar(s_filename, ScalarString(mkChar("")), PS_ORIGINAL);
1543 | 	    SEXP s_lines = install("lines");
1544 | 	    defineVar(s_lines, ScalarString(mkChar(buf)), PS_ORIGINAL);
1545 |     	    PROTECT(class = allocVector(STRSXP, 2));
1546 |             SET_STRING_ELT(class, 0, mkChar("srcfilecopy"));
1547 |             SET_STRING_ELT(class, 1, mkChar("srcfile"));
1548 | 	    setAttrib(PS_ORIGINAL, R_ClassSymbol, class);
1549 | 	    UNPROTECT(1); /* class */
1550 | 	}
1551 |     }
1552 |     PROTECT(R_CurrentExpr);
1553 |     endcontext(&cntxt);
1554 |     R_FinalizeSrcRefState();
1555 |     UNPROTECT(1); /* R_CurrentExpr */
1556 |     return R_CurrentExpr;
1557 | }
1558 | 
1559 | static TextBuffer *txtb;
1560 | 
1561 | static int text_getc(void)
1562 | {
1563 |     return R_TextBufferGetc(txtb);
1564 | }
1565 | 
1566 | static SEXP R_Parse(int n, ParseStatus *status, SEXP srcfile)
1567 | {
1568 |     int i;
1569 |     SEXP t, rval;
1570 |     RCNTXT cntxt;
1571 | 
1572 |     R_InitSrcRefState(&cntxt);
1573 |     ParseContextInit();
1574 | 
1575 |     PS_SET_SRCFILE(srcfile);
1576 |     PS_SET_ORIGINAL(srcfile);
1577 |     
1578 |     if (isEnvironment(srcfile)) {
1579 |     	ParseState.keepSrcRefs = TRUE;
1580 | 	ParseState.keepParseData =
1581 | 	    asLogical(GetOption1(install("keep.parse.data")));
1582 | 	PS_SET_SRCREFS(R_NilValue);
1583 |     }
1584 |     
1585 |     PROTECT(t = NewList());
1586 |     for(i = 0; ; ) {
1587 | 	if(n >= 0 && i >= n) break;
1588 | 	ParseInit();
1589 | 	rval = R_Parse1(status);
1590 | 	switch(*status) {
1591 | 	case PARSE_NULL:
1592 | 	    break;
1593 | 	case PARSE_OK:
1594 | 	    PROTECT(rval);
1595 | 	    GrowList(t, rval);
1596 | 	    UNPROTECT(1); /* rval */
1597 | 	    i++;
1598 | 	    break;
1599 | 	case PARSE_INCOMPLETE:
1600 | 	case PARSE_ERROR:
1601 | 	    UNPROTECT(1); /* t */
1602 | 	    if (ParseState.keepSrcRefs && ParseState.keepParseData)
1603 | 	        finalizeData();
1604 | 	    endcontext(&cntxt);
1605 | 	    R_FinalizeSrcRefState();	    
1606 | 	    return R_NilValue;
1607 | 	    break;
1608 | 	case PARSE_EOF:
1609 | 	    goto finish;
1610 | 	    break;
1611 | 	}
1612 |     }
1613 | 
1614 | finish:
1615 | 
1616 |     t = CDR(t);
1617 |     PROTECT(rval = allocVector(EXPRSXP, length(t)));
1618 |     for (n = 0 ; n < LENGTH(rval) ; n++, t = CDR(t))
1619 | 	SET_VECTOR_ELT(rval, n, CAR(t));
1620 |     if (ParseState.keepSrcRefs) {
1621 | 	if (ParseState.keepParseData)
1622 | 	    finalizeData();
1623 | 	attachSrcrefs(rval);
1624 |     }
1625 |     UNPROTECT(2); /* t, rval */
1626 |     PROTECT(rval);
1627 |     endcontext(&cntxt);
1628 |     R_FinalizeSrcRefState();
1629 |     UNPROTECT(1); /* rval */
1630 |     *status = PARSE_OK;
1631 |     return rval;
1632 | }
1633 | 
1634 | /* used in edit.c */
1635 | attribute_hidden
1636 | SEXP R_ParseFile(FILE *fp, int n, ParseStatus *status, SEXP srcfile)
1637 | {
1638 |     GenerateCode = 1;
1639 |     fp_parse = fp;
1640 |     ptr_getc = file_getc;
1641 |     return R_Parse(n, status, srcfile);
1642 | }
1643 | 
1644 | #include "Rconnections.h"
1645 | static Rconnection con_parse;
1646 | 
1647 | /* need to handle incomplete last line */
1648 | static int con_getc(void)
1649 | {
1650 |     int c;
1651 |     static int last=-1000;
1652 | 
1653 |     c = Rconn_fgetc(con_parse);
1654 |     if (c == EOF && last != '\n') c = '\n';
1655 |     return (last = c);
1656 | }
1657 | 
1658 | /* used in source.c */
1659 | attribute_hidden
1660 | SEXP R_ParseConn(Rconnection con, int n, ParseStatus *status, SEXP srcfile)
1661 | {
1662 |     GenerateCode = 1;
1663 |     con_parse = con;
1664 |     ptr_getc = con_getc;
1665 |     return R_Parse(n, status, srcfile);
1666 | }
1667 | 
1668 | /* This one is public, and used in source.c */
1669 | SEXP R_ParseVector(SEXP text, int n, ParseStatus *status, SEXP srcfile)
1670 | {
1671 |     SEXP rval;
1672 |     TextBuffer textb;
1673 |     R_TextBufferInit(&textb, text);
1674 |     txtb = &textb;
1675 |     GenerateCode = 1;
1676 |     ptr_getc = text_getc;
1677 |     rval = R_Parse(n, status, srcfile);
1678 |     R_TextBufferFree(&textb);
1679 |     return rval;
1680 | }
1681 | 
1682 | static const char *Prompt(SEXP prompt, int type)
1683 | {
1684 |     if(type == 1) {
1685 | 	if(length(prompt) <= 0) {
1686 | 	    return CHAR(STRING_ELT(GetOption1(install("prompt")), 0));
1687 | 	}
1688 | 	else
1689 | 	    return CHAR(STRING_ELT(prompt, 0));
1690 |     }
1691 |     else {
1692 | 	return CHAR(STRING_ELT(GetOption1(install("continue")), 0));
1693 |     }
1694 | }
1695 | 
1696 | /* used in source.c */
1697 | attribute_hidden
1698 | SEXP R_ParseBuffer(IoBuffer *buffer, int n, ParseStatus *status, SEXP prompt, 
1699 | 		   SEXP srcfile)
1700 | {
1701 |     SEXP rval, t;
1702 |     char *bufp, buf[CONSOLE_BUFFER_SIZE];
1703 |     int c, i, prompt_type = 1;
1704 |     RCNTXT cntxt;
1705 | 
1706 |     R_IoBufferWriteReset(buffer);
1707 |     buf[0] = '\0';
1708 |     bufp = buf;
1709 |     R_InitSrcRefState(&cntxt);
1710 |     ParseContextInit();
1711 |     
1712 |     GenerateCode = 1;
1713 |     iob = buffer;
1714 |     ptr_getc = buffer_getc;
1715 | 
1716 |     PS_SET_SRCFILE(srcfile);
1717 |     PS_SET_ORIGINAL(srcfile);
1718 |     
1719 |     if (isEnvironment(srcfile)) {
1720 |     	ParseState.keepSrcRefs = TRUE;
1721 | 	ParseState.keepParseData =
1722 | 	    asLogical(GetOption1(install("keep.parse.data")));
1723 | 	PS_SET_SRCREFS(R_NilValue);
1724 |     }
1725 |     
1726 |     PROTECT(t = NewList());
1727 |     for(i = 0; ; ) {
1728 | 	if(n >= 0 && i >= n) break;
1729 | 	if (!*bufp) {
1730 | 	    if(R_ReadConsole((char *) Prompt(prompt, prompt_type),
1731 | 			     (unsigned char *)buf, CONSOLE_BUFFER_SIZE, 1) == 0)
1732 | 		goto finish;
1733 | 	    bufp = buf;
1734 | 	}
1735 | 	while ((c = *bufp++)) {
1736 | 	    R_IoBufferPutc(c, buffer);
1737 | 	    if (c == ';' || c == '\n') break;
1738 | 	}
1739 | 
1740 | 	/* Was a call to R_Parse1Buffer, but we don't want to reset
1741 | 	   xxlineno and xxcolno */
1742 | 	ParseInit();
1743 | 	/* Not calling ParseContextInit() as it resets parse data, and
1744 | 	   to be consistent with R_Parse */
1745 | 	R_Parse1(status);
1746 | 	rval = R_CurrentExpr;
1747 | 
1748 | 	switch(*status) {
1749 | 	case PARSE_NULL:
1750 | 	    break;
1751 | 	case PARSE_OK:
1752 | 	    PROTECT(rval);
1753 | 	    GrowList(t, rval);
1754 | 	    UNPROTECT(1); /* rval */
1755 | 	    i++;
1756 | 	    break;
1757 | 	case PARSE_INCOMPLETE:
1758 | 	case PARSE_ERROR:
1759 | 	    UNPROTECT(1); /* t */
1760 | 	    R_IoBufferWriteReset(buffer);
1761 | 	    endcontext(&cntxt);
1762 | 	    R_FinalizeSrcRefState();
1763 | 	    return R_NilValue;
1764 | 	    break;
1765 | 	case PARSE_EOF:
1766 | 	    goto finish;
1767 | 	    break;
1768 | 	}
1769 |     }
1770 | finish:
1771 |     R_IoBufferWriteReset(buffer);
1772 |     t = CDR(t);
1773 |     PROTECT(rval = allocVector(EXPRSXP, length(t)));
1774 |     for (n = 0 ; n < LENGTH(rval) ; n++, t = CDR(t))
1775 | 	SET_VECTOR_ELT(rval, n, CAR(t));
1776 |     if (ParseState.keepSrcRefs) {
1777 | 	if (ParseState.keepParseData)
1778 | 	    finalizeData();
1779 | 	attachSrcrefs(rval);
1780 |     }
1781 |     UNPROTECT(2); /* t, rval */
1782 |     PROTECT(rval);
1783 |     endcontext(&cntxt);
1784 |     R_FinalizeSrcRefState();
1785 |     UNPROTECT(1); /* rval */
1786 |     *status = PARSE_OK;
1787 |     return rval;
1788 | }
1789 | 
1790 | 
1791 | /*----------------------------------------------------------------------------
1792 |  *
1793 |  *  The Lexical Analyzer:
1794 |  *
1795 |  *  Basic lexical analysis is performed by the following
1796 |  *  routines.  Input is read a line at a time, and, if the
1797 |  *  program is in batch mode, each input line is echoed to
1798 |  *  standard output after it is read.
1799 |  *
1800 |  *  The function yylex() scans the input, breaking it into
1801 |  *  tokens which are then passed to the parser.  The lexical
1802 |  *  analyser maintains a symbol table (in a very messy fashion).
1803 |  *
1804 |  *  The fact that if statements need to parse differently
1805 |  *  depending on whether the statement is being interpreted or
1806 |  *  part of the body of a function causes the need for ifpop
1807 |  *  and IfPush.  When an if statement is encountered an 'i' is
1808 |  *  pushed on a stack (provided there are parentheses active).
1809 |  *  At later points this 'i' needs to be popped off of the if
1810 |  *  stack.
1811 |  *
1812 |  */
1813 | 
1814 | static void IfPush(void)
1815 | {
1816 |     if (*contextp==LBRACE ||
1817 | 	*contextp=='['    ||
1818 | 	*contextp=='('    ||
1819 | 	*contextp == 'i') {
1820 | 	if(contextp - contextstack >= CONTEXTSTACK_SIZE)
1821 | 	    error(_("contextstack overflow"));
1822 | 	*++contextp = 'i';
1823 |     }
1824 | 
1825 | }
1826 | 
1827 | static void ifpop(void)
1828 | {
1829 |     if (*contextp=='i')
1830 | 	*contextp-- = 0;
1831 | }
1832 | 
1833 | /* This is only called following ., so we only care if it is
1834 |    an ANSI digit or not */
1835 | static int typeofnext(void)
1836 | {
1837 |     int k, c;
1838 | 
1839 |     c = xxgetc();
1840 |     if (isdigit(c)) k = 1; else k = 2;
1841 |     xxungetc(c);
1842 |     return k;
1843 | }
1844 | 
1845 | static int nextchar(int expect)
1846 | {
1847 |     int c = xxgetc();
1848 |     if (c == expect)
1849 | 	return 1;
1850 |     else
1851 | 	xxungetc(c);
1852 |     return 0;
1853 | }
1854 | 
1855 | /* Special Symbols */
1856 | /* Syntactic Keywords + Symbolic Constants */
1857 | 
1858 | struct {
1859 |     char *name;
1860 |     int token;
1861 | }
1862 | static keywords[] = {
1863 |     { "NULL",	    NULL_CONST },
1864 |     { "NA",	    NUM_CONST  },
1865 |     { "TRUE",	    NUM_CONST  },
1866 |     { "FALSE",	    NUM_CONST  },
1867 |     { "Inf",	    NUM_CONST  },
1868 |     { "NaN",	    NUM_CONST  },
1869 |     { "NA_integer_", NUM_CONST  },
1870 |     { "NA_real_",    NUM_CONST  },
1871 |     { "NA_character_", NUM_CONST  },
1872 |     { "NA_complex_", NUM_CONST  },
1873 |     { "function",   FUNCTION   },
1874 |     { "while",	    WHILE      },
1875 |     { "repeat",	    REPEAT     },
1876 |     { "for",	    FOR	       },
1877 |     { "if",	    IF	       },
1878 |     { "in",	    IN	       },
1879 |     { "else",	    ELSE       },
1880 |     { "next",	    NEXT       },
1881 |     { "break",	    BREAK      },
1882 |     { "...",	    SYMBOL     },
1883 |     { 0,	    0	       }
1884 | };
1885 | 
1886 | /* KeywordLookup has side effects, it sets yylval */
1887 | 
1888 | static int KeywordLookup(const char *s)
1889 | {
1890 |     int i;
1891 |     for (i = 0; keywords[i].name; i++) {
1892 | 	if (strcmp(keywords[i].name, s) == 0) {
1893 | 	    switch (keywords[i].token) {
1894 | 	    case NULL_CONST:
1895 | 		PRESERVE_SV(yylval = R_NilValue);
1896 | 		break;
1897 | 	    case NUM_CONST:
1898 | 		if(GenerateCode) {
1899 | 		    switch(i) {
1900 | 		    case 1:
1901 | 			PRESERVE_SV(yylval = mkNA());
1902 | 			break;
1903 | 		    case 2:
1904 | 			PRESERVE_SV(yylval = mkTrue());
1905 | 			break;
1906 | 		    case 3:
1907 | 			PRESERVE_SV(yylval = mkFalse());
1908 | 			break;
1909 | 		    case 4:
1910 | 			PRESERVE_SV(yylval = allocVector(REALSXP, 1));
1911 | 			REAL(yylval)[0] = R_PosInf;
1912 | 			break;
1913 | 		    case 5:
1914 | 			PRESERVE_SV(yylval = allocVector(REALSXP, 1));
1915 | 			REAL(yylval)[0] = R_NaN;
1916 | 			break;
1917 | 		    case 6:
1918 | 			PRESERVE_SV(yylval = allocVector(INTSXP, 1));
1919 | 			INTEGER(yylval)[0] = NA_INTEGER;
1920 | 			break;
1921 | 		    case 7:
1922 | 			PRESERVE_SV(yylval = allocVector(REALSXP, 1));
1923 | 			REAL(yylval)[0] = NA_REAL;
1924 | 			break;
1925 | 		    case 8:
1926 | 			PRESERVE_SV(yylval = allocVector(STRSXP, 1));
1927 | 			SET_STRING_ELT(yylval, 0, NA_STRING);
1928 | 			break;
1929 | 		    case 9:
1930 | 			PRESERVE_SV(yylval = allocVector(CPLXSXP, 1));
1931 | 			COMPLEX(yylval)[0].r = COMPLEX(yylval)[0].i = NA_REAL;
1932 | 			break;
1933 | 		    }
1934 | 		} else
1935 | 		    PRESERVE_SV(yylval = R_NilValue);
1936 | 		break;
1937 | 	    case FUNCTION:
1938 | 	    case WHILE:
1939 | 	    case REPEAT:
1940 | 	    case FOR:
1941 | 	    case IF:
1942 | 	    case NEXT:
1943 | 	    case BREAK:
1944 | 		yylval = install(s);
1945 | 		break;
1946 | 	    case IN:
1947 | 	    case ELSE:
1948 | 		break;
1949 | 	    case SYMBOL:
1950 | 		PRESERVE_SV(yylval = install(s));
1951 | 		break;
1952 | 	    }
1953 | 	    return keywords[i].token;
1954 | 	}
1955 |     }
1956 |     return 0;
1957 | }
1958 | 
1959 | static SEXP mkFloat(const char *s)
1960 | {
1961 |     return ScalarReal(R_atof(s));
1962 | }
1963 | 
1964 | static SEXP mkInt(const char *s)
1965 | {
1966 |     double f = R_atof(s);  /* or R_strtol? */
1967 |     return ScalarInteger((int) f);
1968 | }
1969 | 
1970 | static SEXP mkComplex(const char *s)
1971 | {
1972 |     SEXP t = R_NilValue;
1973 |     double f;
1974 |     f = R_atof(s); /* FIXME: make certain the value is legitimate. */
1975 |     t = allocVector(CPLXSXP, 1);
1976 |     COMPLEX(t)[0].r = 0;
1977 |     COMPLEX(t)[0].i = f;
1978 |     return t;
1979 | }
1980 | 
1981 | static SEXP mkNA(void)
1982 | {
1983 |     SEXP t = allocVector(LGLSXP, 1);
1984 |     LOGICAL(t)[0] = NA_LOGICAL;
1985 |     return t;
1986 | }
1987 | 
1988 | attribute_hidden
1989 | SEXP mkTrue(void)
1990 | {
1991 |     SEXP s = allocVector(LGLSXP, 1);
1992 |     LOGICAL(s)[0] = 1;
1993 |     return s;
1994 | }
1995 | 
1996 | SEXP mkFalse(void)
1997 | {
1998 |     SEXP s = allocVector(LGLSXP, 1);
1999 |     LOGICAL(s)[0] = 0;
2000 |     return s;
2001 | }
2002 | 
2003 | static void yyerror(const char *s)
2004 | {
2005 |     static const char *const yytname_translations[] =
2006 |     {
2007 |     /* the left column are strings coming from bison, the right
2008 |        column are translations for users.
2009 |        The first YYENGLISH from the right column are English to be translated,
2010 |        the rest are to be copied literally.  The #if 0 block below allows xgettext
2011 |        to see these.
2012 |     */
2013 | #define YYENGLISH 8
2014 | 	"$undefined",	"input",
2015 | 	"END_OF_INPUT",	"end of input",
2016 | 	"ERROR",	"input",
2017 | 	"STR_CONST",	"string constant",
2018 | 	"NUM_CONST",	"numeric constant",
2019 | 	"SYMBOL",	"symbol",
2020 | 	"LEFT_ASSIGN",	"assignment",
2021 | 	"'\\n'",	"end of line",
2022 | 	"NULL_CONST",	"'NULL'",
2023 | 	"FUNCTION",	"'function'",
2024 | 	"EQ_ASSIGN",	"'='",
2025 | 	"RIGHT_ASSIGN",	"'->'",
2026 | 	"ANON",		"'=>'",
2027 | 	"LBB",		"'[['",
2028 | 	"FOR",		"'for'",
2029 | 	"IN",		"'in'",
2030 | 	"IF",		"'if'",
2031 | 	"ELSE",		"'else'",
2032 | 	"WHILE",	"'while'",
2033 | 	"NEXT",		"'next'",
2034 | 	"BREAK",	"'break'",
2035 | 	"REPEAT",	"'repeat'",
2036 | 	"GT",		"'>'",
2037 | 	"GE",		"'>='",
2038 | 	"LT",		"'<'",
2039 | 	"LE",		"'<='",
2040 | 	"EQ",		"'=='",
2041 | 	"NE",		"'!='",
2042 | 	"AND",		"'&'",
2043 | 	"OR",		"'|'",
2044 | 	"AND2",		"'&&'",
2045 | 	"OR2",		"'||'",
2046 | 	"NS_GET",	"'::'",
2047 | 	"NS_GET_INT",	"':::'",
2048 | 	0
2049 |     };
2050 |     static char const yyunexpected[] = "syntax error, unexpected ";
2051 |     static char const yyexpecting[] = ", expecting ";
2052 |     char *expecting;
2053 | 
2054 |     R_ParseError     = yylloc.first_line;
2055 |     R_ParseErrorCol  = yylloc.first_column;
2056 |     R_ParseErrorFile = PS_SRCFILE;
2057 | 
2058 |     if (!strncmp(s, yyunexpected, sizeof yyunexpected -1)) {
2059 | 	int i;
2060 | 	/* Edit the error message */
2061 | 	expecting = strstr(s + sizeof yyunexpected -1, yyexpecting);
2062 | 	if (expecting) *expecting = '\0';
2063 | 	for (i = 0; yytname_translations[i]; i += 2) {
2064 | 	    if (!strcmp(s + sizeof yyunexpected - 1, yytname_translations[i])) {
2065 |                 switch(i/2)
2066 |                 {
2067 |                 case 0:
2068 |                         snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected input"));
2069 |                                 break;
2070 |                 case 1:
2071 |                         snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected end of input"));
2072 |                                 break;
2073 |                 case 2:
2074 |                         snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected input"));
2075 |                                 break;
2076 |                 case 3:
2077 |                         snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected string constant"));
2078 |                                 break;
2079 |                 case 4:
2080 |                         snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected numeric constant"));
2081 |                                 break;
2082 |                 case 5:
2083 |                         snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected symbol"));
2084 |                                 break;
2085 |                 case 6:
2086 |                         snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected assignment"));
2087 |                                 break;
2088 |                 case 7:
2089 |                         snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected end of line"));
2090 |                                 break;
2091 |                 default:
2092 |                   snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE, _("unexpected %s"),
2093 |                            yytname_translations[i+1]);
2094 |                                 break;
2095 |                 }
2096 |                 
2097 | 		return;
2098 | 	    }
2099 | 	}
2100 | 	snprintf(R_ParseErrorMsg, PARSE_ERROR_SIZE - 1, _("unexpected %s"),
2101 |                  s + sizeof yyunexpected - 1);
2102 |     } else {
2103 | 	strncpy(R_ParseErrorMsg, s, PARSE_ERROR_SIZE - 1);
2104 |         R_ParseErrorMsg[PARSE_ERROR_SIZE - 1] = '\0';
2105 |     }
2106 | }
2107 | 
2108 | static void CheckFormalArgs(SEXP formlist, SEXP _new, YYLTYPE *lloc)
2109 | {
2110 |     while (formlist != R_NilValue) {
2111 | 	if (TAG(formlist) == _new) {
2112 | 	    error(_("repeated formal argument '%s' on line %d"), EncodeChar(PRINTNAME(_new)),
2113 | 								 lloc->first_line);
2114 | 	}
2115 | 	formlist = CDR(formlist);
2116 |     }
2117 | }
2118 | 
2119 | /* This is used as the buffer for NumericValue, SpecialValue and
2120 |    SymbolValue.  None of these could conceivably need 8192 bytes.
2121 | 
2122 |    It has not been used as the buffer for input character strings
2123 |    since Oct 2007 (released as 2.7.0), and for comments since 2.8.0
2124 |  */
2125 | static char yytext[MAXELTSIZE];
2126 | 
2127 | static int SkipSpace(void)
2128 | {
2129 |     int c;
2130 |     static wctype_t blankwct = 0;
2131 | 
2132 |     if (!blankwct)
2133 | 	blankwct = Ri18n_wctype("blank");
2134 | 
2135 | #ifdef Win32
2136 |     if(!mbcslocale) { /* 0xa0 is NBSP in all 8-bit Windows locales */
2137 | 	while ((c = xxgetc()) == ' ' || c == '\t' || c == '\f' ||
2138 | 	       (unsigned int) c == 0xa0) ;
2139 | 	return c;
2140 |     } else {
2141 | 	int i, clen;
2142 | 	wchar_t wc;
2143 | 	while (1) {
2144 | 	    c = xxgetc();
2145 | 	    if (c == ' ' || c == '\t' || c == '\f') continue;
2146 | 	    if (c == '\n' || c == R_EOF) break;
2147 | 	    if ((unsigned int) c < 0x80) break;
2148 | 	    clen = mbcs_get_next(c, &wc);  /* always 2 */
2149 | 	    if(! Ri18n_iswctype(wc, blankwct) ) break;
2150 | 	    for(i = 1; i < clen; i++) c = xxgetc();
2151 | 	}
2152 | 	return c;
2153 |     }
2154 | #endif
2155 | #if defined(__STDC_ISO_10646__)
2156 |     if(mbcslocale) { /* wctype functions need Unicode wchar_t */
2157 | 	int i, clen;
2158 | 	wchar_t wc;
2159 | 	while (1) {
2160 | 	    c = xxgetc();
2161 | 	    if (c == ' ' || c == '\t' || c == '\f') continue;
2162 | 	    if (c == '\n' || c == R_EOF) break;
2163 | 	    if ((unsigned int) c < 0x80) break;
2164 | 	    clen = mbcs_get_next(c, &wc);
2165 | 	    if(! Ri18n_iswctype(wc, blankwct) ) break;
2166 | 	    for(i = 1; i < clen; i++) c = xxgetc();
2167 | 	}
2168 |     } else
2169 | #endif
2170 | 	while ((c = xxgetc()) == ' ' || c == '\t' || c == '\f') ;
2171 |     return c;
2172 | }
2173 | 
2174 | /* Note that with interactive use, EOF cannot occur inside */
2175 | /* a comment.  However, semicolons inside comments make it */
2176 | /* appear that this does happen.  For this reason we use the */
2177 | /* special assignment EndOfFile=2 to indicate that this is */
2178 | /* going on.  This is detected and dealt with in Parse1Buffer. */
2179 | 
2180 | static int SkipComment(void)
2181 | {
2182 |     int c='#', i;
2183 |     
2184 |     /* locations before the # character was read */
2185 |     int _first_column = ParseState.xxcolno ;
2186 |     int _first_parsed = ParseState.xxparseno ;
2187 |     int type = COMMENT ;
2188 | 
2189 |     Rboolean maybeLine = (ParseState.xxcolno == 1);
2190 |     Rboolean doSave;
2191 | 
2192 |     DECLARE_YYTEXT_BUFP(yyp);
2193 |     
2194 |     if (maybeLine) {
2195 |     	char lineDirective[] = "#line";
2196 |     	YYTEXT_PUSH(c, yyp);
2197 |     	for (i=1; i<5; i++) {
2198 |     	    c = xxgetc();
2199 |   	    if (c != (int)(lineDirective[i])) {
2200 |   	    	maybeLine = FALSE;
2201 |   	    	break;
2202 |   	    }
2203 |             YYTEXT_PUSH(c, yyp);
2204 |   	}
2205 |   	if (maybeLine)     
2206 | 	    c = processLineDirective(&type);
2207 |     }
2208 |     // we want to track down the character
2209 |     // __before__ the new line character
2210 |     int _last_column  = ParseState.xxcolno ;
2211 |     int _last_parsed  = ParseState.xxparseno ;
2212 |     
2213 |     if (c == '\n') {
2214 |         _last_column = prevcols[prevpos];
2215 |         _last_parsed = prevparse[prevpos];
2216 |     }
2217 |     
2218 |     doSave = !maybeLine;
2219 |     
2220 |     while (c != '\n' && c != R_EOF) {
2221 |         // Comments can be any length; we only record the ones that fit in yytext.
2222 |         if (doSave) {
2223 |             YYTEXT_PUSH(c, yyp);
2224 |             doSave = (yyp - yytext) < sizeof(yytext) - 2;
2225 |         }
2226 |  	_last_column = ParseState.xxcolno ;
2227 | 	_last_parsed = ParseState.xxparseno ;
2228 | 	c = xxgetc();
2229 |     }
2230 |     if (c == R_EOF) EndOfFile = 2;
2231 |     incrementId( ) ;
2232 |     YYTEXT_PUSH('\0', yyp);
2233 |     record_( _first_parsed, _first_column, _last_parsed, _last_column,
2234 | 	     type, identifier, doSave ? yytext : 0 ) ;
2235 |     return c;
2236 | }
2237 | 
2238 | static int NumericValue(int c)
2239 | {
2240 |     int seendot = (c == '.');
2241 |     int seenexp = 0;
2242 |     int last = c;
2243 |     int nd = 0;
2244 |     int asNumeric = 0;
2245 |     int count = 1; /* The number of characters seen */
2246 | 
2247 |     DECLARE_YYTEXT_BUFP(yyp);
2248 |     YYTEXT_PUSH(c, yyp);
2249 |     /* We don't care about other than ASCII digits */
2250 |     while (isdigit(c = xxgetc()) || c == '.' || c == 'e' || c == 'E'
2251 | 	   || c == 'x' || c == 'X' || c == 'L')
2252 |     {
2253 | 	count++;
2254 | 	if (c == 'L') /* must be at the end.  Won't allow 1Le3 (at present). */
2255 | 	{   YYTEXT_PUSH(c, yyp);
2256 | 	    break;
2257 | 	}
2258 | 	
2259 | 	if (c == 'x' || c == 'X') {
2260 | 	    if (count > 2 || last != '0') break;  /* 0x must be first */
2261 | 	    YYTEXT_PUSH(c, yyp);
2262 | 	    while(isdigit(c = xxgetc()) || ('a' <= c && c <= 'f') ||
2263 | 		  ('A' <= c && c <= 'F') || c == '.') {
2264 | 		if (c == '.') {
2265 | 		    if (seendot) return ERROR;
2266 | 		    seendot = 1;
2267 | 		}
2268 | 		YYTEXT_PUSH(c, yyp);
2269 | 		nd++;
2270 | 	    }
2271 | 	    if (nd == 0) return ERROR;
2272 | 	    if (c == 'p' || c == 'P') {
2273 | 	        seenexp = 1;
2274 | 		YYTEXT_PUSH(c, yyp);
2275 | 		c = xxgetc();
2276 | 		if (!isdigit(c) && c != '+' && c != '-') return ERROR;
2277 | 		if (c == '+' || c == '-') {
2278 | 		    YYTEXT_PUSH(c, yyp);
2279 | 		    c = xxgetc();
2280 | 		}
2281 | 		for(nd = 0; isdigit(c); c = xxgetc(), nd++)
2282 | 		    YYTEXT_PUSH(c, yyp);
2283 | 		if (nd == 0) return ERROR;
2284 | 	    }
2285 |             if (seendot && !seenexp) return ERROR;
2286 | 	    break;
2287 | 	}
2288 | 	if (c == 'E' || c == 'e') {
2289 | 	    if (seenexp)
2290 | 		break;
2291 | 	    seenexp = 1;
2292 | 	    seendot = seendot == 1 ? seendot : 2;
2293 | 	    YYTEXT_PUSH(c, yyp);
2294 | 	    c = xxgetc();
2295 | 	    if (!isdigit(c) && c != '+' && c != '-') return ERROR;
2296 | 	    if (c == '+' || c == '-') {
2297 | 		YYTEXT_PUSH(c, yyp);
2298 | 		c = xxgetc();
2299 | 		if (!isdigit(c)) return ERROR;
2300 | 	    }
2301 | 	}
2302 | 	if (c == '.') {
2303 | 	    if (seendot)
2304 | 		break;
2305 | 	    seendot = 1;
2306 | 	}
2307 | 	YYTEXT_PUSH(c, yyp);
2308 | 	last = c;
2309 |     }
2310 |     
2311 |     if(c == 'i')
2312 | 	YYTEXT_PUSH(c, yyp); /* for getParseData */
2313 | 	
2314 |     YYTEXT_PUSH('\0', yyp);    
2315 |     /* Make certain that things are okay. */
2316 |     if(c == 'L') {
2317 | 	double a = R_atof(yytext);
2318 | 	int b = (int) a;
2319 | 	/* We are asked to create an integer via the L, so we check that the
2320 | 	   double and int values are the same. If not, this is a problem and we
2321 | 	   will not lose information and so use the numeric value.
2322 | 	*/
2323 | 	if(a != (double) b) {
2324 | 	    if(GenerateCode) {
2325 | 		if(seendot == 1 && seenexp == 0)
2326 | 		    warning(_("integer literal %s contains decimal; using numeric value"), yytext);
2327 | 		else {
2328 | 		    /* hide the L for the warning message */
2329 | 		    warning(_("non-integer value %s qualified with L; using numeric value"), yytext);
2330 | 		}
2331 | 	    }
2332 | 	    asNumeric = 1;
2333 | 	    seenexp = 1;
2334 | 	}
2335 |     }
2336 | 
2337 |     if(c == 'i') {
2338 | 	yylval = GenerateCode ? mkComplex(yytext) : R_NilValue;
2339 |     } else if(c == 'L' && asNumeric == 0) {
2340 | 	if(GenerateCode && seendot == 1 && seenexp == 0)
2341 | 	    warning(_("integer literal %s contains unnecessary decimal point"), yytext);
2342 | 	yylval = GenerateCode ? mkInt(yytext) : R_NilValue;
2343 | #if 0  /* do this to make 123 integer not double */
2344 |     } else if(!(seendot || seenexp)) {
2345 | 	if(c != 'L') xxungetc(c);
2346 | 	if (GenerateCode) {
2347 | 	    double a = R_atof(yytext);
2348 | 	    int b = (int) a;
2349 | 	    yylval = (a != (double) b) ? mkFloat(yytext) : mkInt(yytext);
2350 | 	} else yylval = R_NilValue;
2351 | #endif
2352 |     } else {
2353 | 	if(c != 'L')
2354 | 	    xxungetc(c);
2355 | 	yylval = GenerateCode ? mkFloat(yytext) : R_NilValue;
2356 |     }
2357 | 
2358 |     PRESERVE_SV(yylval);
2359 |     return NUM_CONST;
2360 | }
2361 | 
2362 | /* Strings may contain the standard ANSI escapes and octal */
2363 | /* specifications of the form \o, \oo or \ooo, where 'o' */
2364 | /* is an octal digit. */
2365 | 
2366 | /* The buffer is reallocated on the R heap if needed; not by malloc */
2367 | /* to avoid memory leak in case of R error (long jump) */
2368 | #define STEXT_PUSH(c) do {                  \
2369 | 	size_t nc = bp - stext;             \
2370 | 	if (nc >= nstext - 1) {             \
2371 | 	    char *old = stext;              \
2372 | 	    SEXP st1;		            \
2373 | 	    nstext *= 2;                    \
2374 | 	    PROTECT(st1 = allocVector(RAWSXP, nstext)); \
2375 | 	    stext = (char *)RAW(st1);       \
2376 | 	    memmove(stext, old, nc);        \
2377 | 	    REPROTECT(st1, sti);	    \
2378 | 	    UNPROTECT(1); /* st1 */         \
2379 | 	    bp = stext+nc; }		    \
2380 | 	*bp++ = ((char) c);		    \
2381 | } while(0)
2382 | 
2383 | 
2384 | /* The idea here is that if a string contains \u escapes that are not
2385 |    valid in the current locale, we should switch to UTF-8 for that
2386 |    string.  Needs Unicode wide-char support.
2387 | 
2388 |    Defining __STDC_ISO_10646__ is done by the OS (nor to) in wchar.t.
2389 |    Some (e.g. Solaris, FreeBSD) have Unicode wchar_t but do not define it.
2390 | */
2391 | 
2392 | #if defined(Win32) || defined(__STDC_ISO_10646__)
2393 | typedef wchar_t ucs_t;
2394 | # define mbcs_get_next2 mbcs_get_next
2395 | #else
2396 | typedef unsigned int ucs_t;
2397 | # define WC_NOT_UNICODE 
2398 | static int mbcs_get_next2(int c, ucs_t *wc)
2399 | {
2400 |     int i, res, clen = 1; char s[9];
2401 | 
2402 |     s[0] = c;
2403 |     /* This assumes (probably OK) that all MBCS embed ASCII as single-byte
2404 |        lead bytes, including control chars */
2405 |     if((unsigned int) c < 0x80) {
2406 | 	*wc = (wchar_t) c;
2407 | 	return 1;
2408 |     }
2409 |     if(utf8locale) {
2410 | 	clen = utf8clen(c);
2411 | 	for(i = 1; i < clen; i++) {
2412 | 	    c = xxgetc();
2413 | 	    if(c == R_EOF) error(_("EOF whilst reading MBCS char at line %d"), ParseState.xxlineno);
2414 | 	    s[i] = (char) c;
2415 | 	}
2416 | 	s[clen] ='\0'; /* x86 Solaris requires this */
2417 | 	res = mbtoucs(wc, s, clen);
2418 | 	if(res == -1) error(_("invalid multibyte character in parser at line %d"), ParseState.xxlineno);
2419 |     } else {
2420 | 	/* This is not necessarily correct for stateful MBCS */
2421 | 	while(clen <= MB_CUR_MAX) {
2422 | 	    res = mbtoucs(wc, s, clen);
2423 | 	    if(res >= 0) break;
2424 | 	    if(res == -1)
2425 | 		error(_("invalid multibyte character in parser at line %d"), ParseState.xxlineno);
2426 | 	    /* so res == -2 */
2427 | 	    c = xxgetc();
2428 | 	    if(c == R_EOF) error(_("EOF whilst reading MBCS char at line %d"), ParseState.xxlineno);
2429 | 	    s[clen++] = c;
2430 | 	} /* we've tried enough, so must be complete or invalid by now */
2431 |     }
2432 |     for(i = clen - 1; i > 0; i--) xxungetc(s[i]);
2433 |     return clen;
2434 | }
2435 | #endif
2436 | 
2437 | #define WTEXT_PUSH(c) do { if(wcnt < 10000) wcs[wcnt++] = c; } while(0)
2438 | 
2439 | static SEXP mkStringUTF8(const ucs_t *wcs, int cnt)
2440 | {
2441 |     SEXP t;
2442 |     int nb;
2443 | 
2444 | /* NB: cnt includes the terminator */
2445 | #ifdef Win32
2446 |     nb = cnt*4; /* UCS-2/UTF-16 so max 4 bytes per wchar_t */
2447 | #else
2448 |     nb = cnt*6;
2449 | #endif
2450 |     R_CheckStack2(nb);
2451 |     char s[nb];
2452 |     memset(s, 0, nb); /* safety */
2453 | #ifdef WC_NOT_UNICODE
2454 |     for(char *ss = s; *wcs; wcs++) ss += ucstoutf8(ss, *wcs);
2455 | #else
2456 |     wcstoutf8(s, wcs, sizeof(s));
2457 | #endif
2458 |     PROTECT(t = allocVector(STRSXP, 1));
2459 |     SET_STRING_ELT(t, 0, mkCharCE(s, CE_UTF8));
2460 |     UNPROTECT(1); /* t */
2461 |     return t;
2462 | }
2463 | 
2464 | #define CTEXT_PUSH(c) do { \
2465 | 	if (ct - currtext >= 1000) { \
2466 | 	    memmove(currtext, currtext+100, 901); memmove(currtext, "... ", 4); ct -= 100; \
2467 | 	    currtext_truncated = TRUE; \
2468 | 	} \
2469 | 	*ct++ = ((char) c);  \
2470 | } while(0)
2471 | #define CTEXT_POP() ct--
2472 | 
2473 | 
2474 | /* forSymbol is true when parsing backticked symbols */
2475 | static int StringValue(int c, Rboolean forSymbol)
2476 | {
2477 |     int quote = c;
2478 |     char currtext[1010], *ct = currtext;
2479 |     char st0[MAXELTSIZE];
2480 |     unsigned int nstext = MAXELTSIZE;
2481 |     char *stext = st0, *bp = st0;
2482 |     PROTECT_INDEX sti;
2483 |     int wcnt = 0;
2484 |     ucs_t wcs[10001];
2485 |     Rboolean oct_or_hex = FALSE, use_wcs = FALSE, currtext_truncated = FALSE;
2486 | 
2487 |     PROTECT_WITH_INDEX(R_NilValue, &sti);
2488 |     CTEXT_PUSH(c);
2489 |     while ((c = xxgetc()) != R_EOF && c != quote) {
2490 | 	CTEXT_PUSH(c);
2491 | 	if (c == '\n') {
2492 | 	    xxungetc(c); CTEXT_POP();
2493 | 	    /* Fix suggested by Mark Bravington to allow multiline strings
2494 | 	     * by pretending we've seen a backslash. Was:
2495 | 	     * return ERROR;
2496 | 	     */
2497 | 	    c = '\\';
2498 | 	}
2499 | 	if (c == '\\') {
2500 | 	    c = xxgetc(); CTEXT_PUSH(c);
2501 | 	    if ('0' <= c && c <= '7') {
2502 | 		int octal = c - '0';
2503 | 		if ('0' <= (c = xxgetc()) && c <= '7') {
2504 | 		    CTEXT_PUSH(c);
2505 | 		    octal = 8 * octal + c - '0';
2506 | 		    if ('0' <= (c = xxgetc()) && c <= '7') {
2507 | 			CTEXT_PUSH(c);
2508 | 			octal = 8 * octal + c - '0';
2509 | 		    } else {
2510 | 			xxungetc(c);
2511 | 			CTEXT_POP();
2512 | 		    }
2513 | 		} else {
2514 | 		    xxungetc(c);
2515 | 		    CTEXT_POP();
2516 | 		}
2517 | 		if (!octal)
2518 | 		    error(_("nul character not allowed (line %d)"), ParseState.xxlineno);
2519 | 		c = octal;
2520 | 		oct_or_hex = TRUE;
2521 | 	    }
2522 | 	    else if(c == 'x') {
2523 | 		int val = 0; int i, ext;
2524 | 		for(i = 0; i < 2; i++) {
2525 | 		    c = xxgetc(); CTEXT_PUSH(c);
2526 | 		    if(c >= '0' && c <= '9') ext = c - '0';
2527 | 		    else if (c >= 'A' && c <= 'F') ext = c - 'A' + 10;
2528 | 		    else if (c >= 'a' && c <= 'f') ext = c - 'a' + 10;
2529 | 		    else {
2530 | 			xxungetc(c);
2531 | 			CTEXT_POP();
2532 | 			if (i == 0) { /* was just \x */
2533 | 			    *ct = '\0';
2534 | 			    errorcall(R_NilValue, _("'\\x' used without hex digits in character string starting \"%s\""), currtext);
2535 | 			}
2536 | 			break;
2537 | 		    }
2538 | 		    val = 16*val + ext;
2539 | 		}
2540 | 		if (!val)
2541 | 		    error(_("nul character not allowed (line %d)"), ParseState.xxlineno);
2542 | 		c = val;
2543 | 		oct_or_hex = TRUE;
2544 | 	    }
2545 | 	    else if(c == 'u') {
2546 | 		unsigned int val = 0; int i, ext; 
2547 | 		Rboolean delim = FALSE;
2548 | 
2549 | 		if(forSymbol) 
2550 | 		    error(_("\\uxxxx sequences not supported inside backticks (line %d)"), ParseState.xxlineno);
2551 | 		if((c = xxgetc()) == '{') {
2552 | 		    delim = TRUE;
2553 | 		    CTEXT_PUSH(c);
2554 | 		} else xxungetc(c);
2555 | 		for(i = 0; i < 4; i++) {
2556 | 		    c = xxgetc(); CTEXT_PUSH(c);
2557 | 		    if(c >= '0' && c <= '9') ext = c - '0';
2558 | 		    else if (c >= 'A' && c <= 'F') ext = c - 'A' + 10;
2559 | 		    else if (c >= 'a' && c <= 'f') ext = c - 'a' + 10;
2560 | 		    else {
2561 | 			xxungetc(c);
2562 | 			CTEXT_POP();
2563 | 			if (i == 0) { /* was just \u */
2564 | 			    *ct = '\0';
2565 | 			    errorcall(R_NilValue, _("'\\u' used without hex digits in character string starting \"%s\""), currtext);
2566 | 			}
2567 | 			break;
2568 | 		    }
2569 | 		    val = 16*val + ext;
2570 | 		}
2571 | 		if(delim) {
2572 | 		    if((c = xxgetc()) != '}')
2573 | 			error(_("invalid \\u{xxxx} sequence (line %d)"),
2574 | 			      ParseState.xxlineno);
2575 | 		    else CTEXT_PUSH(c);
2576 | 		}
2577 | 		if (!val)
2578 | 		    error(_("nul character not allowed (line %d)"), ParseState.xxlineno);
2579 | 		WTEXT_PUSH(val); /* this assumes wchar_t is Unicode */
2580 | 		use_wcs = TRUE;
2581 | 		continue;
2582 | 	    }
2583 | 	    else if(c == 'U') {
2584 | 		unsigned int val = 0; int i, ext;
2585 | 		Rboolean delim = FALSE;
2586 | 		if(forSymbol) 
2587 | 		    error(_("\\Uxxxxxxxx sequences not supported inside backticks (line %d)"), ParseState.xxlineno);
2588 | 		if((c = xxgetc()) == '{') {
2589 | 		    delim = TRUE;
2590 | 		    CTEXT_PUSH(c);
2591 | 		} else xxungetc(c);
2592 | 		for(i = 0; i < 8; i++) {
2593 | 		    c = xxgetc(); CTEXT_PUSH(c);
2594 | 		    if(c >= '0' && c <= '9') ext = c - '0';
2595 | 		    else if (c >= 'A' && c <= 'F') ext = c - 'A' + 10;
2596 | 		    else if (c >= 'a' && c <= 'f') ext = c - 'a' + 10;
2597 | 		    else {
2598 | 			xxungetc(c);
2599 | 			CTEXT_POP();
2600 | 			if (i == 0) { /* was just \U */
2601 | 			    *ct = '\0';
2602 | 			    errorcall(R_NilValue, _("'\\U' used without hex digits in character string starting \"%s\""), currtext);
2603 | 			}
2604 | 			break;
2605 | 		    }
2606 | 		    val = 16*val + ext;
2607 | 		}
2608 | 		if(delim) {
2609 | 		    if((c = xxgetc()) != '}')
2610 | 			error(_("invalid \\U{xxxxxxxx} sequence (line %d)"), ParseState.xxlineno);
2611 | 		    else CTEXT_PUSH(c);
2612 | 		}
2613 | 		if (!val)
2614 | 		    error(_("nul character not allowed (line %d)"), ParseState.xxlineno);
2615 | #ifdef Win32
2616 | 		if (0x010000 <= val && val <= 0x10FFFF) {   /* Need surrogate pair in Windows */
2617 | 		    val = val - 0x010000;
2618 | 		    WTEXT_PUSH( 0xD800 | (val >> 10) );
2619 | 		    val = 0xDC00 | (val & 0x03FF);
2620 | 		}
2621 | #endif
2622 | 		WTEXT_PUSH(val);
2623 | 		use_wcs = TRUE;
2624 | 		continue;
2625 | 	    }
2626 | 	    else {
2627 | 		switch (c) {
2628 | 		case 'a':
2629 | 		    c = '\a';
2630 | 		    break;
2631 | 		case 'b':
2632 | 		    c = '\b';
2633 | 		    break;
2634 | 		case 'f':
2635 | 		    c = '\f';
2636 | 		    break;
2637 | 		case 'n':
2638 | 		    c = '\n';
2639 | 		    break;
2640 | 		case 'r':
2641 | 		    c = '\r';
2642 | 		    break;
2643 | 		case 't':
2644 | 		    c = '\t';
2645 | 		    break;
2646 | 		case 'v':
2647 | 		    c = '\v';
2648 | 		    break;
2649 | 		case '\\':
2650 | 		    c = '\\';
2651 | 		    break;
2652 | 		case '"':
2653 | 		case '\'':
2654 | 		case '`':
2655 | 		case ' ':
2656 | 		case '\n':
2657 | 		    break;
2658 | 		default:
2659 | 		    *ct = '\0';
2660 | 		    errorcall(R_NilValue, _("'\\%c' is an unrecognized escape in character string starting \"%s\""), c, currtext);
2661 | 		}
2662 | 	    }
2663 | 	} else if(mbcslocale) {
2664 | 	    int i, clen;
2665 | 	    ucs_t wc;
2666 | 	    clen = mbcs_get_next2(c, &wc);
2667 | 	    WTEXT_PUSH(wc);
2668 | 	    ParseState.xxbyteno += clen-1;
2669 | 	    
2670 | 	    for(i = 0; i < clen - 1; i++){
2671 | 		STEXT_PUSH(c);
2672 | 		c = xxgetc();
2673 | 		if (c == R_EOF) break;
2674 | 		CTEXT_PUSH(c);
2675 | 		if (c == '\n') {
2676 | 		    xxungetc(c); CTEXT_POP();
2677 | 		    c = '\\';
2678 | 		}
2679 | 	    }
2680 | 	    if (c == R_EOF) break;
2681 | 	    STEXT_PUSH(c);
2682 | 	    continue;
2683 | 	}
2684 | 	STEXT_PUSH(c);
2685 | 	if ((unsigned int) c < 0x80) WTEXT_PUSH(c);
2686 | 	else { /* have an 8-bit char in the current encoding */
2687 | #ifdef WC_NOT_UNICODE
2688 | 	    ucs_t wc;
2689 | 	    char s[2] = " ";
2690 | 	    s[0] = (char) c;
2691 | 	    mbtoucs(&wc, s, 2);
2692 | #else
2693 | 	    wchar_t wc;
2694 | 	    char s[2] = " ";
2695 | 	    s[0] = (char) c;
2696 | 	    mbrtowc(&wc, s, 2, NULL);
2697 | #endif
2698 | 	    WTEXT_PUSH(wc);
2699 | 	}
2700 |     }
2701 |     STEXT_PUSH('\0');
2702 |     WTEXT_PUSH(0);
2703 |     yytext[0] = '\0';
2704 |     if (c == R_EOF) {
2705 | 	PRESERVE_SV(yylval = R_NilValue);
2706 | 	UNPROTECT(1); /* release stext */
2707 |     	return INCOMPLETE_STRING;
2708 |     } else {
2709 |     	CTEXT_PUSH(c);
2710 |     	CTEXT_PUSH('\0');
2711 |     }
2712 |     if (!currtext_truncated)
2713 |     	strcpy(yytext, currtext);
2714 |     else if (forSymbol || !use_wcs) {
2715 |         size_t total = strlen(stext);
2716 |         snprintf(yytext, MAXELTSIZE, "[%u chars quoted with '%c']", (unsigned int)total, quote);
2717 |     } else 
2718 |         snprintf(yytext, MAXELTSIZE, "[%d wide chars quoted with '%c']", wcnt, quote);
2719 |     if(forSymbol) {
2720 | 	PRESERVE_SV(yylval = install(stext));
2721 | 	UNPROTECT(1); /* release stext */
2722 | 	return SYMBOL;
2723 |     } else {
2724 | 	if(use_wcs) {
2725 | 	    if(oct_or_hex)
2726 | 		error(_("mixing Unicode and octal/hex escapes in a string is not allowed"));
2727 | 	    if(wcnt < 10000)
2728 | 		PRESERVE_SV(yylval = mkStringUTF8(wcs, wcnt)); /* include terminator */
2729 | 	    else
2730 | 		error(_("string at line %d containing Unicode escapes not in this locale\nis too long (max 10000 chars)"), ParseState.xxlineno);
2731 | 	} else
2732 | 	    PRESERVE_SV(yylval = mkString2(stext,  bp - stext - 1, oct_or_hex));
2733 | 	UNPROTECT(1); /* release stext */
2734 | 	return STR_CONST;
2735 |     }
2736 | }
2737 | 
2738 | static int SpecialValue(int c)
2739 | {
2740 |     DECLARE_YYTEXT_BUFP(yyp);
2741 |     YYTEXT_PUSH(c, yyp);
2742 |     while ((c = xxgetc()) != R_EOF && c != '%') {
2743 | 	if (c == '\n') {
2744 | 	    xxungetc(c);
2745 | 	    return ERROR;
2746 | 	}
2747 | 	YYTEXT_PUSH(c, yyp);
2748 |     }
2749 |     if (c == '%')
2750 | 	YYTEXT_PUSH(c, yyp);
2751 |     YYTEXT_PUSH('\0', yyp);
2752 |     yylval = install(yytext);
2753 |     return SPECIAL;
2754 | }
2755 | 
2756 | /* return 1 if name is a valid name 0 otherwise */
2757 | attribute_hidden
2758 | int isValidName(const char *name)
2759 | {
2760 |     const char *p = name;
2761 |     int i;
2762 | 
2763 |     if(mbcslocale) {
2764 | 	/* the only way to establish which chars are alpha etc is to
2765 | 	   use the wchar variants */
2766 | 	size_t n = strlen(name), used;
2767 | 	wchar_t wc;
2768 | 	used = Mbrtowc(&wc, p, n, NULL); p += used; n -= used;
2769 | 	if(used == 0) return 0;
2770 | 	if (wc != L'.' && !iswalpha(wc) ) return 0;
2771 | 	if (wc == L'.') {
2772 | 	    /* We don't care about other than ASCII digits */
2773 | 	    if(isdigit(0xff & (int)*p)) return 0;
2774 | 	    /* Mbrtowc(&wc, p, n, NULL); if(iswdigit(wc)) return 0; */
2775 | 	}
2776 | 	while((used = Mbrtowc(&wc, p, n, NULL))) {
2777 | 	    if (!(iswalnum(wc) || wc == L'.' || wc == L'_')) break;
2778 | 	    p += used; n -= used;
2779 | 	}
2780 | 	if (*p != '\0') return 0;
2781 |     } else {
2782 | 	int c = 0xff & *p++;
2783 | 	if (c != '.' && !isalpha(c) ) return 0;
2784 | 	if (c == '.' && isdigit(0xff & (int)*p)) return 0;
2785 | 	while ( c = 0xff & *p++, (isalnum(c) || c == '.' || c == '_') ) ;
2786 | 	if (c != '\0') return 0;
2787 |     }
2788 | 
2789 |     if (strcmp(name, "...") == 0) return 1;
2790 | 
2791 |     for (i = 0; keywords[i].name != NULL; i++)
2792 | 	if (strcmp(keywords[i].name, name) == 0) return 0;
2793 | 
2794 |     return 1;
2795 | }
2796 | 
2797 | 
2798 | static int SymbolValue(int c)
2799 | {
2800 |     int kw;
2801 |     DECLARE_YYTEXT_BUFP(yyp);
2802 |     if(mbcslocale) {
2803 | 	wchar_t wc; int i, clen;
2804 | 	clen = mbcs_get_next(c, &wc);
2805 | 	while(1) {
2806 | 	    /* at this point we have seen one char, so push its bytes
2807 | 	       and get one more */
2808 | 	    for(i = 0; i < clen; i++) {
2809 | 		YYTEXT_PUSH(c, yyp);
2810 | 		c = xxgetc();
2811 | 	    }
2812 | 	    if(c == R_EOF) break;
2813 | 	    if(c == '.' || c == '_') {
2814 | 		clen = 1;
2815 | 		continue;
2816 | 	    }
2817 | 	    clen = mbcs_get_next(c, &wc);
2818 | 	    if(!iswalnum(wc)) break;
2819 | 	}
2820 |     } else
2821 | 	do {
2822 | 	    YYTEXT_PUSH(c, yyp);
2823 | 	} while ((c = xxgetc()) != R_EOF &&
2824 | 		 (isalnum(c) || c == '.' || c == '_'));
2825 |     xxungetc(c);
2826 |     YYTEXT_PUSH('\0', yyp);
2827 |     if ((kw = KeywordLookup(yytext))) 
2828 | 	return kw;
2829 |     
2830 |     PRESERVE_SV(yylval = install(yytext));
2831 |     return SYMBOL;
2832 | }
2833 | 
2834 | static void setParseFilename(SEXP newname) {
2835 |     SEXP class;
2836 |     
2837 |     if (isEnvironment(PS_SRCFILE)) {
2838 | 	SEXP oldname = findVar(install("filename"), PS_SRCFILE);
2839 |     	if (isString(oldname) && length(oldname) > 0 &&
2840 |     	    strcmp(CHAR(STRING_ELT(oldname, 0)),
2841 |     	           CHAR(STRING_ELT(newname, 0))) == 0) return;
2842 | 	PS_SET_SRCFILE(NewEnvironment(R_NilValue, R_NilValue, R_EmptyEnv));
2843 | 	defineVar(install("filename"), newname, PS_SRCFILE);
2844 | 	defineVar(install("original"), PS_ORIGINAL, PS_SRCFILE);
2845 | 
2846 | 	PROTECT(class = allocVector(STRSXP, 2));
2847 | 	SET_STRING_ELT(class, 0, mkChar("srcfilealias"));
2848 | 	SET_STRING_ELT(class, 1, mkChar("srcfile"));
2849 | 	setAttrib(PS_SRCFILE, R_ClassSymbol, class);
2850 | 	UNPROTECT(1); /* class */
2851 |     } else 
2852 | 	PS_SET_SRCFILE(duplicate(newname));
2853 |     RELEASE_SV(newname);
2854 | }
2855 | 
2856 | static int processLineDirective(int *type)
2857 | {
2858 |     int c, tok, linenumber;
2859 |     c = SkipSpace();
2860 |     if (!isdigit(c)) return(c);
2861 |     tok = NumericValue(c);
2862 |     linenumber = atoi(yytext);
2863 |     c = SkipSpace();
2864 |     if (c == '"') 
2865 | 	tok = StringValue(c, FALSE);
2866 |     else
2867 |     	xxungetc(c);
2868 |     if (tok == STR_CONST) 
2869 | 	setParseFilename(yylval);
2870 |     while ((c = xxgetc()) != '\n' && c != R_EOF) /* skip */ ;
2871 |     ParseState.xxlineno = linenumber;
2872 |     *type = LINE_DIRECTIVE;
2873 |     /* we don't change xxparseno here:  it counts parsed lines, not official lines */
2874 |     R_ParseContext[R_ParseContextLast] = '\0';  /* Context report shouldn't show the directive */
2875 |     return(c);
2876 | }
2877 | 
2878 | /* Get the R symbol, and set yytext at the same time */
2879 | static SEXP install_and_save(char * text)
2880 | {
2881 |     strcpy(yytext, text);
2882 |     return install(text);
2883 | }
2884 | 
2885 | /* Get an R symbol, and set different yytext.  Used for translation of -> to <-. ->> to <<- */
2886 | static SEXP install_and_save2(char * text, char * savetext)
2887 | {
2888 |     strcpy(yytext, savetext);
2889 |     return install(text);
2890 | }
2891 | 
2892 | 
2893 | /* Split the input stream into tokens. */
2894 | /* This is the lowest of the parsing levels. */
2895 | 
2896 | static int token(void)
2897 | {
2898 |     int c;
2899 |     wchar_t wc;
2900 | 
2901 |     if (SavedToken) {
2902 | 	c = SavedToken;
2903 | 	yylval = SavedLval;
2904 | 	SavedLval = R_NilValue;
2905 | 	SavedToken = 0;
2906 | 	yylloc.first_line = xxlinesave;
2907 | 	yylloc.first_column = xxcolsave;
2908 | 	yylloc.first_byte = xxbytesave;
2909 | 	yylloc.first_parsed = xxparsesave;
2910 | 	return c;
2911 |     }
2912 |     xxcharsave = xxcharcount; /* want to be able to go back one token */
2913 | 
2914 |     c = SkipSpace();
2915 |     if (c == '#') c = SkipComment();
2916 | 
2917 |     yylloc.first_line = ParseState.xxlineno;
2918 |     yylloc.first_column = ParseState.xxcolno;
2919 |     yylloc.first_byte = ParseState.xxbyteno;
2920 |     yylloc.first_parsed = ParseState.xxparseno;
2921 | 
2922 |     if (c == R_EOF) return END_OF_INPUT;
2923 | 
2924 |     /* Either digits or symbols can start with a "." */
2925 |     /* so we need to decide which it is and jump to  */
2926 |     /* the correct spot. */
2927 | 
2928 |     if (c == '.' && typeofnext() >= 2) goto symbol;
2929 | 
2930 |     /* literal numbers */
2931 | 
2932 |     if (c == '.') return NumericValue(c);
2933 |     /* We don't care about other than ASCII digits */
2934 |     if (isdigit(c)) return NumericValue(c);
2935 | 
2936 |     /* literal strings */
2937 | 
2938 |     if (c == '\"' || c == '\'')
2939 | 	return StringValue(c, FALSE);
2940 | 
2941 |     /* special functions */
2942 | 
2943 |     if (c == '%')
2944 | 	return SpecialValue(c);
2945 | 
2946 |     /* functions, constants and variables */
2947 | 
2948 |     if (c == '`')
2949 | 	return StringValue(c, TRUE);
2950 |  symbol:
2951 | 
2952 |     if (c == '.') return SymbolValue(c);
2953 |     if(mbcslocale) {
2954 | 	mbcs_get_next(c, &wc);
2955 | 	if (iswalpha(wc)) return SymbolValue(c);
2956 |     } else
2957 | 	if (isalpha(c)) return SymbolValue(c);
2958 | 
2959 |     /* compound tokens */
2960 | 
2961 |     switch (c) {
2962 |     case '<':
2963 | 	if (nextchar('=')) {
2964 | 	    yylval = install_and_save("<=");
2965 | 	    return LE;
2966 | 	}
2967 | 	if (nextchar('-')) {
2968 | 	    yylval = install_and_save("<-");
2969 | 	    return LEFT_ASSIGN;
2970 | 	}
2971 | 	if (nextchar('<')) {
2972 | 	    if (nextchar('-')) {
2973 | 		yylval = install_and_save("<<-");
2974 | 		return LEFT_ASSIGN;
2975 | 	    }
2976 | 	    else
2977 | 		return ERROR;
2978 | 	}
2979 | 	yylval = install_and_save("<");
2980 | 	return LT;
2981 |     case '-':
2982 | 	if (nextchar('>')) {
2983 | 	    if (nextchar('>')) {
2984 | 		yylval = install_and_save2("<<-", "->>");
2985 | 		return RIGHT_ASSIGN;
2986 | 	    }
2987 | 	    else {
2988 | 		yylval = install_and_save2("<-", "->");
2989 | 		return RIGHT_ASSIGN;
2990 | 	    }
2991 | 	}
2992 | 	yylval = install_and_save("-");
2993 | 	return '-';
2994 |     case '>':
2995 | 	if (nextchar('=')) {
2996 | 	    yylval = install_and_save(">=");
2997 | 	    return GE;
2998 | 	}
2999 | 	yylval = install_and_save(">");
3000 | 	return GT;
3001 |     case '!':
3002 | 	if (nextchar('=')) {
3003 | 	    yylval = install_and_save("!=");
3004 | 	    return NE;
3005 | 	}
3006 | 	yylval = install_and_save("!");
3007 | 	return '!';
3008 |     case '=':
3009 | 	if (nextchar('=')) {
3010 | 	    yylval = install_and_save("==");
3011 | 	    return EQ;
3012 | 	}
3013 | 	if (nextchar('>')) {
3014 | 	    yylval = install_and_save("=>");
3015 | 	    return ANON;
3016 | 	}
3017 | 	yylval = install_and_save("=");
3018 | 	return EQ_ASSIGN;
3019 |     case ':':
3020 | 	if (nextchar(':')) {
3021 | 	    if (nextchar(':')) {
3022 | 		yylval = install_and_save(":::");
3023 | 		return NS_GET_INT;
3024 | 	    }
3025 | 	    else {
3026 | 		yylval = install_and_save("::");
3027 | 		return NS_GET;
3028 | 	    }
3029 | 	}
3030 | 	if (nextchar('=')) {
3031 | 	    yylval = install_and_save(":=");
3032 | 	    return LEFT_ASSIGN;
3033 | 	}
3034 | 	yylval = install_and_save(":");
3035 | 	return ':';
3036 |     case '&':
3037 | 	if (nextchar('&')) {
3038 | 	    yylval = install_and_save("&&");
3039 | 	    return AND2;
3040 | 	}
3041 | 	yylval = install_and_save("&");
3042 | 	return AND;
3043 |     case '|':
3044 | 	if (nextchar('|')) {
3045 | 	    yylval = install_and_save("||");
3046 | 	    return OR2;
3047 | 	}
3048 | 	yylval = install_and_save("|");
3049 | 	return OR;
3050 |     case LBRACE:
3051 | 	yylval = install_and_save("{");
3052 | 	return c;
3053 |     case RBRACE:
3054 |         strcpy(yytext, "}");
3055 | 	return c;
3056 |     case '(':
3057 | 	yylval = install_and_save("(");
3058 | 	return c;
3059 |     case ')':
3060 |         strcpy(yytext, ")");
3061 | 	return c;
3062 |     case '[':
3063 | 	if (nextchar('[')) {
3064 | 	    yylval = install_and_save("[[");
3065 | 	    return LBB;
3066 | 	}
3067 | 	yylval = install_and_save("[");
3068 | 	return c;
3069 |     case ']':
3070 |         strcpy(yytext, "]");
3071 | 	return c;
3072 |     case '?':
3073 | 	yylval = install_and_save("?");
3074 | 	return c;
3075 |     case '*':
3076 | 	/* Replace ** by ^.  This has been here since 1998, but is
3077 | 	   undocumented (at least in the obvious places).  It is in
3078 | 	   the index of the Blue Book with a reference to p. 431, the
3079 | 	   help for 'Deprecated'.  S-PLUS 6.2 still allowed this, so
3080 | 	   presumably it was for compatibility with S. */
3081 | 	if (nextchar('*')) {
3082 | 	    yylval = install_and_save2("^", "**");
3083 | 	    return '^';
3084 | 	} else
3085 | 	    yylval = install_and_save("*");
3086 | 	return c;
3087 |     case '+':
3088 |     case '/':
3089 |     case '^':
3090 |     case '~':
3091 |     case '$':
3092 |     case '@':
3093 | 	yytext[0] = (char) c;
3094 | 	yytext[1] = '\0';
3095 | 	yylval = install(yytext);
3096 | 	return c;
3097 |     default:
3098 |         yytext[0] = (char) c;
3099 |         yytext[1] = '\0';
3100 | 	return c;
3101 |     }
3102 | }
3103 | 
3104 | /**
3105 |  * Sets the first elements of the yyloc structure with current 
3106 |  * information
3107 |  */
3108 | static void setfirstloc(void)
3109 | {
3110 |     yylloc.first_line   = ParseState.xxlineno;
3111 |     yylloc.first_column = ParseState.xxcolno;
3112 |     yylloc.first_byte   = ParseState.xxbyteno;
3113 |     yylloc.first_parsed = ParseState.xxparseno;
3114 | }
3115 | 
3116 | static void setlastloc(void)
3117 | {
3118 |     yylloc.last_line = ParseState.xxlineno;
3119 |     yylloc.last_column = ParseState.xxcolno;
3120 |     yylloc.last_byte = ParseState.xxbyteno;
3121 |     yylloc.last_parsed = ParseState.xxparseno;
3122 | }
3123 | 
3124 | /**
3125 |  * Wrap around the token function. Returns the same result
3126 |  * but increments the identifier, after a call to token_, 
3127 |  * the identifier variable contains the id of the token
3128 |  * just returned
3129 |  *
3130 |  * @return the same as token
3131 |  */
3132 | 
3133 | static int token_(void){
3134 |     // capture the position before retrieving the token
3135 |     setfirstloc( ) ;
3136 | 
3137 |     // get the token
3138 |     int res = token( ) ;
3139 | 
3140 |     // capture the position after
3141 |     int _last_col  = ParseState.xxcolno ;
3142 |     int _last_parsed = ParseState.xxparseno ;
3143 | 
3144 |     _current_token = res ;
3145 |     incrementId( ) ;
3146 |     yylloc.id = identifier ;
3147 | 
3148 |     // record the position
3149 |     if( res != '\n' && res != END_OF_INPUT)
3150 | 	record_( yylloc.first_parsed, yylloc.first_column, 
3151 | 	         _last_parsed, _last_col,
3152 | 		res, identifier, yytext );
3153 | 
3154 |     return res; 
3155 | }
3156 | 
3157 | 
3158 | static int yylex(void)
3159 | {
3160 |     int tok;
3161 | 
3162 |  again:
3163 | 
3164 |     tok = token_();
3165 | 
3166 |     /* Newlines must be handled in a context */
3167 |     /* sensitive way.  The following block of */
3168 |     /* deals directly with newlines in the */
3169 |     /* body of "if" statements. */
3170 | 
3171 |     if (tok == '\n') {
3172 | 
3173 | 	if (EatLines || *contextp == '[' || *contextp == '(')
3174 | 	    goto again;
3175 | 
3176 | 	/* The essence of this is that in the body of */
3177 | 	/* an "if", any newline must be checked to */
3178 | 	/* see if it is followed by an "else". */
3179 | 	/* such newlines are discarded. */
3180 | 
3181 | 	if (*contextp == 'i') {
3182 | 
3183 | 	    /* Find the next non-newline token */
3184 | 
3185 | 	    while(tok == '\n')
3186 | 		tok = token_();
3187 | 
3188 | 	    /* If we encounter "}", ")" or "]" then */
3189 | 	    /* we know that all immediately preceding */
3190 | 	    /* "if" bodies have been terminated. */
3191 | 	    /* The corresponding "i" values are */
3192 | 	    /* popped off the context stack. */
3193 | 
3194 | 	    if (tok == RBRACE || tok == ')' || tok == ']' ) {
3195 | 		while (*contextp == 'i')
3196 | 		    ifpop();
3197 | 		*contextp-- = 0;
3198 | 		setlastloc();
3199 | 		return tok;
3200 | 	    }
3201 | 
3202 | 	    /* When a "," is encountered, it terminates */
3203 | 	    /* just the immediately preceding "if" body */
3204 | 	    /* so we pop just a single "i" of the */
3205 | 	    /* context stack. */
3206 | 
3207 | 	    if (tok == ',') {
3208 | 		ifpop();
3209 | 		setlastloc();
3210 | 		return tok;
3211 | 	    }
3212 | 
3213 | 	    /* Tricky! If we find an "else" we must */
3214 | 	    /* ignore the preceding newline.  Any other */
3215 | 	    /* token means that we must return the newline */
3216 | 	    /* to terminate the "if" and "push back" that */
3217 | 	    /* token so that we will obtain it on the next */
3218 | 	    /* call to token.  In either case sensitivity */
3219 | 	    /* is lost, so we pop the "i" from the context */
3220 | 	    /* stack. */
3221 | 
3222 | 	    if(tok == ELSE) {
3223 | 		EatLines = 1;
3224 | 		ifpop();
3225 | 		setlastloc();
3226 | 		return ELSE;
3227 | 	    }
3228 | 	    else {
3229 | 		ifpop();
3230 | 		SavedToken = tok;
3231 | 		xxlinesave = yylloc.first_line;
3232 | 		xxcolsave  = yylloc.first_column;
3233 | 		xxbytesave = yylloc.first_byte;
3234 | 		xxparsesave = yylloc.first_parsed;
3235 | 		SavedLval = yylval;
3236 | 		setlastloc();
3237 | 		if (ParseState.keepSrcRefs && ParseState.keepParseData &&
3238 | 		    yytext[0])
3239 | 
3240 | 		    /* unrecord the pushed back token if not null */
3241 | 		    ParseState.data_count--;
3242 | 		return '\n';
3243 | 	    }
3244 | 	}
3245 | 	else {
3246 | 	    setlastloc();
3247 | 	    return '\n';
3248 | 	}
3249 |     }
3250 | 
3251 |     /* Additional context sensitivities */
3252 | 
3253 |     switch(tok) {
3254 | 
3255 | 	/* Any newlines immediately following the */
3256 | 	/* the following tokens are discarded. The */
3257 | 	/* expressions are clearly incomplete. */
3258 | 
3259 |     case '+':
3260 |     case '-':
3261 |     case '*':
3262 |     case '/':
3263 |     case '^':
3264 |     case LT:
3265 |     case LE:
3266 |     case GE:
3267 |     case GT:
3268 |     case EQ:
3269 |     case NE:
3270 |     case OR:
3271 |     case AND:
3272 |     case OR2:
3273 |     case AND2:
3274 |     case SPECIAL:
3275 |     case FUNCTION:
3276 |     case WHILE:
3277 |     case REPEAT:
3278 |     case FOR:
3279 |     case IN:
3280 |     case '?':
3281 |     case '!':
3282 |     case '=':
3283 |     case ':':
3284 |     case '~':
3285 |     case '$':
3286 |     case '@':
3287 |     case LEFT_ASSIGN:
3288 |     case RIGHT_ASSIGN:
3289 |     case ANON:
3290 |     case EQ_ASSIGN:
3291 | 	EatLines = 1;
3292 | 	break;
3293 | 
3294 | 	/* Push any "if" statements found and */
3295 | 	/* discard any immediately following newlines. */
3296 | 
3297 |     case IF:
3298 | 	IfPush();
3299 | 	EatLines = 1;
3300 | 	break;
3301 | 
3302 | 	/* Terminate any immediately preceding "if" */
3303 | 	/* statements and discard any immediately */
3304 | 	/* following newlines. */
3305 | 
3306 |     case ELSE:
3307 | 	ifpop();
3308 | 	EatLines = 1;
3309 | 	break;
3310 | 
3311 | 	/* These tokens terminate any immediately */
3312 | 	/* preceding "if" statements. */
3313 | 
3314 |     case ';':
3315 |     case ',':
3316 | 	ifpop();
3317 | 	break;
3318 | 
3319 | 	/* Any newlines following these tokens can */
3320 | 	/* indicate the end of an expression. */
3321 | 
3322 |     case SYMBOL:
3323 |     case STR_CONST:
3324 |     case NUM_CONST:
3325 |     case NULL_CONST:
3326 |     case NEXT:
3327 |     case BREAK:
3328 | 	EatLines = 0;
3329 | 	break;
3330 | 
3331 | 	/* Handle brackets, braces and parentheses */
3332 | 
3333 |     case LBB:
3334 | 	if(contextp - contextstack >= CONTEXTSTACK_SIZE - 1)
3335 | 	    error(_("contextstack overflow at line %d"), ParseState.xxlineno);
3336 | 	*++contextp = '[';
3337 | 	*++contextp = '[';
3338 | 	break;
3339 | 
3340 |     case '[':
3341 | 	if(contextp - contextstack >= CONTEXTSTACK_SIZE)
3342 | 	    error(_("contextstack overflow at line %d"), ParseState.xxlineno);
3343 | 	*++contextp = (char) tok;
3344 | 	break;
3345 | 
3346 |     case LBRACE:
3347 | 	if(contextp - contextstack >= CONTEXTSTACK_SIZE)
3348 | 	    error(_("contextstack overflow at line %d"), ParseState.xxlineno);
3349 | 	*++contextp = (char) tok;
3350 | 	EatLines = 1;
3351 | 	break;
3352 | 
3353 |     case '(':
3354 | 	if(contextp - contextstack >= CONTEXTSTACK_SIZE)
3355 | 	    error(_("contextstack overflow at line %d"), ParseState.xxlineno);
3356 | 	*++contextp = (char) tok;
3357 | 	break;
3358 | 
3359 |     case ']':
3360 | 	while (*contextp == 'i')
3361 | 	    ifpop();
3362 | 	*contextp-- = 0;
3363 | 	EatLines = 0;
3364 | 	break;
3365 | 
3366 |     case RBRACE:
3367 | 	while (*contextp == 'i')
3368 | 	    ifpop();
3369 | 	*contextp-- = 0;
3370 | 	break;
3371 | 
3372 |     case ')':
3373 | 	while (*contextp == 'i')
3374 | 	    ifpop();
3375 | 	*contextp-- = 0;
3376 | 	EatLines = 0;
3377 | 	break;
3378 | 
3379 |     }
3380 |     setlastloc();
3381 |     return tok;
3382 | }
3383 | /**
3384 |  * Records location information about a symbol. The information is
3385 |  * used to fill the data 
3386 |  * 
3387 |  */
3388 | static void record_( int first_parsed, int first_column, int last_parsed, int last_column,
3389 | 	int token, int id, char* text_in ){
3390 | 	
3391 | 	if (!ParseState.keepSrcRefs || !ParseState.keepParseData
3392 | 	    || id == NA_INTEGER) return;
3393 | 	
3394 | 	// don't care about zero sized things
3395 | 	if( !yytext[0] ) return ;
3396 | 	
3397 | 	if (ParseState.data_count == DATA_COUNT)
3398 | 	    growData();
3399 | 	
3400 | 	_FIRST_COLUMN( ParseState.data_count ) = first_column; 
3401 | 	_FIRST_PARSED( ParseState.data_count ) = first_parsed;
3402 | 	_LAST_COLUMN( ParseState.data_count )  = last_column;  
3403 | 	_LAST_PARSED( ParseState.data_count )  = last_parsed; 
3404 | 	_TOKEN( ParseState.data_count )        = token;        
3405 | 	_ID( ParseState.data_count )           = id ;          
3406 | 	_PARENT(ParseState.data_count)         = 0 ; 
3407 | 	if ( text_in )
3408 | 	    SET_STRING_ELT(PS_TEXT, ParseState.data_count, mkChar(text_in));
3409 | 	else
3410 | 	    SET_STRING_ELT(PS_TEXT, ParseState.data_count, mkChar(""));
3411 | 	
3412 | 	if( id > ID_COUNT )
3413 | 	    growID(id) ;
3414 | 
3415 | 	ID_ID( id ) = ParseState.data_count ; 
3416 | 	
3417 | 	ParseState.data_count++ ;
3418 | }
3419 | 
3420 | /**
3421 |  * records parent as the parent of all its childs. This grows the 
3422 |  * parents list with a new vector. The first element of the new 
3423 |  * vector is the parent id, and other elements are childs id
3424 |  *
3425 |  * @param parent id of the parent expression
3426 |  * @param childs array of location information for all child symbols
3427 |  * @param nchilds number of childs
3428 |  */
3429 | static void recordParents( int parent, yyltype * childs, int nchilds){
3430 | 	
3431 | 	if( parent > ID_COUNT ){
3432 | 		growID(parent) ;
3433 | 	}
3434 | 	
3435 | 	/* some of the childs might be an empty token (like cr)
3436 | 	   which we do not want to track */
3437 | 	int ii;    /* loop index */
3438 | 	yyltype loc ;
3439 | 	for( ii=0; ii<nchilds; ii++){
3440 | 		loc = childs[ii] ;
3441 | 		if( loc.id == NA_INTEGER || (loc.first_line == loc.last_line && loc.first_byte > loc.last_byte) )
3442 | 			continue ;
3443 | 		/*  This shouldn't happen... */
3444 | 		if (loc.id < 0 || loc.id > identifier) {
3445 | 		    error(_("internal parser error at line %d"),  ParseState.xxlineno);
3446 | 		}
3447 | 		ID_PARENT( loc.id ) = parent;
3448 | 	}
3449 | 	
3450 | }
3451 | 
3452 | /**
3453 |  * The token pointed by the location has the wrong token type, 
3454 |  * This updates the type
3455 |  *
3456 |  * @param loc location information for the token to track
3457 |  */ 
3458 | static void modif_token( yyltype* loc, int tok ){
3459 | 	
3460 | 	int id = loc->id ;
3461 | 	
3462 | 	if (!ParseState.keepSrcRefs || !ParseState.keepParseData
3463 | 	    || id < 0 || id > ID_COUNT) return;
3464 | 	    
3465 | 	if( tok == SYMBOL_FUNCTION_CALL ){
3466 | 		// looking for first child of id
3467 | 		int j = ID_ID( id ) ;
3468 | 		int parent = id ;
3469 | 		
3470 | 		if (j < 0 || j > ID_COUNT)
3471 | 	            return;
3472 | 	            
3473 | 		while( ID_PARENT( _ID(j) ) != parent ){
3474 | 		    j-- ; 
3475 | 		    if (j < 0)
3476 | 	        	return;
3477 | 		}
3478 | 			
3479 | 		if( _TOKEN(j) == SYMBOL ){
3480 | 		    _TOKEN(j) = SYMBOL_FUNCTION_CALL ;
3481 | 		}
3482 | 		
3483 | 	} else{
3484 | 		_TOKEN( ID_ID(id) ) = tok ;
3485 | 	}
3486 | 	
3487 | }
3488 | 
3489 | /* this local version of lengthgets() always copies and doesn't fill with NA */
3490 | static SEXP lengthgets2(SEXP x, int len) {
3491 |     SEXP result;
3492 |     PROTECT(result = allocVector( TYPEOF(x), len ));
3493 |     
3494 |     len = (len < length(x)) ? len : length(x);
3495 |     switch(TYPEOF(x)) {
3496 |     	case INTSXP: 
3497 |     	    for (int i = 0; i < len; i++)
3498 |     	    	INTEGER(result)[i] = INTEGER(x)[i];
3499 | 	    for (int i = len; i < length(result); i++)
3500 | 		INTEGER(result)[i] = 0;
3501 |     	    break;
3502 |     	case STRSXP:
3503 |     	    for (int i = 0; i < len; i++)
3504 |     	    	SET_STRING_ELT(result, i, STRING_ELT(x, i));
3505 |     	    break;
3506 |     	default:
3507 | 	    UNIMPLEMENTED_TYPE("lengthgets2", x);
3508 |     }
3509 |     UNPROTECT(1); /* result */
3510 |     return result;
3511 | }
3512 | 
3513 | static void finalizeData( ){
3514 | 	
3515 |     int nloc = ParseState.data_count ;
3516 | 
3517 |     int i, j, id ;
3518 |     int parent ;
3519 | 
3520 |     /* store parents in the data */
3521 |     for( i=0; i<nloc; i++){
3522 | 	id = _ID(i);
3523 | 	parent = ID_PARENT( id ) ;
3524 | 	while( parent != 0 && ID_ID(parent) == 0 )
3525 | 	    parent = ID_PARENT( parent ) ;
3526 | 	_PARENT(i) = parent ;
3527 | 
3528 | #define FD_FAST_UPDATE_PARENTS
3529 | #ifdef FD_FAST_UPDATE_PARENTS
3530 | 	/*
3531 | 	   With long generated expressions, updating the parents can take
3532 | 	   a lot of time due to long chains of nodes not represented in the
3533 | 	   parse data. To reduce the overhead somewhat, we create shortcuts
3534 | 	   in the IDS array to point directly to the parent that is in the
3535 | 	   parse data.
3536 | 	*/
3537 | 	int data_parent = parent;
3538 | 	parent = ID_PARENT( id ) ;
3539 | 	while( parent != data_parent ){
3540 | 	    ID_PARENT( id ) = data_parent; /* set shortcut */
3541 | 	    id = parent;
3542 | 	    parent = ID_PARENT( parent );
3543 | 	}
3544 | #endif
3545 |     }
3546 | 
3547 |     /* attach comments to closest enclosing symbol */
3548 |     /* not updating ID_PARENT anymore */
3549 | 
3550 | #define FD_FAST_ASSIGN_COMMENTS
3551 | #ifdef FD_FAST_ASSIGN_COMMENTS
3552 |     /*
3553 |        All terminals (tokens) are ordered by start and end location, including
3554 |        the comments, in the data.
3555 | 
3556 |        All non-terminals, including to be found parents of the comments, are
3557 |        ordered by their end location. When they have the same end location
3558 |        in the code, they are ordered by their decreasing start location
3559 |        (children before parents).
3560 | 
3561 |        All terminals and non-terminals are also before their parents (if any),
3562 |        so a comment is also befor its parent in the data.
3563 | 
3564 |        Consequently: the first non-terminal after a comment that encloses the
3565 |        comment is its (immediate) parent. The original algorithm for every
3566 |        comment linearly searches for the first enclosing non-terminal and
3567 |        returns it, but it has quadratic complexity and dominates the whole
3568 |        parsing for long inputs (used when FD_FAST_ASSIGN_COMMENTS is not
3569 |        defined).
3570 | 
3571 |        This algorithm uses the parental information available on nodes that
3572 |        follow the comments. That information has been filled by the parser
3573 |        during reductions (but not for comments, because those are not in the
3574 |        grammar). A node following a comment is either the parent of the
3575 |        comment, or some of its parents are, or is an orphan.
3576 | 
3577 |        Note that a non-terminal may end before a terminal (e.g. comment) in the
3578 |        code but be after the terminal in the data (due to look-ahead). It seems
3579 |        that the parent of the comment has to be within parents of the
3580 |        non-terminal as well, but I am not sure how to prove it, so the algorithm
3581 |        just skips non-terminals preceding the comment in the code (so is not
3582 |        strictly linear).
3583 |       */
3584 | 
3585 |     for(i = nloc-1; i >= 0; i--) {
3586 | 	if (_TOKEN(i) == COMMENT) {
3587 | 	    int orphan = 1;
3588 | 	    int istartl = _FIRST_PARSED(i);
3589 | 	    int istartc = _FIRST_COLUMN(i);
3590 | 
3591 | 	    /* look for first node j that does not end before the comment i */
3592 | 	    for(j = i + 1; j < nloc && _LAST_PARSED(j) <= istartl; j++);
3593 | 
3594 | 	    if (j < nloc) {
3595 | 		for(;;) {
3596 | 		    int jstartl = _FIRST_PARSED(j);
3597 | 		    int jstartc = _FIRST_COLUMN(j);
3598 | 
3599 | 		    if (jstartl < istartl || (jstartl == istartl
3600 | 		                              && jstartc <= istartc)) {
3601 | 			/* j starts before or at the comment */
3602 | 			_PARENT(i) = _ID(j);
3603 | 			orphan = 0;
3604 | 			break;
3605 | 		    }
3606 | 		    /* find parent of j */
3607 | 		    int jparent = _PARENT(j);
3608 | 		    if (jparent == 0)
3609 | 			break; /* orphan */
3610 | 		    j = ID_ID(jparent);
3611 | 		}
3612 | 	    }
3613 | 	    if (orphan)
3614 | 		_PARENT(i) = 0;
3615 | 	}
3616 |     }
3617 | #else
3618 |     /* the original algorithm, which is slow for large inputs */
3619 | 
3620 |     int comment_line, comment_first_col;
3621 |     int this_first_parsed, this_last_parsed, this_first_col ;
3622 |     int orphan ;
3623 | 
3624 |     for( i=0; i<nloc; i++){
3625 | 	if( _TOKEN(i) == COMMENT ){
3626 | 	    comment_line = _FIRST_PARSED( i ) ;
3627 | 	    comment_first_col = _FIRST_COLUMN( i ) ;
3628 | 
3629 | 	    orphan = 1 ;
3630 | 	    for( j=i+1; j<nloc; j++){
3631 | 		this_first_parsed = _FIRST_PARSED( j ) ;
3632 | 		this_first_col = _FIRST_COLUMN( j ) ;
3633 | 		this_last_parsed  = _LAST_PARSED( j ) ;
3634 | 
3635 | 		/* the comment needs to start after the current symbol */
3636 | 		if( comment_line < this_first_parsed ) continue ;
3637 | 		if( (comment_line == this_first_parsed) & (comment_first_col < this_first_col) ) continue ;
3638 | 
3639 | 		/* the current symbol must finish after the comment */
3640 | 		if( this_last_parsed <= comment_line ) continue ; 
3641 | 
3642 | 		/* we have a match, record the parent and stop looking */
3643 | 		_PARENT(i) = _ID(j);
3644 | 		orphan = 0;
3645 | 		break ;
3646 | 	    }
3647 | 	    if(orphan){
3648 | 		_PARENT(i) = 0 ;
3649 | 	    }
3650 | 	}
3651 |     }
3652 | #endif
3653 | 
3654 | 
3655 |     /* now rework the parents of comments, we try to attach 
3656 |     comments that are not already attached (parent=0) to the next
3657 |     enclosing top-level expression */ 
3658 | 
3659 |     for( i=0; i<nloc; i++){
3660 | 	int token = _TOKEN(i); 
3661 | 	if( token == COMMENT && _PARENT(i) == 0 ){
3662 | 	    for( j=i; j<nloc; j++){
3663 | 		int token_j = _TOKEN(j); 
3664 | 		if( token_j == COMMENT ) continue ;
3665 | 		if( _PARENT(j) != 0 ) continue ;
3666 | 		_PARENT(i) = - _ID(j) ;
3667 | 		break ;
3668 | 	    }
3669 | 	}
3670 |     }
3671 | 
3672 |     /* attach the token names as an attribute so we don't need to switch to a dataframe, and decide on terminals */
3673 |     SEXP tokens;
3674 |     PROTECT(tokens = allocVector( STRSXP, nloc ) );
3675 |     for (int i=0; i<nloc; i++) {
3676 |         int token = _TOKEN(i);
3677 |         int xlat = yytranslate[token];
3678 |         if (xlat == 2) /* "unknown" */
3679 |             xlat = token;
3680 |         if (xlat < YYNTOKENS + YYNNTS)
3681 |     	    SET_STRING_ELT(tokens, i, mkChar(yytname[xlat]));
3682 |     	else { /* we have a token which doesn't have a name, e.g. an illegal character as in PR#15518 */
3683 |     	    char name[2];
3684 |     	    name[0] = (char) xlat;
3685 |     	    name[1] = 0;
3686 |     	    SET_STRING_ELT(tokens, i, mkChar(name));
3687 |     	}
3688 |     	_TERMINAL(i) = xlat < YYNTOKENS;
3689 |     }
3690 |     SEXP dims, newdata, newtext;
3691 |     if (nloc) {
3692 | 	PROTECT( newdata = lengthgets2(PS_DATA, nloc * DATA_ROWS));
3693 | 	PROTECT( newtext = lengthgets2(PS_TEXT, nloc));
3694 |     } else {
3695 | 	PROTECT( newdata = allocVector( INTSXP, 0));
3696 | 	PROTECT( newtext = allocVector( STRSXP, 0));
3697 |     }
3698 |     PROTECT( dims = allocVector( INTSXP, 2 ) ) ;
3699 |     INTEGER(dims)[0] = DATA_ROWS ;
3700 |     INTEGER(dims)[1] = nloc ;
3701 |     setAttrib( newdata, install( "dim" ), dims ) ;
3702 |     setAttrib( newdata, install("tokens"), tokens );
3703 |     setAttrib( newdata, install("text"), newtext );
3704 |     
3705 |     setAttrib(newdata, R_ClassSymbol, mkString("parseData"));
3706 |     
3707 |     /* Put it into the srcfile environment */
3708 |     if (isEnvironment(PS_SRCFILE))
3709 | 	defineVar(install("parseData"), newdata, PS_SRCFILE);
3710 |     UNPROTECT(4); /* tokens, newdata, newtext, dims */
3711 | }
3712 | 
3713 | /**
3714 |  * Grows the data
3715 |  */
3716 | static void growData(){
3717 | 	
3718 |     int new_data_count;	
3719 |     if (PS_DATA == R_NilValue) {
3720 |         new_data_count = INIT_DATA_COUNT;
3721 | 	PS_SET_DATA(allocVector(INTSXP, 0));
3722 | 	PS_SET_TEXT(allocVector(STRSXP, 0));
3723 |     } else
3724 |         new_data_count = 2*DATA_COUNT;
3725 | 	
3726 |     PS_SET_DATA(lengthgets2(PS_DATA, new_data_count * DATA_ROWS));
3727 |     PS_SET_TEXT(lengthgets2(PS_TEXT, new_data_count));
3728 | }
3729 | 
3730 | /**
3731 |  * Grows the ids vector so that ID_ID(target) can be called
3732 |  */
3733 | static void growID( int target ){
3734 | 	
3735 |     int new_count;
3736 |     if (PS_IDS == R_NilValue) {
3737 |         new_count = INIT_DATA_COUNT/2 - 1;
3738 |         PS_SET_IDS(allocVector(INTSXP, 0));
3739 |     } else
3740 |     	new_count = ID_COUNT;
3741 |     	
3742 |     while (target > new_count)
3743 |     	new_count = 2*new_count + 1;
3744 |     	
3745 |     if (new_count <= ID_COUNT)
3746 |     	return;
3747 |     
3748 |     int new_size = (1 + new_count)*2;
3749 |     PS_SET_IDS(lengthgets2(PS_IDS, new_size));
3750 | }
3751 | 


--------------------------------------------------------------------------------