├── 1 ├── index.php └── mb.c ├── description ├── example-results ├── vim7.3.035-ubuntu1104 │ └── bg3py.vim └── python3.2-3.2-ubuntu1104 │ ├── Parser │ ├── pgenmain.c │ └── grammar.c │ └── Python │ └── ast.c ├── diffs ├── grammar.c.diff ├── pgenmain.c.diff └── ast.c.diff ├── opit.py ├── makefile ├── TODO ├── README.md └── bg.py /description: -------------------------------------------------------------------------------- 1 | python 3 into bulgarian - operators, keywords, functions, types,... ; 2 | or other languages; 3 | (bg) питон 3 на български - оператори, функции и т.н. 4 | 5 | -------------------------------------------------------------------------------- /1/index.php: -------------------------------------------------------------------------------- 1 | 7 | 8 | -------------------------------------------------------------------------------- /example-results/vim7.3.035-ubuntu1104/bg3py.vim: -------------------------------------------------------------------------------- 1 | syn keyword pythonStatement Не Нищо Да 2 | syn keyword pythonStatement като осигури спри продължи изтрий exec глобални 3 | syn keyword pythonStatement ламбда нелокални карай print върни със предай 4 | syn keyword pythonStatement клас деф nextgroup=pythonFunction skipwhite 5 | syn keyword pythonConditional инако иначе ако 6 | syn keyword pythonRepeat за докато 7 | syn keyword pythonOperator и във е не или 8 | syn keyword pythonException освен винаги вдигни опитай 9 | syn keyword pythonInclude от внеси 10 | syn clear pythonFunction 11 | syn match pythonFunction "\%(\%(def\s\|деф\s\|class\s\|клас\s\|@\)\s*\)\@<=[А-Яа-яA-Za-z_]\%([А-Яа-яA-Za-z_0-9]\|\.\)*" contained 12 | -------------------------------------------------------------------------------- /diffs/grammar.c.diff: -------------------------------------------------------------------------------- 1 | --- old/Parser/grammar.c 2010-05-09 18:52:27.000000000 +0300 2 | +++ new/Parser/grammar.c 2011-07-11 11:45:00.000000000 +0300 3 | @@ -9,6 +9,9 @@ 4 | #include "token.h" 5 | #include "grammar.h" 6 | 7 | +#include 8 | +extern int Py_USELOCALE; 9 | + 10 | extern int Py_DebugFlag; 11 | 12 | grammar * 13 | @@ -179,7 +182,15 @@ 14 | } 15 | 16 | if (lb->lb_type == STRING) { 17 | - if (isalpha(Py_CHARMASK(lb->lb_str[1])) || 18 | + int alpha; 19 | + if (Py_USELOCALE) { 20 | + wchar_t x=0; 21 | + mbtowc( &x, lb->lb_str+1, strlen( lb->lb_str+1) ); 22 | + alpha = iswalpha( x); 23 | + } else 24 | + alpha = isalpha(Py_CHARMASK(lb->lb_str[1])); 25 | + 26 | + if (alpha || 27 | lb->lb_str[1] == '_') { 28 | char *p; 29 | char *src; 30 | -------------------------------------------------------------------------------- /1/mb.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | wchar_t * 9 | mbstowcs_alloc (const char *string) 10 | { 11 | size_t size = strlen (string) + 1; 12 | wchar_t *buf = (wchar_t *)malloc (size * sizeof (wchar_t)); 13 | 14 | size = mbstowcs (buf, string, size); 15 | if (size == (size_t) -1) 16 | return NULL; 17 | buf = (wchar_t *)realloc (buf, (size + 1) * sizeof (wchar_t)); 18 | return buf; 19 | } 20 | 21 | int 22 | main(int argc, char **argv) 23 | { 24 | printf( "loca %s\n", setlocale( LC_ALL, "") ); 25 | while (++argv,--argc) { 26 | wchar_t * b = mbstowcs_alloc( *argv) ; 27 | printf( "isa %s %d %d\n", *argv, 28 | *b, 29 | iswalpha( 30 | *b 31 | ) 32 | ); 33 | } 34 | } 35 | 36 | -------------------------------------------------------------------------------- /diffs/pgenmain.c.diff: -------------------------------------------------------------------------------- 1 | --- old/Parser//pgenmain.c 2010-12-29 01:05:20.000000000 +0200 2 | +++ new/Parser/pgenmain.c 2011-07-11 11:43:48.000000000 +0300 3 | @@ -22,6 +22,9 @@ 4 | #include "parsetok.h" 5 | #include "pgen.h" 6 | 7 | +#include 8 | +int Py_USELOCALE; 9 | + 10 | int Py_DebugFlag; 11 | int Py_VerboseFlag; 12 | int Py_IgnoreEnvironmentFlag; 13 | @@ -42,9 +45,16 @@ 14 | FILE *fp; 15 | char *filename, *graminit_h, *graminit_c; 16 | 17 | + Py_DebugFlag= !!getenv("genDBG"); 18 | + Py_USELOCALE= !!getenv("genUSELOCALE"); 19 | + if (Py_USELOCALE) 20 | + printf( "loca %s\n", setlocale( LC_ALL, "") ); 21 | + 22 | if (argc != 4) { 23 | fprintf(stderr, 24 | "usage: %s grammar graminit.h graminit.c\n", argv[0]); 25 | + fprintf(stderr, 26 | + " do setenv genUSELOCALE=1 or genDBG=1 to turn them on\n"); 27 | Py_Exit(2); 28 | } 29 | filename = argv[1]; 30 | -------------------------------------------------------------------------------- /opit.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | клас инфо: 4 | ъ = 34 5 | @classmethod 6 | деф главна( аз): 7 | print( аз.име_от_превод( 'аааа.bg')) 8 | @classmethod 9 | деф име_от_превод( кл, *имена ): 10 | return имена 11 | 12 | ако __name__ == '__main__': 13 | 14 | клас филминфо( инфо): 15 | @classmethod 16 | деф име_от_превод( кл, име ): 17 | имена = [ име ] 18 | ако име.endswith('.bg'): имена.append( име[:-3]) 19 | инако име.endswith('.ru'): имена.append( име[:-3]) 20 | иначе: 21 | имена.append( име+'.bg') 22 | имена.append( име+'.ru') 23 | върни инфо.име_от_превод( *имена) 24 | 25 | деф самопопълва_етикети( аз): 26 | ако аз.етикетер.звук: върни 27 | автор = аз.етикетер.автор 28 | ако не автор: върни 29 | р = автор.lower() във аз._заместители_по_стойност.звук 30 | ако р: аз.слага_етикет( аз.стойности.звук, автор) 31 | #иначе: 32 | # р = аз.етикетер.автор във ' '.join( аз.автори_ан).split() 33 | # ако р: аз.слага_етикет( аз.стойности.звук, аз.стойности.ан) 34 | върни р 35 | 36 | филминфо.главна() 37 | 38 | # vim:ts=4:sw=4:expandtab 39 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | # svilendobrev.com 2 | 3 | TARGET ?= /usr/src/python3.2-3.2/ 4 | 5 | 6 | #configure , patch , pgen 7 | now: pgen 8 | all: now 9 | $(MAKE) -C $(TARGET) GRAMMAR_INPUT=$(GRAMMAR_INPUT) 10 | 11 | pgen: patch grammar #make pgen then make grammar 12 | rm -f $(TARGET)/Parser/pgen.stamp 13 | export genUSELOCALE=1; $(MAKE) -C $(TARGET) GRAMMAR_INPUT=$(GRAMMAR_INPUT) Parser/pgen.stamp 14 | 15 | configure=$(TARGET)/Makefile 16 | 17 | patch: patch.ok 18 | patch.ok: $(configure) 19 | - cat diffs/*.diff | patch -p 1 -d $(TARGET) 20 | touch $@ 21 | 22 | 23 | BGPY = PYTHONPATH=~/src/bin/util/ python3 bg.py 24 | 25 | $(configure): 26 | cd $(TARGET); ./configure 27 | 28 | orgGRAMMAR_INPUT= $(TARGET)/Grammar/Grammar 29 | GRAMMAR_INPUT = $(TARGET)/Grammar/bgGrammar 30 | 31 | grammar: $(GRAMMAR_INPUT) 32 | $(GRAMMAR_INPUT): $(orgGRAMMAR_INPUT) bg.py 33 | $(BGPY) --grammar < $< > $@ 34 | 35 | 36 | PGEN = Parser/pgen 37 | GRAMMAR_H= Include/graminit.h 38 | GRAMMAR_C= Python/graminit.c 39 | 40 | grammargen: 41 | $(PGEN) $(GRAMMAR_INPUT) $(GRAMMAR_H) $(GRAMMAR_C) 42 | 43 | vim: bg3py.vim 44 | bg3py.vim: bg.py 45 | $(BGPY) --vim < /usr/share/vim/vim73/syntax/python.vim > $@ 46 | cat $@ 47 | 48 | diff: 49 | diff -Naur $(TARGET)/$(DIFF) ./$(DIFF) 50 | 51 | opit: 52 | $(TARGET)/python opit.py 53 | # vim:ts=4:sw=4:noexpandtab 54 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | TODO 2 | * + python.vim 3 | * + ast.c, .... strcmp, "\w+", '\w' 4 | * ast.c/diff also via bg.py 5 | * syntax error - points wrong place because of UTF ; see http://bugs.python.org/issue2382 6 | * translating: 7 | * error messages: Python/errors.c , Objects/exceptions.c 8 | * probably via (exposed) dictionary when printing the error, thus keep the Exception intact 9 | * strings to map: look for PyErr_Format 10 | * see PyTypeObject.tp_name, .tp_doc; 11 | * methods of base structural bricks: str, list, dict, set, .. 12 | * append to existing docstrings AND extend the methods with copy but translated names 13 | * look for PyMethodDef ..[] and PyDoc_STRVAR: e.g. Objects/dictobject.c#mapp_methods 14 | * for now, leave __ (protocol) names alone (e.g. __contains__ etc) 15 | * builtin functions - len, locals, isinstance, issubclass, classmethod?, staticmethod?, 16 | enumerate, min, max, sum, super, repr, sorted, reversed, open, 17 | * Python/bltinmodule.c 18 | * super is in Objects/typeobject.c 19 | * builtin types - str, bool, int, float, tuple, type, Exception, Warning, 20 | * Python/bltinmodule.c#_PyBuiltin_Init(void) 21 | 22 | * some way of help for the translated-mapping in effect 23 | * ? multiple alternatives/aliases - easy 24 | * ? 1251 / UTF - switching encoding 25 | 26 | * helper for translating such ready code to other languages: extract names/identifiers/comments 27 | * docstring/error translations can become core of python-doc-translation - or vice-versa if any 28 | 29 | # svilendobrev.com 30 | # vim:ts=4:sw=4:expandtab 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Welcome to smok 2 | =============== 3 | 4 | Translated Python 3 clone: replacing english words (operators, names, funcs, messages - if,else,for,..) with another language (Bulgarian - ако,иначе,за). 5 | For kids and people to think and write in their own language, instead of suffer with the alternatives - near-"english", transliteration into latin, mixed latin-cyrilic. 6 | 7 | Преведен Питон 3: замества английските думи (оператори, имена и съобщения - if,else,for,..) с друг език (Български - ако,иначе,за). 8 | За деца, и хора които искат да мислят и пишат на собствения си език - вместо да се мъчат с алтернативите - почти-"английски", български на латиница или смес от кирилица и латиница. 9 | 10 | 11 | -------- 12 | 13 | this is attempt at translated python, into using bulgarian keywords - replacing the english (if,else,for,..). 14 | 15 | * why: 16 | * python3 allows any identifiers out of the box, but reading the mixed english-bulgarian result is.. nonsense. 17 | * i prefer to express my self in my language. and just then eventualy think of translation, if at all. and Nomen est omen. Even if i think correctly i may put the wrong english word. So it might be better to translate the code (identifiers+comments) properly after the concept is well expressed. And using english/latin alphabet for transliteraion is not good. 18 | * kids dont know english, nor the specific terms 19 | * what: 20 | * all reserved keywords 21 | * eventualy: 22 | * most builtin functions and types 23 | * the most used types' methods (like str.upper, list.append, dict.pop) 24 | * vim - syntax addition 25 | * how: 26 | * recompiles python 3, with appended grammar etc 27 | * both english and bulgarian (UTF8) wordings are usable 28 | * it's simple, other translations can be done (in other languages or charsets) 29 | 30 | 31 | добре дошъл при смока 32 | ===================== 33 | 34 | това е опит за преведен питон, ползващ български думи (вместо if,else,for - напр. ако,иначе,за) 35 | 36 | * защо: 37 | * питон 3 поддържа имена на всякакви езици, но резултатната англо-българска смесица е.. безсмислена. 38 | * предпочитам да се изразявам на собствения си език. и чак после да мисля за превод, ако изобщо. а Имената са Всичко. Дори и да мисля правилно, може да сложа грешна чужда дума. Така че по-добре да се преведе кода (променливи + коментари) впоследствие, след като е вече измислен. А да се ползва английска/латинска азбука с транслитерация т.е. шльокавица, си е отврат. 39 | * децата не знаят английски, нито пък значението на термините 40 | * какво: 41 | * всички запазени думи (class, if, ...) 42 | * евентуално, имената на: 43 | * повечето вградени функции и типове 44 | * методите на най-ползваните типове (напр. str.upper, list.append, dict.pop) 45 | * vim: допълнителен оцветител на синтаксиса 46 | * как: 47 | * прекомпилира се питон 3, с допълнена граматика и пр. 48 | * достъпни са едновременно и българските (UTF8) и английските имена 49 | * схемата е проста, може да се направят и други преводи (към езици или кодови таблици) 50 | 51 | ``` 52 | клас Филминфо( Инфо): #сега: почти преведено / now: almost translated 53 | @classmethod 54 | деф сглоби_име( кл, име ): 55 | имена = [ име ] 56 | ако име.endswith('.bg'): имена.append( име[:-3]) 57 | инако име.endswith('.ru'): имена.append( име[:-3]) 58 | иначе: 59 | имена.append( име+'.bg') 60 | имена.append( име+'.ru') 61 | върни инфо.име_от_превод( *имена) 62 | 63 | 64 | клас Филминфо( Инфо): #TODO: изцяло преведено / fully translated 65 | @класметод 66 | деф сглоби_име( кл, име ): 67 | имена = [ име ] 68 | ако име.завършва('.bg'): имена.допълни( име[:-3]) 69 | инако име.завършва('.ru'): имена.допълни( име[:-3]) 70 | иначе: имена.допълни( име) 71 | върни Инфо.сглоби_име( *имена) 72 | 73 | 74 | class Filminfo( Info): #изцяло на английски / fully in english 75 | @classmethod 76 | def make_name( kl, name): 77 | names = [ name] 78 | if name.endswith('.bg'): names.append( ime[:-3]) 79 | elif name.endswith('.ru'): names.append( име[:-3]) 80 | else: names.append( ime) 81 | return Info.make_name( *names) 82 | 83 | ``` 84 | 85 | : http://smok.sourceforge.net/ 86 | -------------------------------------------------------------------------------- /example-results/python3.2-3.2-ubuntu1104/Parser/pgenmain.c: -------------------------------------------------------------------------------- 1 | 2 | /* Parser generator main program */ 3 | 4 | /* This expects a filename containing the grammar as argv[1] (UNIX) 5 | or asks the console for such a file name (THINK C). 6 | It writes its output on two files in the current directory: 7 | - "graminit.c" gets the grammar as a bunch of initialized data 8 | - "graminit.h" gets the grammar's non-terminals as #defines. 9 | Error messages and status info during the generation process are 10 | written to stdout, or sometimes to stderr. */ 11 | 12 | /* XXX TO DO: 13 | - check for duplicate definitions of names (instead of fatal err) 14 | */ 15 | 16 | #define PGEN 17 | 18 | #include "Python.h" 19 | #include "pgenheaders.h" 20 | #include "grammar.h" 21 | #include "node.h" 22 | #include "parsetok.h" 23 | #include "pgen.h" 24 | 25 | #include 26 | int Py_USELOCALE; 27 | 28 | int Py_DebugFlag; 29 | int Py_VerboseFlag; 30 | int Py_IgnoreEnvironmentFlag; 31 | 32 | /* Forward */ 33 | grammar *getgrammar(char *filename); 34 | 35 | void 36 | Py_Exit(int sts) 37 | { 38 | exit(sts); 39 | } 40 | 41 | int 42 | main(int argc, char **argv) 43 | { 44 | grammar *g; 45 | FILE *fp; 46 | char *filename, *graminit_h, *graminit_c; 47 | 48 | Py_DebugFlag= !!getenv("genDBG"); 49 | Py_USELOCALE= !!getenv("genUSELOCALE"); 50 | if (Py_USELOCALE) 51 | printf( "loca %s\n", setlocale( LC_ALL, "") ); 52 | 53 | if (argc != 4) { 54 | fprintf(stderr, 55 | "usage: %s grammar graminit.h graminit.c\n", argv[0]); 56 | fprintf(stderr, 57 | " do setenv genUSELOCALE=1 or genDBG=1 to turn them on\n"); 58 | Py_Exit(2); 59 | } 60 | filename = argv[1]; 61 | graminit_h = argv[2]; 62 | graminit_c = argv[3]; 63 | g = getgrammar(filename); 64 | fp = fopen(graminit_c, "w"); 65 | if (fp == NULL) { 66 | perror(graminit_c); 67 | Py_Exit(1); 68 | } 69 | if (Py_DebugFlag) 70 | printf("Writing %s ...\n", graminit_c); 71 | printgrammar(g, fp); 72 | fclose(fp); 73 | fp = fopen(graminit_h, "w"); 74 | if (fp == NULL) { 75 | perror(graminit_h); 76 | Py_Exit(1); 77 | } 78 | if (Py_DebugFlag) 79 | printf("Writing %s ...\n", graminit_h); 80 | printnonterminals(g, fp); 81 | fclose(fp); 82 | Py_Exit(0); 83 | return 0; /* Make gcc -Wall happy */ 84 | } 85 | 86 | grammar * 87 | getgrammar(char *filename) 88 | { 89 | FILE *fp; 90 | node *n; 91 | grammar *g0, *g; 92 | perrdetail err; 93 | 94 | fp = fopen(filename, "r"); 95 | if (fp == NULL) { 96 | perror(filename); 97 | Py_Exit(1); 98 | } 99 | g0 = meta_grammar(); 100 | n = PyParser_ParseFile(fp, filename, g0, g0->g_start, 101 | (char *)NULL, (char *)NULL, &err); 102 | fclose(fp); 103 | if (n == NULL) { 104 | fprintf(stderr, "Parsing error %d, line %d.\n", 105 | err.error, err.lineno); 106 | if (err.text != NULL) { 107 | size_t i; 108 | fprintf(stderr, "%s", err.text); 109 | i = strlen(err.text); 110 | if (i == 0 || err.text[i-1] != '\n') 111 | fprintf(stderr, "\n"); 112 | for (i = 0; i < err.offset; i++) { 113 | if (err.text[i] == '\t') 114 | putc('\t', stderr); 115 | else 116 | putc(' ', stderr); 117 | } 118 | fprintf(stderr, "^\n"); 119 | PyObject_FREE(err.text); 120 | } 121 | Py_Exit(1); 122 | } 123 | g = pgen(n); 124 | if (g == NULL) { 125 | printf("Bad grammar.\n"); 126 | Py_Exit(1); 127 | } 128 | return g; 129 | } 130 | 131 | /* Can't happen in pgen */ 132 | PyObject* 133 | PyErr_Occurred() 134 | { 135 | return 0; 136 | } 137 | 138 | void 139 | Py_FatalError(const char *msg) 140 | { 141 | fprintf(stderr, "pgen: FATAL ERROR: %s\n", msg); 142 | Py_Exit(1); 143 | } 144 | 145 | /* No-nonsense my_readline() for tokenizer.c */ 146 | 147 | char * 148 | PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, char *prompt) 149 | { 150 | size_t n = 1000; 151 | char *p = (char *)PyMem_MALLOC(n); 152 | char *q; 153 | if (p == NULL) 154 | return NULL; 155 | fprintf(stderr, "%s", prompt); 156 | q = fgets(p, n, sys_stdin); 157 | if (q == NULL) { 158 | *p = '\0'; 159 | return p; 160 | } 161 | n = strlen(p); 162 | if (n > 0 && p[n-1] != '\n') 163 | p[n-1] = '\n'; 164 | return (char *)PyMem_REALLOC(p, n+1); 165 | } 166 | 167 | /* No-nonsense fgets */ 168 | char * 169 | Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj) 170 | { 171 | return fgets(buf, n, stream); 172 | } 173 | 174 | 175 | #include 176 | 177 | void 178 | PySys_WriteStderr(const char *format, ...) 179 | { 180 | va_list va; 181 | 182 | va_start(va, format); 183 | vfprintf(stderr, format, va); 184 | va_end(va); 185 | } 186 | -------------------------------------------------------------------------------- /diffs/ast.c.diff: -------------------------------------------------------------------------------- 1 | --- old/Python/ast.c 2010-11-30 11:41:01.000000000 +0200 2 | +++ new/Python/ast.c 2011-07-11 18:28:59.000000000 +0300 3 | @@ -14,6 +14,15 @@ 4 | 5 | #include 6 | 7 | +static const char bg_else[] = "иначе"; 8 | +static const char bg_elif[] = "инако"; 9 | +static const char bg_in[] = "във"; 10 | +static const char bg_and[] = "и"; 11 | +static const char bg_is[] = "е"; 12 | +static const char bg_finally[] = "винаги"; 13 | +#define streq( x, name) ( !strcmp( x, #name) || !strcmp( x, bg_##name) ) 14 | + 15 | + 16 | /* Data structure used internally */ 17 | struct compiling { 18 | char *c_encoding; /* source encoding */ 19 | @@ -374,6 +383,34 @@ 20 | NULL, 21 | }; 22 | 23 | +static const Py_UNICODE ubgNone[] = { 1053, 1080, 1097, 1086, 0}; 24 | +static const Py_UNICODE ubgTrue[] = { 1044, 1072, 0}; 25 | +static const Py_UNICODE ubgFalse[]= { 1053, 1077, 0}; 26 | + 27 | +static const Py_UNICODE * uFORBIDDEN[] = { 28 | + ubgNone, 29 | + ubgTrue, 30 | + ubgFalse, 31 | + NULL 32 | +}; 33 | + 34 | +int 35 | +PyUnicode_CompareWithUnicode( PyObject* uni, const Py_UNICODE * str) 36 | +{ /* same as PyUnicode_CompareWithASCIIString */ 37 | + int i; 38 | + Py_UNICODE *id; 39 | + assert(PyUnicode_Check(uni)); 40 | + id = PyUnicode_AS_UNICODE(uni); 41 | + for (i = 0; id[i] && str[i]; i++) 42 | + if (id[i] != str[i]) 43 | + return (id[i] < str[i]) ? -1 : 1; 44 | + if (PyUnicode_GET_SIZE(uni) != i || id[i]) 45 | + return 1; /* uni is longer */ 46 | + if (str[i]) 47 | + return -1; /* str is longer */ 48 | + return 0; 49 | +} 50 | + 51 | static int 52 | forbidden_name(identifier name, const node *n, int full_checks) 53 | { 54 | @@ -390,6 +427,13 @@ 55 | return 1; 56 | } 57 | } 58 | + const Py_UNICODE **u; 59 | + for (u = uFORBIDDEN; *u; u++) { 60 | + if (PyUnicode_CompareWithUnicode( name, *u) ==0) { 61 | + ast_error(n, "assignment to keyword"); 62 | + return 1; 63 | + } 64 | + } 65 | } 66 | return 0; 67 | } 68 | @@ -585,9 +629,9 @@ 69 | case NOTEQUAL: 70 | return NotEq; 71 | case NAME: 72 | - if (strcmp(STR(n), "in") == 0) 73 | + if (streq(STR(n), in)) 74 | return In; 75 | - if (strcmp(STR(n), "is") == 0) 76 | + if (streq(STR(n), is)) 77 | return Is; 78 | default: 79 | PyErr_Format(PyExc_SystemError, "invalid comp_op: %s", 80 | @@ -599,9 +643,9 @@ 81 | /* handle "not in" and "is not" */ 82 | switch (TYPE(CHILD(n, 0))) { 83 | case NAME: 84 | - if (strcmp(STR(CHILD(n, 1)), "in") == 0) 85 | + if (streq(STR(CHILD(n, 1)), in)) 86 | return NotIn; 87 | - if (strcmp(STR(CHILD(n, 0)), "is") == 0) 88 | + if (streq(STR(CHILD(n, 0)), is)) 89 | return IsNot; 90 | default: 91 | PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s", 92 | @@ -1816,10 +1860,10 @@ 93 | return NULL; 94 | asdl_seq_SET(seq, i / 2, e); 95 | } 96 | - if (!strcmp(STR(CHILD(n, 1)), "and")) 97 | + if (streq(STR(CHILD(n, 1)), and)) 98 | return BoolOp(And, seq, LINENO(n), n->n_col_offset, 99 | c->c_arena); 100 | - assert(!strcmp(STR(CHILD(n, 1)), "or")); 101 | + assert( streq(STR(CHILD(n, 1)), or)); 102 | return BoolOp(Or, seq, LINENO(n), n->n_col_offset, c->c_arena); 103 | case not_test: 104 | if (NCH(n) == 1) { 105 | @@ -2633,6 +2677,7 @@ 106 | return seq; 107 | } 108 | 109 | + 110 | static stmt_ty 111 | ast_for_if_stmt(struct compiling *c, const node *n) 112 | { 113 | @@ -2663,7 +2708,7 @@ 114 | 's' for el_s_e, or 115 | 'i' for el_i_f 116 | */ 117 | - if (s[2] == 's') { 118 | + if (streq( s, else)) { 119 | expr_ty expression; 120 | asdl_seq *seq1, *seq2; 121 | 122 | @@ -2680,7 +2725,7 @@ 123 | return If(expression, seq1, seq2, LINENO(n), n->n_col_offset, 124 | c->c_arena); 125 | } 126 | - else if (s[2] == 'i') { 127 | + else if (streq( s, elif)) { 128 | int i, n_elif, has_else = 0; 129 | expr_ty expression; 130 | asdl_seq *suite_seq; 131 | @@ -2689,7 +2734,7 @@ 132 | /* must reference the child n_elif+1 since 'else' token is third, 133 | not fourth, child from the end. */ 134 | if (TYPE(CHILD(n, (n_elif + 1))) == NAME 135 | - && STR(CHILD(n, (n_elif + 1)))[2] == 's') { 136 | + && streq( STR(CHILD(n, (n_elif + 1))), else)) { 137 | has_else = 1; 138 | n_elif -= 3; 139 | } 140 | @@ -2901,7 +2946,7 @@ 141 | return NULL; 142 | 143 | if (TYPE(CHILD(n, nch - 3)) == NAME) { 144 | - if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) { 145 | + if (streq(STR(CHILD(n, nch - 3)), finally)) { 146 | if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) { 147 | /* we can assume it's an "else", 148 | because nch >= 9 for try-else-finally and 149 | -------------------------------------------------------------------------------- /example-results/python3.2-3.2-ubuntu1104/Parser/grammar.c: -------------------------------------------------------------------------------- 1 | 2 | /* Grammar implementation */ 3 | 4 | #include "Python.h" 5 | #include "pgenheaders.h" 6 | 7 | #include 8 | 9 | #include "token.h" 10 | #include "grammar.h" 11 | 12 | #include 13 | extern int Py_USELOCALE; 14 | 15 | extern int Py_DebugFlag; 16 | 17 | grammar * 18 | newgrammar(int start) 19 | { 20 | grammar *g; 21 | 22 | g = (grammar *)PyObject_MALLOC(sizeof(grammar)); 23 | if (g == NULL) 24 | Py_FatalError("no mem for new grammar"); 25 | g->g_ndfas = 0; 26 | g->g_dfa = NULL; 27 | g->g_start = start; 28 | g->g_ll.ll_nlabels = 0; 29 | g->g_ll.ll_label = NULL; 30 | g->g_accel = 0; 31 | return g; 32 | } 33 | 34 | dfa * 35 | adddfa(grammar *g, int type, char *name) 36 | { 37 | dfa *d; 38 | 39 | g->g_dfa = (dfa *)PyObject_REALLOC(g->g_dfa, 40 | sizeof(dfa) * (g->g_ndfas + 1)); 41 | if (g->g_dfa == NULL) 42 | Py_FatalError("no mem to resize dfa in adddfa"); 43 | d = &g->g_dfa[g->g_ndfas++]; 44 | d->d_type = type; 45 | d->d_name = strdup(name); 46 | d->d_nstates = 0; 47 | d->d_state = NULL; 48 | d->d_initial = -1; 49 | d->d_first = NULL; 50 | return d; /* Only use while fresh! */ 51 | } 52 | 53 | int 54 | addstate(dfa *d) 55 | { 56 | state *s; 57 | 58 | d->d_state = (state *)PyObject_REALLOC(d->d_state, 59 | sizeof(state) * (d->d_nstates + 1)); 60 | if (d->d_state == NULL) 61 | Py_FatalError("no mem to resize state in addstate"); 62 | s = &d->d_state[d->d_nstates++]; 63 | s->s_narcs = 0; 64 | s->s_arc = NULL; 65 | s->s_lower = 0; 66 | s->s_upper = 0; 67 | s->s_accel = NULL; 68 | s->s_accept = 0; 69 | return s - d->d_state; 70 | } 71 | 72 | void 73 | addarc(dfa *d, int from, int to, int lbl) 74 | { 75 | state *s; 76 | arc *a; 77 | 78 | assert(0 <= from && from < d->d_nstates); 79 | assert(0 <= to && to < d->d_nstates); 80 | 81 | s = &d->d_state[from]; 82 | s->s_arc = (arc *)PyObject_REALLOC(s->s_arc, sizeof(arc) * (s->s_narcs + 1)); 83 | if (s->s_arc == NULL) 84 | Py_FatalError("no mem to resize arc list in addarc"); 85 | a = &s->s_arc[s->s_narcs++]; 86 | a->a_lbl = lbl; 87 | a->a_arrow = to; 88 | } 89 | 90 | int 91 | addlabel(labellist *ll, int type, char *str) 92 | { 93 | int i; 94 | label *lb; 95 | 96 | for (i = 0; i < ll->ll_nlabels; i++) { 97 | if (ll->ll_label[i].lb_type == type && 98 | strcmp(ll->ll_label[i].lb_str, str) == 0) 99 | return i; 100 | } 101 | ll->ll_label = (label *)PyObject_REALLOC(ll->ll_label, 102 | sizeof(label) * (ll->ll_nlabels + 1)); 103 | if (ll->ll_label == NULL) 104 | Py_FatalError("no mem to resize labellist in addlabel"); 105 | lb = &ll->ll_label[ll->ll_nlabels++]; 106 | lb->lb_type = type; 107 | lb->lb_str = strdup(str); 108 | if (Py_DebugFlag) 109 | printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels, 110 | PyGrammar_LabelRepr(lb)); 111 | return lb - ll->ll_label; 112 | } 113 | 114 | /* Same, but rather dies than adds */ 115 | 116 | int 117 | findlabel(labellist *ll, int type, char *str) 118 | { 119 | int i; 120 | 121 | for (i = 0; i < ll->ll_nlabels; i++) { 122 | if (ll->ll_label[i].lb_type == type /*&& 123 | strcmp(ll->ll_label[i].lb_str, str) == 0*/) 124 | return i; 125 | } 126 | fprintf(stderr, "Label %d/'%s' not found\n", type, str); 127 | Py_FatalError("grammar.c:findlabel()"); 128 | return 0; /* Make gcc -Wall happy */ 129 | } 130 | 131 | /* Forward */ 132 | static void translabel(grammar *, label *); 133 | 134 | void 135 | translatelabels(grammar *g) 136 | { 137 | int i; 138 | 139 | #ifdef Py_DEBUG 140 | printf("Translating labels ...\n"); 141 | #endif 142 | /* Don't translate EMPTY */ 143 | for (i = EMPTY+1; i < g->g_ll.ll_nlabels; i++) 144 | translabel(g, &g->g_ll.ll_label[i]); 145 | } 146 | 147 | static void 148 | translabel(grammar *g, label *lb) 149 | { 150 | int i; 151 | 152 | if (Py_DebugFlag) 153 | printf("Translating label %s ...\n", PyGrammar_LabelRepr(lb)); 154 | 155 | if (lb->lb_type == NAME) { 156 | for (i = 0; i < g->g_ndfas; i++) { 157 | if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) { 158 | if (Py_DebugFlag) 159 | printf( 160 | "Label %s is non-terminal %d.\n", 161 | lb->lb_str, 162 | g->g_dfa[i].d_type); 163 | lb->lb_type = g->g_dfa[i].d_type; 164 | free(lb->lb_str); 165 | lb->lb_str = NULL; 166 | return; 167 | } 168 | } 169 | for (i = 0; i < (int)N_TOKENS; i++) { 170 | if (strcmp(lb->lb_str, _PyParser_TokenNames[i]) == 0) { 171 | if (Py_DebugFlag) 172 | printf("Label %s is terminal %d.\n", 173 | lb->lb_str, i); 174 | lb->lb_type = i; 175 | free(lb->lb_str); 176 | lb->lb_str = NULL; 177 | return; 178 | } 179 | } 180 | printf("Can't translate NAME label '%s'\n", lb->lb_str); 181 | return; 182 | } 183 | 184 | if (lb->lb_type == STRING) { 185 | int alpha; 186 | if (Py_USELOCALE) { 187 | wchar_t x=0; 188 | mbtowc( &x, lb->lb_str+1, strlen( lb->lb_str+1) ); 189 | alpha = iswalpha( x); 190 | } else 191 | alpha = isalpha(Py_CHARMASK(lb->lb_str[1])); 192 | 193 | if (alpha || 194 | lb->lb_str[1] == '_') { 195 | char *p; 196 | char *src; 197 | char *dest; 198 | size_t name_len; 199 | if (Py_DebugFlag) 200 | printf("Label %s is a keyword\n", lb->lb_str); 201 | lb->lb_type = NAME; 202 | src = lb->lb_str + 1; 203 | p = strchr(src, '\''); 204 | if (p) 205 | name_len = p - src; 206 | else 207 | name_len = strlen(src); 208 | dest = (char *)malloc(name_len + 1); 209 | if (!dest) { 210 | printf("Can't alloc dest '%s'\n", src); 211 | return; 212 | } 213 | strncpy(dest, src, name_len); 214 | dest[name_len] = '\0'; 215 | free(lb->lb_str); 216 | lb->lb_str = dest; 217 | } 218 | else if (lb->lb_str[2] == lb->lb_str[0]) { 219 | int type = (int) PyToken_OneChar(lb->lb_str[1]); 220 | if (type != OP) { 221 | lb->lb_type = type; 222 | free(lb->lb_str); 223 | lb->lb_str = NULL; 224 | } 225 | else 226 | printf("Unknown OP label %s\n", 227 | lb->lb_str); 228 | } 229 | else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) { 230 | int type = (int) PyToken_TwoChars(lb->lb_str[1], 231 | lb->lb_str[2]); 232 | if (type != OP) { 233 | lb->lb_type = type; 234 | free(lb->lb_str); 235 | lb->lb_str = NULL; 236 | } 237 | else 238 | printf("Unknown OP label %s\n", 239 | lb->lb_str); 240 | } 241 | else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) { 242 | int type = (int) PyToken_ThreeChars(lb->lb_str[1], 243 | lb->lb_str[2], 244 | lb->lb_str[3]); 245 | if (type != OP) { 246 | lb->lb_type = type; 247 | free(lb->lb_str); 248 | lb->lb_str = NULL; 249 | } 250 | else 251 | printf("Unknown OP label %s\n", 252 | lb->lb_str); 253 | } 254 | else 255 | printf("Can't translate STRING label %s\n", 256 | lb->lb_str); 257 | } 258 | else 259 | printf("Can't translate label '%s'\n", 260 | PyGrammar_LabelRepr(lb)); 261 | } 262 | -------------------------------------------------------------------------------- /bg.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #for p in sorted( set(re.findall( "'\w+'", file( '/home/az/pygrammar').read()))): print p[1:-1] 3 | 4 | translation_constants = ''' 5 | False Не Лъжа 6 | None Нищо 7 | True Да Истина 8 | ''' 9 | 10 | translation = translation_constants + ''' 11 | and и 12 | as като 13 | assert осигури 14 | break спри 15 | class клас 16 | continue продължи 17 | def деф 18 | del изтрий 19 | elif инако 20 | else иначе 21 | except освен 22 | finally винаги задължително 23 | for за всяко всички 24 | from от 25 | global глобални 26 | if ако 27 | import внеси 28 | in във 29 | is е 30 | lambda ламбда 31 | nonlocal нелокални 32 | not не 33 | or или 34 | pass карай 35 | raise вдигни 36 | return върни 37 | try опитай 38 | while докато 39 | with със 40 | yield предай произведи добий 41 | ''' 42 | extra_charset = 'А-Яа-я' 43 | 44 | 45 | p2b = dict( a.strip().split()[:2] 46 | for a in translation.split('\n') 47 | if a.strip() ) 48 | 49 | import sys, re 50 | from svd_util import optz 51 | optz.bool( 'lat2cyr', default= False) 52 | optz.bool( 'grammar', default= False) 53 | optz.bool( 'vim', default= False) 54 | #optz.bool( 'all_variants', default= False, help= 'allow all alternatives; default: only first listed') 55 | optz.bool( 'dump', ) 56 | opts,args = optz.get() 57 | 58 | def tx( w): 59 | w = p2b.get(w,w) 60 | if opts.lat2cyr: 61 | from util import lat2cyr 62 | w = lat2cyr.zvuchene.lat2cyr(w) 63 | return w 64 | 65 | 66 | class reader: 67 | def __init__( me): 68 | me.pending = None 69 | for a in sys.stdin: 70 | a = a.rstrip() 71 | me.doer( a) 72 | 73 | 74 | class grammar( reader): 75 | def doer( me, a): 76 | l = re.split( "('\w+')", a) 77 | if len(l) == 1: r = l 78 | else: 79 | #print( '#',l) 80 | r = [] 81 | for i,w in enumerate( l): 82 | if w and i%2 ==1: 83 | wt = p2b[ w[1:-1] ] 84 | w = "( %(w)s | '%(wt)s' )" % locals() 85 | r.append(w) 86 | print( ''.join( r )) 87 | 88 | class vim( reader): 89 | funchdr = 'syn match pythonFunction' 90 | @classmethod 91 | def vimfuncpat( me, t): 92 | # \ "\%(\%(def\s\|class\s\|@\)\s*\)\@<=\h\%(\w\|\.\)*" contained 93 | 94 | def replfunc( m): 95 | e = m.group(0) 96 | r = [] 97 | for w in m.group(1).split('\\|'): 98 | r.append( w) 99 | i = w.split( '\\s', 1)[0] 100 | w = tx(i) 101 | if w != i: r.append( w+'\\s') 102 | return e[ :m.start(1)-m.start(0)] + '\\|'.join( r) + e[ m.end(1)-m.start(0): ] 103 | 104 | t = t.strip() 105 | if t.startswith( '\\ '): t = t[2:] 106 | t = t.replace( '\\w', '['+extra_charset+ 'A-Za-z_0-9]') 107 | t = t.replace( '\\h', '['+extra_charset+ 'A-Za-z_]') 108 | alt = re.sub( r'\((\w.*?)\\\)', replfunc, t) 109 | #l = re.split( '(\\.|\W)', t) 110 | print( 'syn clear pythonFunction') #else duplicates 111 | print( me.funchdr, alt) 112 | 113 | def doer( me, a): 114 | if a.strip() and me.pending: 115 | me.pending( a) 116 | me.pending = None 117 | return 118 | 119 | l = re.split( '[\s,]+', a) 120 | if l[:2] == 'syn keyword'.split() and l[2] in ''' 121 | pythonStatement 122 | pythonStatement 123 | pythonStatement 124 | pythonStatement 125 | pythonConditional 126 | pythonRepeat 127 | pythonOperator 128 | pythonException 129 | pythonInclude 130 | '''.split(): 131 | 132 | r = [tx(w) for w in l[3:]] 133 | print( ' '.join( l[:3] + r )) 134 | return 135 | 136 | if l[:3] == me.funchdr.split(): 137 | r = l[3:] 138 | if r: me.vimfuncpat( a.split( 'pythonFunction')[-1] ) 139 | else: me.pending = me.vimfuncpat 140 | 141 | class plain( reader): 142 | def doer( me, a): 143 | l = re.split( '(\W+)', a) 144 | r = [tx(w) for w in l] 145 | print( ''.join( r )) 146 | 147 | if opts.dump: 148 | print( '\n'.join( k+':\t '+v for k,v in sorted( p2b.items()))) 149 | else: 150 | if opts.grammar: doer = grammar 151 | elif opts.vim: doer = vim 152 | else: doer = plain 153 | doer() 154 | 155 | #for x in 'Нищо Да Не'.split(): print( ', '.join( str(ord(y)) for y in x), '//', x) 156 | 157 | methods = dict( 158 | builtin = ''' 159 | abs абс 160 | all всички 161 | any коедае 162 | ascii текстпрост 163 | bin двоично 164 | callable изпълним 165 | chr символно 166 | compile компилирай 167 | delattr изтрийатр 168 | dir съдържание 169 | divmod остмод 170 | eval изчисли 171 | exec изпълни 172 | format оформи 173 | getattr дайатр 174 | globals глобални 175 | hasattr имаатр 176 | hash хеш 177 | hex ш16чно 178 | id ид 179 | input въведи 180 | isinstance еекземпляр 181 | issubclass еподклас 182 | iter итер 183 | len бр 184 | locals локални 185 | max макс 186 | min мин 187 | next следващ 188 | oct осмично 189 | ord поредно 190 | pow степен 191 | print печат печатай 192 | repr текстпълен 193 | round закръгли 194 | setattr сложиатр 195 | sorted подредени 196 | sum сума 197 | vars променливи 198 | ''', 199 | 200 | builtin_types = translation_constants + ''' 201 | Ellipsis Многоточие 202 | NotImplemented НеНаправено 203 | bool булево 204 | memoryview памет 205 | bytearray байтовмасив 206 | bytes байтове 207 | classmethod класовметод 208 | complex комплексно 209 | dict речник 210 | enumerate изброени 211 | filter филтър 212 | float плаващочисло 213 | frozenset множество 214 | property свойство 215 | int цялочисло 216 | list списък 217 | map съответствие 218 | object обект 219 | range поредни 220 | reversed наобратно обърнати преобърнати 221 | set множество множ 222 | slice резен 223 | staticmethod статиченметод 224 | str текст низ стр стринг 225 | super свръх 226 | tuple двойка 227 | type тип 228 | zip цип зип 229 | ''', 230 | 231 | dict= ''' 232 | get дай 233 | setdefault сложиаконяма 234 | pop извади изкарай 235 | popitem извадидвойка 236 | keys ключове 237 | items двойки 238 | values стойности 239 | update обнови 240 | fromkeys сключове 241 | clear изчисти 242 | copy копирай 243 | ''', 244 | 245 | list= ''' 246 | append допълни 247 | insert вмъкни 248 | extend разшири удължи 249 | pop извади изкарай 250 | remove изтрий 251 | index намери 252 | count брой 253 | reverse обърниреда 254 | sort подреди 255 | ''', 256 | 257 | set= ''' 258 | add добави 259 | clear изчисти 260 | copy копирай 261 | discard махни 262 | difference разлика 263 | difference_update обнови_като_разлика 264 | intersection сечение 265 | intersection_update обнови_като_сечение 266 | isdisjoint непресича 267 | issubset еподмножество 268 | issuperset енадмножество 269 | pop извади изкарай 270 | remove изтрий 271 | symmetric_difference двупосочна_разлика двойна_разлика 272 | symmetric_difference_update обнови_като_двупосочна_разлика обнови_като_двойна_разлика 273 | union обединение 274 | update обнови 275 | ''', 276 | str= ''' 277 | encode кодирай 278 | replace замести 279 | split раздели 280 | rsplit драздели раздели_отзад раздели_открая 281 | join слепи сглоби 282 | capitalize заглавие 283 | title заглавнидуми 284 | center центрирай всредатаподравни подравни_среда 285 | count брой 286 | expandtabs разпънитаб 287 | find търси 288 | rfind дтърси търси_отзад търси–открая 289 | index намери 290 | rindex днамери намери_отзад намери_открая 291 | partition разцепи 292 | rpartition дразцепи разцепи_отзад разцепи_открая 293 | ljust лподравни лявоподравни подравни_ляво 294 | rjust дподравни дясноподравни подравни_дясно 295 | strip почисти 296 | lstrip лпочисти почисти_отпред почисти_отначало 297 | rstrip дпочисти почисти_отзад почисти_открая 298 | splitlines разделиредове 299 | translate преведи 300 | swapcase размени_големималки размени_големималкибукви 301 | lower малки малкибукви 302 | upper големи големибукви 303 | startswith започвас 304 | endswith свършвас 305 | islower емалка емалкабуква 306 | isupper еголяма еголямабуква 307 | istitle езаглавнидуми 308 | isspace eмясто еразредка еинтервал 309 | isdecimal едесетично 310 | isdigit ецифра 311 | isnumeric ечисло 312 | isalpha ебуква 313 | isalnum ебуквочисло 314 | isidentifier еидентификатор еиме 315 | isprintable епечатаемо етекст 316 | zfill запълни0 запълнинули 317 | format формат оформи 318 | format_map формат_речник оформи_речник 319 | maketrans направи_превод 320 | ''', 321 | ) 322 | # vim:ts=4:sw=4:expandtab 323 | -------------------------------------------------------------------------------- /example-results/python3.2-3.2-ubuntu1104/Python/ast.c: -------------------------------------------------------------------------------- 1 | /* 2 | * This file includes functions to transform a concrete syntax tree (CST) to 3 | * an abstract syntax tree (AST). The main function is PyAST_FromNode(). 4 | * 5 | */ 6 | #include "Python.h" 7 | #include "Python-ast.h" 8 | #include "grammar.h" 9 | #include "node.h" 10 | #include "ast.h" 11 | #include "token.h" 12 | #include "parsetok.h" 13 | #include "graminit.h" 14 | 15 | #include 16 | 17 | static const char bg_else[] = "иначе"; 18 | static const char bg_elif[] = "инако"; 19 | static const char bg_in[] = "във"; 20 | static const char bg_and[] = "и"; 21 | static const char bg_is[] = "е"; 22 | static const char bg_finally[] = "винаги"; 23 | #define streq( x, name) ( !strcmp( x, #name) || !strcmp( x, bg_##name) ) 24 | 25 | 26 | /* Data structure used internally */ 27 | struct compiling { 28 | char *c_encoding; /* source encoding */ 29 | PyArena *c_arena; /* arena for allocating memeory */ 30 | const char *c_filename; /* filename */ 31 | }; 32 | 33 | static asdl_seq *seq_for_testlist(struct compiling *, const node *); 34 | static expr_ty ast_for_expr(struct compiling *, const node *); 35 | static stmt_ty ast_for_stmt(struct compiling *, const node *); 36 | static asdl_seq *ast_for_suite(struct compiling *, const node *); 37 | static asdl_seq *ast_for_exprlist(struct compiling *, const node *, 38 | expr_context_ty); 39 | static expr_ty ast_for_testlist(struct compiling *, const node *); 40 | static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *); 41 | 42 | /* Note different signature for ast_for_call */ 43 | static expr_ty ast_for_call(struct compiling *, const node *, expr_ty); 44 | 45 | static PyObject *parsenumber(struct compiling *, const char *); 46 | static PyObject *parsestr(struct compiling *, const node *n, int *bytesmode); 47 | static PyObject *parsestrplus(struct compiling *, const node *n, 48 | int *bytesmode); 49 | 50 | #ifndef LINENO 51 | #define LINENO(n) ((n)->n_lineno) 52 | #endif 53 | 54 | #define COMP_GENEXP 0 55 | #define COMP_LISTCOMP 1 56 | #define COMP_SETCOMP 2 57 | 58 | static identifier 59 | new_identifier(const char* n, PyArena *arena) 60 | { 61 | PyObject* id = PyUnicode_DecodeUTF8(n, strlen(n), NULL); 62 | Py_UNICODE *u; 63 | if (!id) 64 | return NULL; 65 | u = PyUnicode_AS_UNICODE(id); 66 | /* Check whether there are non-ASCII characters in the 67 | identifier; if so, normalize to NFKC. */ 68 | for (; *u; u++) { 69 | if (*u >= 128) { 70 | PyObject *m = PyImport_ImportModuleNoBlock("unicodedata"); 71 | PyObject *id2; 72 | if (!m) 73 | return NULL; 74 | id2 = PyObject_CallMethod(m, "normalize", "sO", "NFKC", id); 75 | Py_DECREF(m); 76 | if (!id2) 77 | return NULL; 78 | Py_DECREF(id); 79 | id = id2; 80 | break; 81 | } 82 | } 83 | PyUnicode_InternInPlace(&id); 84 | PyArena_AddPyObject(arena, id); 85 | return id; 86 | } 87 | 88 | #define NEW_IDENTIFIER(n) new_identifier(STR(n), c->c_arena) 89 | 90 | /* This routine provides an invalid object for the syntax error. 91 | The outermost routine must unpack this error and create the 92 | proper object. We do this so that we don't have to pass 93 | the filename to everything function. 94 | 95 | XXX Maybe we should just pass the filename... 96 | */ 97 | 98 | static int 99 | ast_error(const node *n, const char *errstr) 100 | { 101 | PyObject *u = Py_BuildValue("zii", errstr, LINENO(n), n->n_col_offset); 102 | if (!u) 103 | return 0; 104 | PyErr_SetObject(PyExc_SyntaxError, u); 105 | Py_DECREF(u); 106 | return 0; 107 | } 108 | 109 | static void 110 | ast_error_finish(const char *filename) 111 | { 112 | PyObject *type, *value, *tback, *errstr, *offset, *loc, *tmp; 113 | PyObject *filename_obj; 114 | long lineno; 115 | 116 | assert(PyErr_Occurred()); 117 | if (!PyErr_ExceptionMatches(PyExc_SyntaxError)) 118 | return; 119 | 120 | PyErr_Fetch(&type, &value, &tback); 121 | errstr = PyTuple_GetItem(value, 0); 122 | if (!errstr) 123 | return; 124 | Py_INCREF(errstr); 125 | lineno = PyLong_AsLong(PyTuple_GetItem(value, 1)); 126 | if (lineno == -1) { 127 | Py_DECREF(errstr); 128 | return; 129 | } 130 | offset = PyTuple_GetItem(value, 2); 131 | if (!offset) { 132 | Py_DECREF(errstr); 133 | return; 134 | } 135 | Py_DECREF(value); 136 | 137 | loc = PyErr_ProgramText(filename, lineno); 138 | if (!loc) { 139 | Py_INCREF(Py_None); 140 | loc = Py_None; 141 | } 142 | if (filename != NULL) 143 | filename_obj = PyUnicode_DecodeFSDefault(filename); 144 | else { 145 | Py_INCREF(Py_None); 146 | filename_obj = Py_None; 147 | } 148 | if (filename_obj != NULL) 149 | tmp = Py_BuildValue("(NlOO)", filename_obj, lineno, offset, loc); 150 | else 151 | tmp = NULL; 152 | Py_DECREF(loc); 153 | if (!tmp) { 154 | Py_DECREF(errstr); 155 | return; 156 | } 157 | value = PyTuple_Pack(2, errstr, tmp); 158 | Py_DECREF(errstr); 159 | Py_DECREF(tmp); 160 | if (!value) 161 | return; 162 | PyErr_Restore(type, value, tback); 163 | } 164 | 165 | /* num_stmts() returns number of contained statements. 166 | 167 | Use this routine to determine how big a sequence is needed for 168 | the statements in a parse tree. Its raison d'etre is this bit of 169 | grammar: 170 | 171 | stmt: simple_stmt | compound_stmt 172 | simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE 173 | 174 | A simple_stmt can contain multiple small_stmt elements joined 175 | by semicolons. If the arg is a simple_stmt, the number of 176 | small_stmt elements is returned. 177 | */ 178 | 179 | static int 180 | num_stmts(const node *n) 181 | { 182 | int i, l; 183 | node *ch; 184 | 185 | switch (TYPE(n)) { 186 | case single_input: 187 | if (TYPE(CHILD(n, 0)) == NEWLINE) 188 | return 0; 189 | else 190 | return num_stmts(CHILD(n, 0)); 191 | case file_input: 192 | l = 0; 193 | for (i = 0; i < NCH(n); i++) { 194 | ch = CHILD(n, i); 195 | if (TYPE(ch) == stmt) 196 | l += num_stmts(ch); 197 | } 198 | return l; 199 | case stmt: 200 | return num_stmts(CHILD(n, 0)); 201 | case compound_stmt: 202 | return 1; 203 | case simple_stmt: 204 | return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */ 205 | case suite: 206 | if (NCH(n) == 1) 207 | return num_stmts(CHILD(n, 0)); 208 | else { 209 | l = 0; 210 | for (i = 2; i < (NCH(n) - 1); i++) 211 | l += num_stmts(CHILD(n, i)); 212 | return l; 213 | } 214 | default: { 215 | char buf[128]; 216 | 217 | sprintf(buf, "Non-statement found: %d %d", 218 | TYPE(n), NCH(n)); 219 | Py_FatalError(buf); 220 | } 221 | } 222 | assert(0); 223 | return 0; 224 | } 225 | 226 | /* Transform the CST rooted at node * to the appropriate AST 227 | */ 228 | 229 | mod_ty 230 | PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename, 231 | PyArena *arena) 232 | { 233 | int i, j, k, num; 234 | asdl_seq *stmts = NULL; 235 | stmt_ty s; 236 | node *ch; 237 | struct compiling c; 238 | 239 | if (flags && flags->cf_flags & PyCF_SOURCE_IS_UTF8) { 240 | c.c_encoding = "utf-8"; 241 | if (TYPE(n) == encoding_decl) { 242 | #if 0 243 | ast_error(n, "encoding declaration in Unicode string"); 244 | goto error; 245 | #endif 246 | n = CHILD(n, 0); 247 | } 248 | } else if (TYPE(n) == encoding_decl) { 249 | c.c_encoding = STR(n); 250 | n = CHILD(n, 0); 251 | } else { 252 | /* PEP 3120 */ 253 | c.c_encoding = "utf-8"; 254 | } 255 | c.c_arena = arena; 256 | c.c_filename = filename; 257 | 258 | k = 0; 259 | switch (TYPE(n)) { 260 | case file_input: 261 | stmts = asdl_seq_new(num_stmts(n), arena); 262 | if (!stmts) 263 | return NULL; 264 | for (i = 0; i < NCH(n) - 1; i++) { 265 | ch = CHILD(n, i); 266 | if (TYPE(ch) == NEWLINE) 267 | continue; 268 | REQ(ch, stmt); 269 | num = num_stmts(ch); 270 | if (num == 1) { 271 | s = ast_for_stmt(&c, ch); 272 | if (!s) 273 | goto error; 274 | asdl_seq_SET(stmts, k++, s); 275 | } 276 | else { 277 | ch = CHILD(ch, 0); 278 | REQ(ch, simple_stmt); 279 | for (j = 0; j < num; j++) { 280 | s = ast_for_stmt(&c, CHILD(ch, j * 2)); 281 | if (!s) 282 | goto error; 283 | asdl_seq_SET(stmts, k++, s); 284 | } 285 | } 286 | } 287 | return Module(stmts, arena); 288 | case eval_input: { 289 | expr_ty testlist_ast; 290 | 291 | /* XXX Why not comp_for here? */ 292 | testlist_ast = ast_for_testlist(&c, CHILD(n, 0)); 293 | if (!testlist_ast) 294 | goto error; 295 | return Expression(testlist_ast, arena); 296 | } 297 | case single_input: 298 | if (TYPE(CHILD(n, 0)) == NEWLINE) { 299 | stmts = asdl_seq_new(1, arena); 300 | if (!stmts) 301 | goto error; 302 | asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset, 303 | arena)); 304 | if (!asdl_seq_GET(stmts, 0)) 305 | goto error; 306 | return Interactive(stmts, arena); 307 | } 308 | else { 309 | n = CHILD(n, 0); 310 | num = num_stmts(n); 311 | stmts = asdl_seq_new(num, arena); 312 | if (!stmts) 313 | goto error; 314 | if (num == 1) { 315 | s = ast_for_stmt(&c, n); 316 | if (!s) 317 | goto error; 318 | asdl_seq_SET(stmts, 0, s); 319 | } 320 | else { 321 | /* Only a simple_stmt can contain multiple statements. */ 322 | REQ(n, simple_stmt); 323 | for (i = 0; i < NCH(n); i += 2) { 324 | if (TYPE(CHILD(n, i)) == NEWLINE) 325 | break; 326 | s = ast_for_stmt(&c, CHILD(n, i)); 327 | if (!s) 328 | goto error; 329 | asdl_seq_SET(stmts, i / 2, s); 330 | } 331 | } 332 | 333 | return Interactive(stmts, arena); 334 | } 335 | default: 336 | PyErr_Format(PyExc_SystemError, 337 | "invalid node %d for PyAST_FromNode", TYPE(n)); 338 | goto error; 339 | } 340 | error: 341 | ast_error_finish(filename); 342 | return NULL; 343 | } 344 | 345 | /* Return the AST repr. of the operator represented as syntax (|, ^, etc.) 346 | */ 347 | 348 | static operator_ty 349 | get_operator(const node *n) 350 | { 351 | switch (TYPE(n)) { 352 | case VBAR: 353 | return BitOr; 354 | case CIRCUMFLEX: 355 | return BitXor; 356 | case AMPER: 357 | return BitAnd; 358 | case LEFTSHIFT: 359 | return LShift; 360 | case RIGHTSHIFT: 361 | return RShift; 362 | case PLUS: 363 | return Add; 364 | case MINUS: 365 | return Sub; 366 | case STAR: 367 | return Mult; 368 | case SLASH: 369 | return Div; 370 | case DOUBLESLASH: 371 | return FloorDiv; 372 | case PERCENT: 373 | return Mod; 374 | default: 375 | return (operator_ty)0; 376 | } 377 | } 378 | 379 | static const char* FORBIDDEN[] = { 380 | "None", 381 | "True", 382 | "False", 383 | NULL, 384 | }; 385 | 386 | static const Py_UNICODE ubgNone[] = { 1053, 1080, 1097, 1086, 0}; 387 | static const Py_UNICODE ubgTrue[] = { 1044, 1072, 0}; 388 | static const Py_UNICODE ubgFalse[]= { 1053, 1077, 0}; 389 | 390 | static const Py_UNICODE * uFORBIDDEN[] = { 391 | ubgNone, 392 | ubgTrue, 393 | ubgFalse, 394 | NULL 395 | }; 396 | 397 | int 398 | PyUnicode_CompareWithUnicode( PyObject* uni, const Py_UNICODE * str) 399 | { /* same as PyUnicode_CompareWithASCIIString */ 400 | int i; 401 | Py_UNICODE *id; 402 | assert(PyUnicode_Check(uni)); 403 | id = PyUnicode_AS_UNICODE(uni); 404 | for (i = 0; id[i] && str[i]; i++) 405 | if (id[i] != str[i]) 406 | return (id[i] < str[i]) ? -1 : 1; 407 | if (PyUnicode_GET_SIZE(uni) != i || id[i]) 408 | return 1; /* uni is longer */ 409 | if (str[i]) 410 | return -1; /* str is longer */ 411 | return 0; 412 | } 413 | 414 | static int 415 | forbidden_name(identifier name, const node *n, int full_checks) 416 | { 417 | assert(PyUnicode_Check(name)); 418 | if (PyUnicode_CompareWithASCIIString(name, "__debug__") == 0) { 419 | ast_error(n, "assignment to keyword"); 420 | return 1; 421 | } 422 | if (full_checks) { 423 | const char **p; 424 | for (p = FORBIDDEN; *p; p++) { 425 | if (PyUnicode_CompareWithASCIIString(name, *p) == 0) { 426 | ast_error(n, "assignment to keyword"); 427 | return 1; 428 | } 429 | } 430 | const Py_UNICODE **u; 431 | for (u = uFORBIDDEN; *u; u++) { 432 | if (PyUnicode_CompareWithUnicode( name, *u) ==0) { 433 | ast_error(n, "assignment to keyword"); 434 | return 1; 435 | } 436 | } 437 | } 438 | return 0; 439 | } 440 | 441 | /* Set the context ctx for expr_ty e, recursively traversing e. 442 | 443 | Only sets context for expr kinds that "can appear in assignment context" 444 | (according to ../Parser/Python.asdl). For other expr kinds, it sets 445 | an appropriate syntax error and returns false. 446 | */ 447 | 448 | static int 449 | set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n) 450 | { 451 | asdl_seq *s = NULL; 452 | /* If a particular expression type can't be used for assign / delete, 453 | set expr_name to its name and an error message will be generated. 454 | */ 455 | const char* expr_name = NULL; 456 | 457 | /* The ast defines augmented store and load contexts, but the 458 | implementation here doesn't actually use them. The code may be 459 | a little more complex than necessary as a result. It also means 460 | that expressions in an augmented assignment have a Store context. 461 | Consider restructuring so that augmented assignment uses 462 | set_context(), too. 463 | */ 464 | assert(ctx != AugStore && ctx != AugLoad); 465 | 466 | switch (e->kind) { 467 | case Attribute_kind: 468 | e->v.Attribute.ctx = ctx; 469 | if (ctx == Store && forbidden_name(e->v.Attribute.attr, n, 1)) 470 | return 0; 471 | break; 472 | case Subscript_kind: 473 | e->v.Subscript.ctx = ctx; 474 | break; 475 | case Starred_kind: 476 | e->v.Starred.ctx = ctx; 477 | if (!set_context(c, e->v.Starred.value, ctx, n)) 478 | return 0; 479 | break; 480 | case Name_kind: 481 | if (ctx == Store) { 482 | if (forbidden_name(e->v.Name.id, n, 1)) 483 | return 0; /* forbidden_name() calls ast_error() */ 484 | } 485 | e->v.Name.ctx = ctx; 486 | break; 487 | case List_kind: 488 | e->v.List.ctx = ctx; 489 | s = e->v.List.elts; 490 | break; 491 | case Tuple_kind: 492 | if (asdl_seq_LEN(e->v.Tuple.elts)) { 493 | e->v.Tuple.ctx = ctx; 494 | s = e->v.Tuple.elts; 495 | } 496 | else { 497 | expr_name = "()"; 498 | } 499 | break; 500 | case Lambda_kind: 501 | expr_name = "lambda"; 502 | break; 503 | case Call_kind: 504 | expr_name = "function call"; 505 | break; 506 | case BoolOp_kind: 507 | case BinOp_kind: 508 | case UnaryOp_kind: 509 | expr_name = "operator"; 510 | break; 511 | case GeneratorExp_kind: 512 | expr_name = "generator expression"; 513 | break; 514 | case Yield_kind: 515 | expr_name = "yield expression"; 516 | break; 517 | case ListComp_kind: 518 | expr_name = "list comprehension"; 519 | break; 520 | case SetComp_kind: 521 | expr_name = "set comprehension"; 522 | break; 523 | case DictComp_kind: 524 | expr_name = "dict comprehension"; 525 | break; 526 | case Dict_kind: 527 | case Set_kind: 528 | case Num_kind: 529 | case Str_kind: 530 | expr_name = "literal"; 531 | break; 532 | case Ellipsis_kind: 533 | expr_name = "Ellipsis"; 534 | break; 535 | case Compare_kind: 536 | expr_name = "comparison"; 537 | break; 538 | case IfExp_kind: 539 | expr_name = "conditional expression"; 540 | break; 541 | default: 542 | PyErr_Format(PyExc_SystemError, 543 | "unexpected expression in assignment %d (line %d)", 544 | e->kind, e->lineno); 545 | return 0; 546 | } 547 | /* Check for error string set by switch */ 548 | if (expr_name) { 549 | char buf[300]; 550 | PyOS_snprintf(buf, sizeof(buf), 551 | "can't %s %s", 552 | ctx == Store ? "assign to" : "delete", 553 | expr_name); 554 | return ast_error(n, buf); 555 | } 556 | 557 | /* If the LHS is a list or tuple, we need to set the assignment 558 | context for all the contained elements. 559 | */ 560 | if (s) { 561 | int i; 562 | 563 | for (i = 0; i < asdl_seq_LEN(s); i++) { 564 | if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n)) 565 | return 0; 566 | } 567 | } 568 | return 1; 569 | } 570 | 571 | static operator_ty 572 | ast_for_augassign(struct compiling *c, const node *n) 573 | { 574 | REQ(n, augassign); 575 | n = CHILD(n, 0); 576 | switch (STR(n)[0]) { 577 | case '+': 578 | return Add; 579 | case '-': 580 | return Sub; 581 | case '/': 582 | if (STR(n)[1] == '/') 583 | return FloorDiv; 584 | else 585 | return Div; 586 | case '%': 587 | return Mod; 588 | case '<': 589 | return LShift; 590 | case '>': 591 | return RShift; 592 | case '&': 593 | return BitAnd; 594 | case '^': 595 | return BitXor; 596 | case '|': 597 | return BitOr; 598 | case '*': 599 | if (STR(n)[1] == '*') 600 | return Pow; 601 | else 602 | return Mult; 603 | default: 604 | PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n)); 605 | return (operator_ty)0; 606 | } 607 | } 608 | 609 | static cmpop_ty 610 | ast_for_comp_op(struct compiling *c, const node *n) 611 | { 612 | /* comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is' 613 | |'is' 'not' 614 | */ 615 | REQ(n, comp_op); 616 | if (NCH(n) == 1) { 617 | n = CHILD(n, 0); 618 | switch (TYPE(n)) { 619 | case LESS: 620 | return Lt; 621 | case GREATER: 622 | return Gt; 623 | case EQEQUAL: /* == */ 624 | return Eq; 625 | case LESSEQUAL: 626 | return LtE; 627 | case GREATEREQUAL: 628 | return GtE; 629 | case NOTEQUAL: 630 | return NotEq; 631 | case NAME: 632 | if (streq(STR(n), in)) 633 | return In; 634 | if (streq(STR(n), is)) 635 | return Is; 636 | default: 637 | PyErr_Format(PyExc_SystemError, "invalid comp_op: %s", 638 | STR(n)); 639 | return (cmpop_ty)0; 640 | } 641 | } 642 | else if (NCH(n) == 2) { 643 | /* handle "not in" and "is not" */ 644 | switch (TYPE(CHILD(n, 0))) { 645 | case NAME: 646 | if (streq(STR(CHILD(n, 1)), in)) 647 | return NotIn; 648 | if (streq(STR(CHILD(n, 0)), is)) 649 | return IsNot; 650 | default: 651 | PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s", 652 | STR(CHILD(n, 0)), STR(CHILD(n, 1))); 653 | return (cmpop_ty)0; 654 | } 655 | } 656 | PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children", 657 | NCH(n)); 658 | return (cmpop_ty)0; 659 | } 660 | 661 | static asdl_seq * 662 | seq_for_testlist(struct compiling *c, const node *n) 663 | { 664 | /* testlist: test (',' test)* [','] 665 | testlist_star_expr: test|star_expr (',' test|star_expr)* [','] 666 | */ 667 | asdl_seq *seq; 668 | expr_ty expression; 669 | int i; 670 | assert(TYPE(n) == testlist || TYPE(n) == testlist_star_expr || TYPE(n) == testlist_comp); 671 | 672 | seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena); 673 | if (!seq) 674 | return NULL; 675 | 676 | for (i = 0; i < NCH(n); i += 2) { 677 | const node *ch = CHILD(n, i); 678 | assert(TYPE(ch) == test || TYPE(ch) == test_nocond || TYPE(ch) == star_expr); 679 | 680 | expression = ast_for_expr(c, ch); 681 | if (!expression) 682 | return NULL; 683 | 684 | assert(i / 2 < seq->size); 685 | asdl_seq_SET(seq, i / 2, expression); 686 | } 687 | return seq; 688 | } 689 | 690 | static arg_ty 691 | compiler_arg(struct compiling *c, const node *n) 692 | { 693 | identifier name; 694 | expr_ty annotation = NULL; 695 | node *ch; 696 | 697 | assert(TYPE(n) == tfpdef || TYPE(n) == vfpdef); 698 | ch = CHILD(n, 0); 699 | name = NEW_IDENTIFIER(ch); 700 | if (!name) 701 | return NULL; 702 | if (forbidden_name(name, ch, 0)) 703 | return NULL; 704 | 705 | if (NCH(n) == 3 && TYPE(CHILD(n, 1)) == COLON) { 706 | annotation = ast_for_expr(c, CHILD(n, 2)); 707 | if (!annotation) 708 | return NULL; 709 | } 710 | 711 | return arg(name, annotation, c->c_arena); 712 | #if 0 713 | result = Tuple(args, Store, LINENO(n), n->n_col_offset, c->c_arena); 714 | if (!set_context(c, result, Store, n)) 715 | return NULL; 716 | return result; 717 | #endif 718 | } 719 | 720 | /* returns -1 if failed to handle keyword only arguments 721 | returns new position to keep processing if successful 722 | (',' tfpdef ['=' test])* 723 | ^^^ 724 | start pointing here 725 | */ 726 | static int 727 | handle_keywordonly_args(struct compiling *c, const node *n, int start, 728 | asdl_seq *kwonlyargs, asdl_seq *kwdefaults) 729 | { 730 | PyObject *argname; 731 | node *ch; 732 | expr_ty expression, annotation; 733 | arg_ty arg; 734 | int i = start; 735 | int j = 0; /* index for kwdefaults and kwonlyargs */ 736 | 737 | if (kwonlyargs == NULL) { 738 | ast_error(CHILD(n, start), "named arguments must follow bare *"); 739 | return -1; 740 | } 741 | assert(kwdefaults != NULL); 742 | while (i < NCH(n)) { 743 | ch = CHILD(n, i); 744 | switch (TYPE(ch)) { 745 | case vfpdef: 746 | case tfpdef: 747 | if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) { 748 | expression = ast_for_expr(c, CHILD(n, i + 2)); 749 | if (!expression) 750 | goto error; 751 | asdl_seq_SET(kwdefaults, j, expression); 752 | i += 2; /* '=' and test */ 753 | } 754 | else { /* setting NULL if no default value exists */ 755 | asdl_seq_SET(kwdefaults, j, NULL); 756 | } 757 | if (NCH(ch) == 3) { 758 | /* ch is NAME ':' test */ 759 | annotation = ast_for_expr(c, CHILD(ch, 2)); 760 | if (!annotation) 761 | goto error; 762 | } 763 | else { 764 | annotation = NULL; 765 | } 766 | ch = CHILD(ch, 0); 767 | argname = NEW_IDENTIFIER(ch); 768 | if (!argname) 769 | goto error; 770 | if (forbidden_name(argname, ch, 0)) 771 | goto error; 772 | arg = arg(argname, annotation, c->c_arena); 773 | if (!arg) 774 | goto error; 775 | asdl_seq_SET(kwonlyargs, j++, arg); 776 | i += 2; /* the name and the comma */ 777 | break; 778 | case DOUBLESTAR: 779 | return i; 780 | default: 781 | ast_error(ch, "unexpected node"); 782 | goto error; 783 | } 784 | } 785 | return i; 786 | error: 787 | return -1; 788 | } 789 | 790 | /* Create AST for argument list. */ 791 | 792 | static arguments_ty 793 | ast_for_arguments(struct compiling *c, const node *n) 794 | { 795 | /* This function handles both typedargslist (function definition) 796 | and varargslist (lambda definition). 797 | 798 | parameters: '(' [typedargslist] ')' 799 | typedargslist: ((tfpdef ['=' test] ',')* 800 | ('*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] 801 | | '**' tfpdef) 802 | | tfpdef ['=' test] (',' tfpdef ['=' test])* [',']) 803 | tfpdef: NAME [':' test] 804 | varargslist: ((vfpdef ['=' test] ',')* 805 | ('*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] 806 | | '**' vfpdef) 807 | | vfpdef ['=' test] (',' vfpdef ['=' test])* [',']) 808 | vfpdef: NAME 809 | */ 810 | int i, j, k, nposargs = 0, nkwonlyargs = 0; 811 | int nposdefaults = 0, found_default = 0; 812 | asdl_seq *posargs, *posdefaults, *kwonlyargs, *kwdefaults; 813 | identifier vararg = NULL, kwarg = NULL; 814 | arg_ty arg; 815 | expr_ty varargannotation = NULL, kwargannotation = NULL; 816 | node *ch; 817 | 818 | if (TYPE(n) == parameters) { 819 | if (NCH(n) == 2) /* () as argument list */ 820 | return arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL, 821 | NULL, c->c_arena); 822 | n = CHILD(n, 1); 823 | } 824 | assert(TYPE(n) == typedargslist || TYPE(n) == varargslist); 825 | 826 | /* First count the number of positional args & defaults. The 827 | variable i is the loop index for this for loop and the next. 828 | The next loop picks up where the first leaves off. 829 | */ 830 | for (i = 0; i < NCH(n); i++) { 831 | ch = CHILD(n, i); 832 | if (TYPE(ch) == STAR) { 833 | /* skip star */ 834 | i++; 835 | if (i < NCH(n) && /* skip argument following star */ 836 | (TYPE(CHILD(n, i)) == tfpdef || 837 | TYPE(CHILD(n, i)) == vfpdef)) { 838 | i++; 839 | } 840 | break; 841 | } 842 | if (TYPE(ch) == DOUBLESTAR) break; 843 | if (TYPE(ch) == vfpdef || TYPE(ch) == tfpdef) nposargs++; 844 | if (TYPE(ch) == EQUAL) nposdefaults++; 845 | } 846 | /* count the number of keyword only args & 847 | defaults for keyword only args */ 848 | for ( ; i < NCH(n); ++i) { 849 | ch = CHILD(n, i); 850 | if (TYPE(ch) == DOUBLESTAR) break; 851 | if (TYPE(ch) == tfpdef || TYPE(ch) == vfpdef) nkwonlyargs++; 852 | } 853 | posargs = (nposargs ? asdl_seq_new(nposargs, c->c_arena) : NULL); 854 | if (!posargs && nposargs) 855 | return NULL; 856 | kwonlyargs = (nkwonlyargs ? 857 | asdl_seq_new(nkwonlyargs, c->c_arena) : NULL); 858 | if (!kwonlyargs && nkwonlyargs) 859 | return NULL; 860 | posdefaults = (nposdefaults ? 861 | asdl_seq_new(nposdefaults, c->c_arena) : NULL); 862 | if (!posdefaults && nposdefaults) 863 | return NULL; 864 | /* The length of kwonlyargs and kwdefaults are same 865 | since we set NULL as default for keyword only argument w/o default 866 | - we have sequence data structure, but no dictionary */ 867 | kwdefaults = (nkwonlyargs ? 868 | asdl_seq_new(nkwonlyargs, c->c_arena) : NULL); 869 | if (!kwdefaults && nkwonlyargs) 870 | return NULL; 871 | 872 | if (nposargs + nkwonlyargs > 255) { 873 | ast_error(n, "more than 255 arguments"); 874 | return NULL; 875 | } 876 | 877 | /* tfpdef: NAME [':' test] 878 | vfpdef: NAME 879 | */ 880 | i = 0; 881 | j = 0; /* index for defaults */ 882 | k = 0; /* index for args */ 883 | while (i < NCH(n)) { 884 | ch = CHILD(n, i); 885 | switch (TYPE(ch)) { 886 | case tfpdef: 887 | case vfpdef: 888 | /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is 889 | anything other than EQUAL or a comma? */ 890 | /* XXX Should NCH(n) check be made a separate check? */ 891 | if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) { 892 | expr_ty expression = ast_for_expr(c, CHILD(n, i + 2)); 893 | if (!expression) 894 | return NULL; 895 | assert(posdefaults != NULL); 896 | asdl_seq_SET(posdefaults, j++, expression); 897 | i += 2; 898 | found_default = 1; 899 | } 900 | else if (found_default) { 901 | ast_error(n, 902 | "non-default argument follows default argument"); 903 | return NULL; 904 | } 905 | arg = compiler_arg(c, ch); 906 | if (!arg) 907 | return NULL; 908 | asdl_seq_SET(posargs, k++, arg); 909 | i += 2; /* the name and the comma */ 910 | break; 911 | case STAR: 912 | if (i+1 >= NCH(n)) { 913 | ast_error(CHILD(n, i), 914 | "named arguments must follow bare *"); 915 | return NULL; 916 | } 917 | ch = CHILD(n, i+1); /* tfpdef or COMMA */ 918 | if (TYPE(ch) == COMMA) { 919 | int res = 0; 920 | i += 2; /* now follows keyword only arguments */ 921 | res = handle_keywordonly_args(c, n, i, 922 | kwonlyargs, kwdefaults); 923 | if (res == -1) return NULL; 924 | i = res; /* res has new position to process */ 925 | } 926 | else { 927 | vararg = NEW_IDENTIFIER(CHILD(ch, 0)); 928 | if (!vararg) 929 | return NULL; 930 | if (forbidden_name(vararg, CHILD(ch, 0), 0)) 931 | return NULL; 932 | if (NCH(ch) > 1) { 933 | /* there is an annotation on the vararg */ 934 | varargannotation = ast_for_expr(c, CHILD(ch, 2)); 935 | if (!varargannotation) 936 | return NULL; 937 | } 938 | i += 3; 939 | if (i < NCH(n) && (TYPE(CHILD(n, i)) == tfpdef 940 | || TYPE(CHILD(n, i)) == vfpdef)) { 941 | int res = 0; 942 | res = handle_keywordonly_args(c, n, i, 943 | kwonlyargs, kwdefaults); 944 | if (res == -1) return NULL; 945 | i = res; /* res has new position to process */ 946 | } 947 | } 948 | break; 949 | case DOUBLESTAR: 950 | ch = CHILD(n, i+1); /* tfpdef */ 951 | assert(TYPE(ch) == tfpdef || TYPE(ch) == vfpdef); 952 | kwarg = NEW_IDENTIFIER(CHILD(ch, 0)); 953 | if (!kwarg) 954 | return NULL; 955 | if (NCH(ch) > 1) { 956 | /* there is an annotation on the kwarg */ 957 | kwargannotation = ast_for_expr(c, CHILD(ch, 2)); 958 | if (!kwargannotation) 959 | return NULL; 960 | } 961 | if (forbidden_name(kwarg, CHILD(ch, 0), 0)) 962 | return NULL; 963 | i += 3; 964 | break; 965 | default: 966 | PyErr_Format(PyExc_SystemError, 967 | "unexpected node in varargslist: %d @ %d", 968 | TYPE(ch), i); 969 | return NULL; 970 | } 971 | } 972 | return arguments(posargs, vararg, varargannotation, kwonlyargs, kwarg, 973 | kwargannotation, posdefaults, kwdefaults, c->c_arena); 974 | } 975 | 976 | static expr_ty 977 | ast_for_dotted_name(struct compiling *c, const node *n) 978 | { 979 | expr_ty e; 980 | identifier id; 981 | int lineno, col_offset; 982 | int i; 983 | 984 | REQ(n, dotted_name); 985 | 986 | lineno = LINENO(n); 987 | col_offset = n->n_col_offset; 988 | 989 | id = NEW_IDENTIFIER(CHILD(n, 0)); 990 | if (!id) 991 | return NULL; 992 | e = Name(id, Load, lineno, col_offset, c->c_arena); 993 | if (!e) 994 | return NULL; 995 | 996 | for (i = 2; i < NCH(n); i+=2) { 997 | id = NEW_IDENTIFIER(CHILD(n, i)); 998 | if (!id) 999 | return NULL; 1000 | e = Attribute(e, id, Load, lineno, col_offset, c->c_arena); 1001 | if (!e) 1002 | return NULL; 1003 | } 1004 | 1005 | return e; 1006 | } 1007 | 1008 | static expr_ty 1009 | ast_for_decorator(struct compiling *c, const node *n) 1010 | { 1011 | /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */ 1012 | expr_ty d = NULL; 1013 | expr_ty name_expr; 1014 | 1015 | REQ(n, decorator); 1016 | REQ(CHILD(n, 0), AT); 1017 | REQ(RCHILD(n, -1), NEWLINE); 1018 | 1019 | name_expr = ast_for_dotted_name(c, CHILD(n, 1)); 1020 | if (!name_expr) 1021 | return NULL; 1022 | 1023 | if (NCH(n) == 3) { /* No arguments */ 1024 | d = name_expr; 1025 | name_expr = NULL; 1026 | } 1027 | else if (NCH(n) == 5) { /* Call with no arguments */ 1028 | d = Call(name_expr, NULL, NULL, NULL, NULL, LINENO(n), 1029 | n->n_col_offset, c->c_arena); 1030 | if (!d) 1031 | return NULL; 1032 | name_expr = NULL; 1033 | } 1034 | else { 1035 | d = ast_for_call(c, CHILD(n, 3), name_expr); 1036 | if (!d) 1037 | return NULL; 1038 | name_expr = NULL; 1039 | } 1040 | 1041 | return d; 1042 | } 1043 | 1044 | static asdl_seq* 1045 | ast_for_decorators(struct compiling *c, const node *n) 1046 | { 1047 | asdl_seq* decorator_seq; 1048 | expr_ty d; 1049 | int i; 1050 | 1051 | REQ(n, decorators); 1052 | decorator_seq = asdl_seq_new(NCH(n), c->c_arena); 1053 | if (!decorator_seq) 1054 | return NULL; 1055 | 1056 | for (i = 0; i < NCH(n); i++) { 1057 | d = ast_for_decorator(c, CHILD(n, i)); 1058 | if (!d) 1059 | return NULL; 1060 | asdl_seq_SET(decorator_seq, i, d); 1061 | } 1062 | return decorator_seq; 1063 | } 1064 | 1065 | static stmt_ty 1066 | ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq) 1067 | { 1068 | /* funcdef: 'def' NAME parameters ['->' test] ':' suite */ 1069 | identifier name; 1070 | arguments_ty args; 1071 | asdl_seq *body; 1072 | expr_ty returns = NULL; 1073 | int name_i = 1; 1074 | 1075 | REQ(n, funcdef); 1076 | 1077 | name = NEW_IDENTIFIER(CHILD(n, name_i)); 1078 | if (!name) 1079 | return NULL; 1080 | if (forbidden_name(name, CHILD(n, name_i), 0)) 1081 | return NULL; 1082 | args = ast_for_arguments(c, CHILD(n, name_i + 1)); 1083 | if (!args) 1084 | return NULL; 1085 | if (TYPE(CHILD(n, name_i+2)) == RARROW) { 1086 | returns = ast_for_expr(c, CHILD(n, name_i + 3)); 1087 | if (!returns) 1088 | return NULL; 1089 | name_i += 2; 1090 | } 1091 | body = ast_for_suite(c, CHILD(n, name_i + 3)); 1092 | if (!body) 1093 | return NULL; 1094 | 1095 | return FunctionDef(name, args, body, decorator_seq, returns, LINENO(n), 1096 | n->n_col_offset, c->c_arena); 1097 | } 1098 | 1099 | static stmt_ty 1100 | ast_for_decorated(struct compiling *c, const node *n) 1101 | { 1102 | /* decorated: decorators (classdef | funcdef) */ 1103 | stmt_ty thing = NULL; 1104 | asdl_seq *decorator_seq = NULL; 1105 | 1106 | REQ(n, decorated); 1107 | 1108 | decorator_seq = ast_for_decorators(c, CHILD(n, 0)); 1109 | if (!decorator_seq) 1110 | return NULL; 1111 | 1112 | assert(TYPE(CHILD(n, 1)) == funcdef || 1113 | TYPE(CHILD(n, 1)) == classdef); 1114 | 1115 | if (TYPE(CHILD(n, 1)) == funcdef) { 1116 | thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq); 1117 | } else if (TYPE(CHILD(n, 1)) == classdef) { 1118 | thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq); 1119 | } 1120 | /* we count the decorators in when talking about the class' or 1121 | * function's line number */ 1122 | if (thing) { 1123 | thing->lineno = LINENO(n); 1124 | thing->col_offset = n->n_col_offset; 1125 | } 1126 | return thing; 1127 | } 1128 | 1129 | static expr_ty 1130 | ast_for_lambdef(struct compiling *c, const node *n) 1131 | { 1132 | /* lambdef: 'lambda' [varargslist] ':' test 1133 | lambdef_nocond: 'lambda' [varargslist] ':' test_nocond */ 1134 | arguments_ty args; 1135 | expr_ty expression; 1136 | 1137 | if (NCH(n) == 3) { 1138 | args = arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1139 | NULL, c->c_arena); 1140 | if (!args) 1141 | return NULL; 1142 | expression = ast_for_expr(c, CHILD(n, 2)); 1143 | if (!expression) 1144 | return NULL; 1145 | } 1146 | else { 1147 | args = ast_for_arguments(c, CHILD(n, 1)); 1148 | if (!args) 1149 | return NULL; 1150 | expression = ast_for_expr(c, CHILD(n, 3)); 1151 | if (!expression) 1152 | return NULL; 1153 | } 1154 | 1155 | return Lambda(args, expression, LINENO(n), n->n_col_offset, c->c_arena); 1156 | } 1157 | 1158 | static expr_ty 1159 | ast_for_ifexpr(struct compiling *c, const node *n) 1160 | { 1161 | /* test: or_test 'if' or_test 'else' test */ 1162 | expr_ty expression, body, orelse; 1163 | 1164 | assert(NCH(n) == 5); 1165 | body = ast_for_expr(c, CHILD(n, 0)); 1166 | if (!body) 1167 | return NULL; 1168 | expression = ast_for_expr(c, CHILD(n, 2)); 1169 | if (!expression) 1170 | return NULL; 1171 | orelse = ast_for_expr(c, CHILD(n, 4)); 1172 | if (!orelse) 1173 | return NULL; 1174 | return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset, 1175 | c->c_arena); 1176 | } 1177 | 1178 | /* 1179 | Count the number of 'for' loops in a comprehension. 1180 | 1181 | Helper for ast_for_comprehension(). 1182 | */ 1183 | 1184 | static int 1185 | count_comp_fors(struct compiling *c, const node *n) 1186 | { 1187 | int n_fors = 0; 1188 | 1189 | count_comp_for: 1190 | n_fors++; 1191 | REQ(n, comp_for); 1192 | if (NCH(n) == 5) 1193 | n = CHILD(n, 4); 1194 | else 1195 | return n_fors; 1196 | count_comp_iter: 1197 | REQ(n, comp_iter); 1198 | n = CHILD(n, 0); 1199 | if (TYPE(n) == comp_for) 1200 | goto count_comp_for; 1201 | else if (TYPE(n) == comp_if) { 1202 | if (NCH(n) == 3) { 1203 | n = CHILD(n, 2); 1204 | goto count_comp_iter; 1205 | } 1206 | else 1207 | return n_fors; 1208 | } 1209 | 1210 | /* Should never be reached */ 1211 | PyErr_SetString(PyExc_SystemError, 1212 | "logic error in count_comp_fors"); 1213 | return -1; 1214 | } 1215 | 1216 | /* Count the number of 'if' statements in a comprehension. 1217 | 1218 | Helper for ast_for_comprehension(). 1219 | */ 1220 | 1221 | static int 1222 | count_comp_ifs(struct compiling *c, const node *n) 1223 | { 1224 | int n_ifs = 0; 1225 | 1226 | while (1) { 1227 | REQ(n, comp_iter); 1228 | if (TYPE(CHILD(n, 0)) == comp_for) 1229 | return n_ifs; 1230 | n = CHILD(n, 0); 1231 | REQ(n, comp_if); 1232 | n_ifs++; 1233 | if (NCH(n) == 2) 1234 | return n_ifs; 1235 | n = CHILD(n, 2); 1236 | } 1237 | } 1238 | 1239 | static asdl_seq * 1240 | ast_for_comprehension(struct compiling *c, const node *n) 1241 | { 1242 | int i, n_fors; 1243 | asdl_seq *comps; 1244 | 1245 | n_fors = count_comp_fors(c, n); 1246 | if (n_fors == -1) 1247 | return NULL; 1248 | 1249 | comps = asdl_seq_new(n_fors, c->c_arena); 1250 | if (!comps) 1251 | return NULL; 1252 | 1253 | for (i = 0; i < n_fors; i++) { 1254 | comprehension_ty comp; 1255 | asdl_seq *t; 1256 | expr_ty expression, first; 1257 | node *for_ch; 1258 | 1259 | REQ(n, comp_for); 1260 | 1261 | for_ch = CHILD(n, 1); 1262 | t = ast_for_exprlist(c, for_ch, Store); 1263 | if (!t) 1264 | return NULL; 1265 | expression = ast_for_expr(c, CHILD(n, 3)); 1266 | if (!expression) 1267 | return NULL; 1268 | 1269 | /* Check the # of children rather than the length of t, since 1270 | (x for x, in ...) has 1 element in t, but still requires a Tuple. */ 1271 | first = (expr_ty)asdl_seq_GET(t, 0); 1272 | if (NCH(for_ch) == 1) 1273 | comp = comprehension(first, expression, NULL, c->c_arena); 1274 | else 1275 | comp = comprehension(Tuple(t, Store, first->lineno, first->col_offset, 1276 | c->c_arena), 1277 | expression, NULL, c->c_arena); 1278 | if (!comp) 1279 | return NULL; 1280 | 1281 | if (NCH(n) == 5) { 1282 | int j, n_ifs; 1283 | asdl_seq *ifs; 1284 | 1285 | n = CHILD(n, 4); 1286 | n_ifs = count_comp_ifs(c, n); 1287 | if (n_ifs == -1) 1288 | return NULL; 1289 | 1290 | ifs = asdl_seq_new(n_ifs, c->c_arena); 1291 | if (!ifs) 1292 | return NULL; 1293 | 1294 | for (j = 0; j < n_ifs; j++) { 1295 | REQ(n, comp_iter); 1296 | n = CHILD(n, 0); 1297 | REQ(n, comp_if); 1298 | 1299 | expression = ast_for_expr(c, CHILD(n, 1)); 1300 | if (!expression) 1301 | return NULL; 1302 | asdl_seq_SET(ifs, j, expression); 1303 | if (NCH(n) == 3) 1304 | n = CHILD(n, 2); 1305 | } 1306 | /* on exit, must guarantee that n is a comp_for */ 1307 | if (TYPE(n) == comp_iter) 1308 | n = CHILD(n, 0); 1309 | comp->ifs = ifs; 1310 | } 1311 | asdl_seq_SET(comps, i, comp); 1312 | } 1313 | return comps; 1314 | } 1315 | 1316 | static expr_ty 1317 | ast_for_itercomp(struct compiling *c, const node *n, int type) 1318 | { 1319 | /* testlist_comp: test ( comp_for | (',' test)* [','] ) 1320 | argument: [test '='] test [comp_for] # Really [keyword '='] test */ 1321 | expr_ty elt; 1322 | asdl_seq *comps; 1323 | 1324 | assert(NCH(n) > 1); 1325 | 1326 | elt = ast_for_expr(c, CHILD(n, 0)); 1327 | if (!elt) 1328 | return NULL; 1329 | 1330 | comps = ast_for_comprehension(c, CHILD(n, 1)); 1331 | if (!comps) 1332 | return NULL; 1333 | 1334 | if (type == COMP_GENEXP) 1335 | return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena); 1336 | else if (type == COMP_LISTCOMP) 1337 | return ListComp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena); 1338 | else if (type == COMP_SETCOMP) 1339 | return SetComp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena); 1340 | else 1341 | /* Should never happen */ 1342 | return NULL; 1343 | } 1344 | 1345 | static expr_ty 1346 | ast_for_dictcomp(struct compiling *c, const node *n) 1347 | { 1348 | expr_ty key, value; 1349 | asdl_seq *comps; 1350 | 1351 | assert(NCH(n) > 3); 1352 | REQ(CHILD(n, 1), COLON); 1353 | 1354 | key = ast_for_expr(c, CHILD(n, 0)); 1355 | if (!key) 1356 | return NULL; 1357 | value = ast_for_expr(c, CHILD(n, 2)); 1358 | if (!value) 1359 | return NULL; 1360 | 1361 | comps = ast_for_comprehension(c, CHILD(n, 3)); 1362 | if (!comps) 1363 | return NULL; 1364 | 1365 | return DictComp(key, value, comps, LINENO(n), n->n_col_offset, c->c_arena); 1366 | } 1367 | 1368 | static expr_ty 1369 | ast_for_genexp(struct compiling *c, const node *n) 1370 | { 1371 | assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument)); 1372 | return ast_for_itercomp(c, n, COMP_GENEXP); 1373 | } 1374 | 1375 | static expr_ty 1376 | ast_for_listcomp(struct compiling *c, const node *n) 1377 | { 1378 | assert(TYPE(n) == (testlist_comp)); 1379 | return ast_for_itercomp(c, n, COMP_LISTCOMP); 1380 | } 1381 | 1382 | static expr_ty 1383 | ast_for_setcomp(struct compiling *c, const node *n) 1384 | { 1385 | assert(TYPE(n) == (dictorsetmaker)); 1386 | return ast_for_itercomp(c, n, COMP_SETCOMP); 1387 | } 1388 | 1389 | 1390 | static expr_ty 1391 | ast_for_atom(struct compiling *c, const node *n) 1392 | { 1393 | /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']' 1394 | | '{' [dictmaker|testlist_comp] '}' | NAME | NUMBER | STRING+ 1395 | | '...' | 'None' | 'True' | 'False' 1396 | */ 1397 | node *ch = CHILD(n, 0); 1398 | int bytesmode = 0; 1399 | 1400 | switch (TYPE(ch)) { 1401 | case NAME: { 1402 | /* All names start in Load context, but may later be 1403 | changed. */ 1404 | PyObject *name = NEW_IDENTIFIER(ch); 1405 | if (!name) 1406 | return NULL; 1407 | return Name(name, Load, LINENO(n), n->n_col_offset, c->c_arena); 1408 | } 1409 | case STRING: { 1410 | PyObject *str = parsestrplus(c, n, &bytesmode); 1411 | if (!str) { 1412 | if (PyErr_ExceptionMatches(PyExc_UnicodeError)) { 1413 | PyObject *type, *value, *tback, *errstr; 1414 | PyErr_Fetch(&type, &value, &tback); 1415 | errstr = PyObject_Str(value); 1416 | if (errstr) { 1417 | char *s = ""; 1418 | char buf[128]; 1419 | s = _PyUnicode_AsString(errstr); 1420 | PyOS_snprintf(buf, sizeof(buf), "(unicode error) %s", s); 1421 | ast_error(n, buf); 1422 | Py_DECREF(errstr); 1423 | } else { 1424 | ast_error(n, "(unicode error) unknown error"); 1425 | } 1426 | Py_DECREF(type); 1427 | Py_DECREF(value); 1428 | Py_XDECREF(tback); 1429 | } 1430 | return NULL; 1431 | } 1432 | PyArena_AddPyObject(c->c_arena, str); 1433 | if (bytesmode) 1434 | return Bytes(str, LINENO(n), n->n_col_offset, c->c_arena); 1435 | else 1436 | return Str(str, LINENO(n), n->n_col_offset, c->c_arena); 1437 | } 1438 | case NUMBER: { 1439 | PyObject *pynum = parsenumber(c, STR(ch)); 1440 | if (!pynum) 1441 | return NULL; 1442 | 1443 | PyArena_AddPyObject(c->c_arena, pynum); 1444 | return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena); 1445 | } 1446 | case ELLIPSIS: /* Ellipsis */ 1447 | return Ellipsis(LINENO(n), n->n_col_offset, c->c_arena); 1448 | case LPAR: /* some parenthesized expressions */ 1449 | ch = CHILD(n, 1); 1450 | 1451 | if (TYPE(ch) == RPAR) 1452 | return Tuple(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena); 1453 | 1454 | if (TYPE(ch) == yield_expr) 1455 | return ast_for_expr(c, ch); 1456 | 1457 | /* testlist_comp: test ( comp_for | (',' test)* [','] ) */ 1458 | if ((NCH(ch) > 1) && (TYPE(CHILD(ch, 1)) == comp_for)) 1459 | return ast_for_genexp(c, ch); 1460 | 1461 | return ast_for_testlist(c, ch); 1462 | case LSQB: /* list (or list comprehension) */ 1463 | ch = CHILD(n, 1); 1464 | 1465 | if (TYPE(ch) == RSQB) 1466 | return List(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena); 1467 | 1468 | REQ(ch, testlist_comp); 1469 | if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) { 1470 | asdl_seq *elts = seq_for_testlist(c, ch); 1471 | if (!elts) 1472 | return NULL; 1473 | 1474 | return List(elts, Load, LINENO(n), n->n_col_offset, c->c_arena); 1475 | } 1476 | else 1477 | return ast_for_listcomp(c, ch); 1478 | case LBRACE: { 1479 | /* dictorsetmaker: test ':' test (',' test ':' test)* [','] | 1480 | * test (gen_for | (',' test)* [',']) */ 1481 | int i, size; 1482 | asdl_seq *keys, *values; 1483 | 1484 | ch = CHILD(n, 1); 1485 | if (TYPE(ch) == RBRACE) { 1486 | /* it's an empty dict */ 1487 | return Dict(NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena); 1488 | } else if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) { 1489 | /* it's a simple set */ 1490 | asdl_seq *elts; 1491 | size = (NCH(ch) + 1) / 2; /* +1 in case no trailing comma */ 1492 | elts = asdl_seq_new(size, c->c_arena); 1493 | if (!elts) 1494 | return NULL; 1495 | for (i = 0; i < NCH(ch); i += 2) { 1496 | expr_ty expression; 1497 | expression = ast_for_expr(c, CHILD(ch, i)); 1498 | if (!expression) 1499 | return NULL; 1500 | asdl_seq_SET(elts, i / 2, expression); 1501 | } 1502 | return Set(elts, LINENO(n), n->n_col_offset, c->c_arena); 1503 | } else if (TYPE(CHILD(ch, 1)) == comp_for) { 1504 | /* it's a set comprehension */ 1505 | return ast_for_setcomp(c, ch); 1506 | } else if (NCH(ch) > 3 && TYPE(CHILD(ch, 3)) == comp_for) { 1507 | return ast_for_dictcomp(c, ch); 1508 | } else { 1509 | /* it's a dict */ 1510 | size = (NCH(ch) + 1) / 4; /* +1 in case no trailing comma */ 1511 | keys = asdl_seq_new(size, c->c_arena); 1512 | if (!keys) 1513 | return NULL; 1514 | 1515 | values = asdl_seq_new(size, c->c_arena); 1516 | if (!values) 1517 | return NULL; 1518 | 1519 | for (i = 0; i < NCH(ch); i += 4) { 1520 | expr_ty expression; 1521 | 1522 | expression = ast_for_expr(c, CHILD(ch, i)); 1523 | if (!expression) 1524 | return NULL; 1525 | 1526 | asdl_seq_SET(keys, i / 4, expression); 1527 | 1528 | expression = ast_for_expr(c, CHILD(ch, i + 2)); 1529 | if (!expression) 1530 | return NULL; 1531 | 1532 | asdl_seq_SET(values, i / 4, expression); 1533 | } 1534 | return Dict(keys, values, LINENO(n), n->n_col_offset, c->c_arena); 1535 | } 1536 | } 1537 | default: 1538 | PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch)); 1539 | return NULL; 1540 | } 1541 | } 1542 | 1543 | static slice_ty 1544 | ast_for_slice(struct compiling *c, const node *n) 1545 | { 1546 | node *ch; 1547 | expr_ty lower = NULL, upper = NULL, step = NULL; 1548 | 1549 | REQ(n, subscript); 1550 | 1551 | /* 1552 | subscript: test | [test] ':' [test] [sliceop] 1553 | sliceop: ':' [test] 1554 | */ 1555 | ch = CHILD(n, 0); 1556 | if (NCH(n) == 1 && TYPE(ch) == test) { 1557 | /* 'step' variable hold no significance in terms of being used over 1558 | other vars */ 1559 | step = ast_for_expr(c, ch); 1560 | if (!step) 1561 | return NULL; 1562 | 1563 | return Index(step, c->c_arena); 1564 | } 1565 | 1566 | if (TYPE(ch) == test) { 1567 | lower = ast_for_expr(c, ch); 1568 | if (!lower) 1569 | return NULL; 1570 | } 1571 | 1572 | /* If there's an upper bound it's in the second or third position. */ 1573 | if (TYPE(ch) == COLON) { 1574 | if (NCH(n) > 1) { 1575 | node *n2 = CHILD(n, 1); 1576 | 1577 | if (TYPE(n2) == test) { 1578 | upper = ast_for_expr(c, n2); 1579 | if (!upper) 1580 | return NULL; 1581 | } 1582 | } 1583 | } else if (NCH(n) > 2) { 1584 | node *n2 = CHILD(n, 2); 1585 | 1586 | if (TYPE(n2) == test) { 1587 | upper = ast_for_expr(c, n2); 1588 | if (!upper) 1589 | return NULL; 1590 | } 1591 | } 1592 | 1593 | ch = CHILD(n, NCH(n) - 1); 1594 | if (TYPE(ch) == sliceop) { 1595 | if (NCH(ch) != 1) { 1596 | ch = CHILD(ch, 1); 1597 | if (TYPE(ch) == test) { 1598 | step = ast_for_expr(c, ch); 1599 | if (!step) 1600 | return NULL; 1601 | } 1602 | } 1603 | } 1604 | 1605 | return Slice(lower, upper, step, c->c_arena); 1606 | } 1607 | 1608 | static expr_ty 1609 | ast_for_binop(struct compiling *c, const node *n) 1610 | { 1611 | /* Must account for a sequence of expressions. 1612 | How should A op B op C by represented? 1613 | BinOp(BinOp(A, op, B), op, C). 1614 | */ 1615 | 1616 | int i, nops; 1617 | expr_ty expr1, expr2, result; 1618 | operator_ty newoperator; 1619 | 1620 | expr1 = ast_for_expr(c, CHILD(n, 0)); 1621 | if (!expr1) 1622 | return NULL; 1623 | 1624 | expr2 = ast_for_expr(c, CHILD(n, 2)); 1625 | if (!expr2) 1626 | return NULL; 1627 | 1628 | newoperator = get_operator(CHILD(n, 1)); 1629 | if (!newoperator) 1630 | return NULL; 1631 | 1632 | result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset, 1633 | c->c_arena); 1634 | if (!result) 1635 | return NULL; 1636 | 1637 | nops = (NCH(n) - 1) / 2; 1638 | for (i = 1; i < nops; i++) { 1639 | expr_ty tmp_result, tmp; 1640 | const node* next_oper = CHILD(n, i * 2 + 1); 1641 | 1642 | newoperator = get_operator(next_oper); 1643 | if (!newoperator) 1644 | return NULL; 1645 | 1646 | tmp = ast_for_expr(c, CHILD(n, i * 2 + 2)); 1647 | if (!tmp) 1648 | return NULL; 1649 | 1650 | tmp_result = BinOp(result, newoperator, tmp, 1651 | LINENO(next_oper), next_oper->n_col_offset, 1652 | c->c_arena); 1653 | if (!tmp_result) 1654 | return NULL; 1655 | result = tmp_result; 1656 | } 1657 | return result; 1658 | } 1659 | 1660 | static expr_ty 1661 | ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr) 1662 | { 1663 | /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME 1664 | subscriptlist: subscript (',' subscript)* [','] 1665 | subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop] 1666 | */ 1667 | REQ(n, trailer); 1668 | if (TYPE(CHILD(n, 0)) == LPAR) { 1669 | if (NCH(n) == 2) 1670 | return Call(left_expr, NULL, NULL, NULL, NULL, LINENO(n), 1671 | n->n_col_offset, c->c_arena); 1672 | else 1673 | return ast_for_call(c, CHILD(n, 1), left_expr); 1674 | } 1675 | else if (TYPE(CHILD(n, 0)) == DOT ) { 1676 | PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1)); 1677 | if (!attr_id) 1678 | return NULL; 1679 | return Attribute(left_expr, attr_id, Load, 1680 | LINENO(n), n->n_col_offset, c->c_arena); 1681 | } 1682 | else { 1683 | REQ(CHILD(n, 0), LSQB); 1684 | REQ(CHILD(n, 2), RSQB); 1685 | n = CHILD(n, 1); 1686 | if (NCH(n) == 1) { 1687 | slice_ty slc = ast_for_slice(c, CHILD(n, 0)); 1688 | if (!slc) 1689 | return NULL; 1690 | return Subscript(left_expr, slc, Load, LINENO(n), n->n_col_offset, 1691 | c->c_arena); 1692 | } 1693 | else { 1694 | /* The grammar is ambiguous here. The ambiguity is resolved 1695 | by treating the sequence as a tuple literal if there are 1696 | no slice features. 1697 | */ 1698 | int j; 1699 | slice_ty slc; 1700 | expr_ty e; 1701 | int simple = 1; 1702 | asdl_seq *slices, *elts; 1703 | slices = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena); 1704 | if (!slices) 1705 | return NULL; 1706 | for (j = 0; j < NCH(n); j += 2) { 1707 | slc = ast_for_slice(c, CHILD(n, j)); 1708 | if (!slc) 1709 | return NULL; 1710 | if (slc->kind != Index_kind) 1711 | simple = 0; 1712 | asdl_seq_SET(slices, j / 2, slc); 1713 | } 1714 | if (!simple) { 1715 | return Subscript(left_expr, ExtSlice(slices, c->c_arena), 1716 | Load, LINENO(n), n->n_col_offset, c->c_arena); 1717 | } 1718 | /* extract Index values and put them in a Tuple */ 1719 | elts = asdl_seq_new(asdl_seq_LEN(slices), c->c_arena); 1720 | if (!elts) 1721 | return NULL; 1722 | for (j = 0; j < asdl_seq_LEN(slices); ++j) { 1723 | slc = (slice_ty)asdl_seq_GET(slices, j); 1724 | assert(slc->kind == Index_kind && slc->v.Index.value); 1725 | asdl_seq_SET(elts, j, slc->v.Index.value); 1726 | } 1727 | e = Tuple(elts, Load, LINENO(n), n->n_col_offset, c->c_arena); 1728 | if (!e) 1729 | return NULL; 1730 | return Subscript(left_expr, Index(e, c->c_arena), 1731 | Load, LINENO(n), n->n_col_offset, c->c_arena); 1732 | } 1733 | } 1734 | } 1735 | 1736 | static expr_ty 1737 | ast_for_factor(struct compiling *c, const node *n) 1738 | { 1739 | expr_ty expression; 1740 | 1741 | expression = ast_for_expr(c, CHILD(n, 1)); 1742 | if (!expression) 1743 | return NULL; 1744 | 1745 | switch (TYPE(CHILD(n, 0))) { 1746 | case PLUS: 1747 | return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset, 1748 | c->c_arena); 1749 | case MINUS: 1750 | return UnaryOp(USub, expression, LINENO(n), n->n_col_offset, 1751 | c->c_arena); 1752 | case TILDE: 1753 | return UnaryOp(Invert, expression, LINENO(n), 1754 | n->n_col_offset, c->c_arena); 1755 | } 1756 | PyErr_Format(PyExc_SystemError, "unhandled factor: %d", 1757 | TYPE(CHILD(n, 0))); 1758 | return NULL; 1759 | } 1760 | 1761 | static expr_ty 1762 | ast_for_power(struct compiling *c, const node *n) 1763 | { 1764 | /* power: atom trailer* ('**' factor)* 1765 | */ 1766 | int i; 1767 | expr_ty e, tmp; 1768 | REQ(n, power); 1769 | e = ast_for_atom(c, CHILD(n, 0)); 1770 | if (!e) 1771 | return NULL; 1772 | if (NCH(n) == 1) 1773 | return e; 1774 | for (i = 1; i < NCH(n); i++) { 1775 | node *ch = CHILD(n, i); 1776 | if (TYPE(ch) != trailer) 1777 | break; 1778 | tmp = ast_for_trailer(c, ch, e); 1779 | if (!tmp) 1780 | return NULL; 1781 | tmp->lineno = e->lineno; 1782 | tmp->col_offset = e->col_offset; 1783 | e = tmp; 1784 | } 1785 | if (TYPE(CHILD(n, NCH(n) - 1)) == factor) { 1786 | expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1)); 1787 | if (!f) 1788 | return NULL; 1789 | tmp = BinOp(e, Pow, f, LINENO(n), n->n_col_offset, c->c_arena); 1790 | if (!tmp) 1791 | return NULL; 1792 | e = tmp; 1793 | } 1794 | return e; 1795 | } 1796 | 1797 | static expr_ty 1798 | ast_for_starred(struct compiling *c, const node *n) 1799 | { 1800 | expr_ty tmp; 1801 | REQ(n, star_expr); 1802 | 1803 | tmp = ast_for_expr(c, CHILD(n, 1)); 1804 | if (!tmp) 1805 | return NULL; 1806 | 1807 | /* The Load context is changed later. */ 1808 | return Starred(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena); 1809 | } 1810 | 1811 | 1812 | /* Do not name a variable 'expr'! Will cause a compile error. 1813 | */ 1814 | 1815 | static expr_ty 1816 | ast_for_expr(struct compiling *c, const node *n) 1817 | { 1818 | /* handle the full range of simple expressions 1819 | test: or_test ['if' or_test 'else' test] | lambdef 1820 | test_nocond: or_test | lambdef_nocond 1821 | or_test: and_test ('or' and_test)* 1822 | and_test: not_test ('and' not_test)* 1823 | not_test: 'not' not_test | comparison 1824 | comparison: expr (comp_op expr)* 1825 | expr: xor_expr ('|' xor_expr)* 1826 | xor_expr: and_expr ('^' and_expr)* 1827 | and_expr: shift_expr ('&' shift_expr)* 1828 | shift_expr: arith_expr (('<<'|'>>') arith_expr)* 1829 | arith_expr: term (('+'|'-') term)* 1830 | term: factor (('*'|'/'|'%'|'//') factor)* 1831 | factor: ('+'|'-'|'~') factor | power 1832 | power: atom trailer* ('**' factor)* 1833 | */ 1834 | 1835 | asdl_seq *seq; 1836 | int i; 1837 | 1838 | loop: 1839 | switch (TYPE(n)) { 1840 | case test: 1841 | case test_nocond: 1842 | if (TYPE(CHILD(n, 0)) == lambdef || 1843 | TYPE(CHILD(n, 0)) == lambdef_nocond) 1844 | return ast_for_lambdef(c, CHILD(n, 0)); 1845 | else if (NCH(n) > 1) 1846 | return ast_for_ifexpr(c, n); 1847 | /* Fallthrough */ 1848 | case or_test: 1849 | case and_test: 1850 | if (NCH(n) == 1) { 1851 | n = CHILD(n, 0); 1852 | goto loop; 1853 | } 1854 | seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena); 1855 | if (!seq) 1856 | return NULL; 1857 | for (i = 0; i < NCH(n); i += 2) { 1858 | expr_ty e = ast_for_expr(c, CHILD(n, i)); 1859 | if (!e) 1860 | return NULL; 1861 | asdl_seq_SET(seq, i / 2, e); 1862 | } 1863 | if (streq(STR(CHILD(n, 1)), and)) 1864 | return BoolOp(And, seq, LINENO(n), n->n_col_offset, 1865 | c->c_arena); 1866 | assert( streq(STR(CHILD(n, 1)), or)); 1867 | return BoolOp(Or, seq, LINENO(n), n->n_col_offset, c->c_arena); 1868 | case not_test: 1869 | if (NCH(n) == 1) { 1870 | n = CHILD(n, 0); 1871 | goto loop; 1872 | } 1873 | else { 1874 | expr_ty expression = ast_for_expr(c, CHILD(n, 1)); 1875 | if (!expression) 1876 | return NULL; 1877 | 1878 | return UnaryOp(Not, expression, LINENO(n), n->n_col_offset, 1879 | c->c_arena); 1880 | } 1881 | case comparison: 1882 | if (NCH(n) == 1) { 1883 | n = CHILD(n, 0); 1884 | goto loop; 1885 | } 1886 | else { 1887 | expr_ty expression; 1888 | asdl_int_seq *ops; 1889 | asdl_seq *cmps; 1890 | ops = asdl_int_seq_new(NCH(n) / 2, c->c_arena); 1891 | if (!ops) 1892 | return NULL; 1893 | cmps = asdl_seq_new(NCH(n) / 2, c->c_arena); 1894 | if (!cmps) { 1895 | return NULL; 1896 | } 1897 | for (i = 1; i < NCH(n); i += 2) { 1898 | cmpop_ty newoperator; 1899 | 1900 | newoperator = ast_for_comp_op(c, CHILD(n, i)); 1901 | if (!newoperator) { 1902 | return NULL; 1903 | } 1904 | 1905 | expression = ast_for_expr(c, CHILD(n, i + 1)); 1906 | if (!expression) { 1907 | return NULL; 1908 | } 1909 | 1910 | asdl_seq_SET(ops, i / 2, newoperator); 1911 | asdl_seq_SET(cmps, i / 2, expression); 1912 | } 1913 | expression = ast_for_expr(c, CHILD(n, 0)); 1914 | if (!expression) { 1915 | return NULL; 1916 | } 1917 | 1918 | return Compare(expression, ops, cmps, LINENO(n), 1919 | n->n_col_offset, c->c_arena); 1920 | } 1921 | break; 1922 | 1923 | case star_expr: 1924 | return ast_for_starred(c, n); 1925 | /* The next five cases all handle BinOps. The main body of code 1926 | is the same in each case, but the switch turned inside out to 1927 | reuse the code for each type of operator. 1928 | */ 1929 | case expr: 1930 | case xor_expr: 1931 | case and_expr: 1932 | case shift_expr: 1933 | case arith_expr: 1934 | case term: 1935 | if (NCH(n) == 1) { 1936 | n = CHILD(n, 0); 1937 | goto loop; 1938 | } 1939 | return ast_for_binop(c, n); 1940 | case yield_expr: { 1941 | expr_ty exp = NULL; 1942 | if (NCH(n) == 2) { 1943 | exp = ast_for_testlist(c, CHILD(n, 1)); 1944 | if (!exp) 1945 | return NULL; 1946 | } 1947 | return Yield(exp, LINENO(n), n->n_col_offset, c->c_arena); 1948 | } 1949 | case factor: 1950 | if (NCH(n) == 1) { 1951 | n = CHILD(n, 0); 1952 | goto loop; 1953 | } 1954 | return ast_for_factor(c, n); 1955 | case power: 1956 | return ast_for_power(c, n); 1957 | default: 1958 | PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n)); 1959 | return NULL; 1960 | } 1961 | /* should never get here unless if error is set */ 1962 | return NULL; 1963 | } 1964 | 1965 | static expr_ty 1966 | ast_for_call(struct compiling *c, const node *n, expr_ty func) 1967 | { 1968 | /* 1969 | arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] 1970 | | '**' test) 1971 | argument: [test '='] test [comp_for] # Really [keyword '='] test 1972 | */ 1973 | 1974 | int i, nargs, nkeywords, ngens; 1975 | asdl_seq *args; 1976 | asdl_seq *keywords; 1977 | expr_ty vararg = NULL, kwarg = NULL; 1978 | 1979 | REQ(n, arglist); 1980 | 1981 | nargs = 0; 1982 | nkeywords = 0; 1983 | ngens = 0; 1984 | for (i = 0; i < NCH(n); i++) { 1985 | node *ch = CHILD(n, i); 1986 | if (TYPE(ch) == argument) { 1987 | if (NCH(ch) == 1) 1988 | nargs++; 1989 | else if (TYPE(CHILD(ch, 1)) == comp_for) 1990 | ngens++; 1991 | else 1992 | nkeywords++; 1993 | } 1994 | } 1995 | if (ngens > 1 || (ngens && (nargs || nkeywords))) { 1996 | ast_error(n, "Generator expression must be parenthesized " 1997 | "if not sole argument"); 1998 | return NULL; 1999 | } 2000 | 2001 | if (nargs + nkeywords + ngens > 255) { 2002 | ast_error(n, "more than 255 arguments"); 2003 | return NULL; 2004 | } 2005 | 2006 | args = asdl_seq_new(nargs + ngens, c->c_arena); 2007 | if (!args) 2008 | return NULL; 2009 | keywords = asdl_seq_new(nkeywords, c->c_arena); 2010 | if (!keywords) 2011 | return NULL; 2012 | nargs = 0; 2013 | nkeywords = 0; 2014 | for (i = 0; i < NCH(n); i++) { 2015 | node *ch = CHILD(n, i); 2016 | if (TYPE(ch) == argument) { 2017 | expr_ty e; 2018 | if (NCH(ch) == 1) { 2019 | if (nkeywords) { 2020 | ast_error(CHILD(ch, 0), 2021 | "non-keyword arg after keyword arg"); 2022 | return NULL; 2023 | } 2024 | if (vararg) { 2025 | ast_error(CHILD(ch, 0), 2026 | "only named arguments may follow *expression"); 2027 | return NULL; 2028 | } 2029 | e = ast_for_expr(c, CHILD(ch, 0)); 2030 | if (!e) 2031 | return NULL; 2032 | asdl_seq_SET(args, nargs++, e); 2033 | } 2034 | else if (TYPE(CHILD(ch, 1)) == comp_for) { 2035 | e = ast_for_genexp(c, ch); 2036 | if (!e) 2037 | return NULL; 2038 | asdl_seq_SET(args, nargs++, e); 2039 | } 2040 | else { 2041 | keyword_ty kw; 2042 | identifier key, tmp; 2043 | int k; 2044 | 2045 | /* CHILD(ch, 0) is test, but must be an identifier? */ 2046 | e = ast_for_expr(c, CHILD(ch, 0)); 2047 | if (!e) 2048 | return NULL; 2049 | /* f(lambda x: x[0] = 3) ends up getting parsed with 2050 | * LHS test = lambda x: x[0], and RHS test = 3. 2051 | * SF bug 132313 points out that complaining about a keyword 2052 | * then is very confusing. 2053 | */ 2054 | if (e->kind == Lambda_kind) { 2055 | ast_error(CHILD(ch, 0), "lambda cannot contain assignment"); 2056 | return NULL; 2057 | } else if (e->kind != Name_kind) { 2058 | ast_error(CHILD(ch, 0), "keyword can't be an expression"); 2059 | return NULL; 2060 | } else if (forbidden_name(e->v.Name.id, ch, 1)) { 2061 | return NULL; 2062 | } 2063 | key = e->v.Name.id; 2064 | for (k = 0; k < nkeywords; k++) { 2065 | tmp = ((keyword_ty)asdl_seq_GET(keywords, k))->arg; 2066 | if (!PyUnicode_Compare(tmp, key)) { 2067 | ast_error(CHILD(ch, 0), "keyword argument repeated"); 2068 | return NULL; 2069 | } 2070 | } 2071 | e = ast_for_expr(c, CHILD(ch, 2)); 2072 | if (!e) 2073 | return NULL; 2074 | kw = keyword(key, e, c->c_arena); 2075 | if (!kw) 2076 | return NULL; 2077 | asdl_seq_SET(keywords, nkeywords++, kw); 2078 | } 2079 | } 2080 | else if (TYPE(ch) == STAR) { 2081 | vararg = ast_for_expr(c, CHILD(n, i+1)); 2082 | if (!vararg) 2083 | return NULL; 2084 | i++; 2085 | } 2086 | else if (TYPE(ch) == DOUBLESTAR) { 2087 | kwarg = ast_for_expr(c, CHILD(n, i+1)); 2088 | if (!kwarg) 2089 | return NULL; 2090 | i++; 2091 | } 2092 | } 2093 | 2094 | return Call(func, args, keywords, vararg, kwarg, func->lineno, func->col_offset, c->c_arena); 2095 | } 2096 | 2097 | static expr_ty 2098 | ast_for_testlist(struct compiling *c, const node* n) 2099 | { 2100 | /* testlist_comp: test (comp_for | (',' test)* [',']) */ 2101 | /* testlist: test (',' test)* [','] */ 2102 | assert(NCH(n) > 0); 2103 | if (TYPE(n) == testlist_comp) { 2104 | if (NCH(n) > 1) 2105 | assert(TYPE(CHILD(n, 1)) != comp_for); 2106 | } 2107 | else { 2108 | assert(TYPE(n) == testlist || 2109 | TYPE(n) == testlist_star_expr); 2110 | } 2111 | if (NCH(n) == 1) 2112 | return ast_for_expr(c, CHILD(n, 0)); 2113 | else { 2114 | asdl_seq *tmp = seq_for_testlist(c, n); 2115 | if (!tmp) 2116 | return NULL; 2117 | return Tuple(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena); 2118 | } 2119 | } 2120 | 2121 | static stmt_ty 2122 | ast_for_expr_stmt(struct compiling *c, const node *n) 2123 | { 2124 | REQ(n, expr_stmt); 2125 | /* expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) 2126 | | ('=' (yield_expr|testlist))*) 2127 | testlist_star_expr: (test|star_expr) (',' test|star_expr)* [','] 2128 | augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' 2129 | | '<<=' | '>>=' | '**=' | '//=' 2130 | test: ... here starts the operator precendence dance 2131 | */ 2132 | 2133 | if (NCH(n) == 1) { 2134 | expr_ty e = ast_for_testlist(c, CHILD(n, 0)); 2135 | if (!e) 2136 | return NULL; 2137 | 2138 | return Expr(e, LINENO(n), n->n_col_offset, c->c_arena); 2139 | } 2140 | else if (TYPE(CHILD(n, 1)) == augassign) { 2141 | expr_ty expr1, expr2; 2142 | operator_ty newoperator; 2143 | node *ch = CHILD(n, 0); 2144 | 2145 | expr1 = ast_for_testlist(c, ch); 2146 | if (!expr1) 2147 | return NULL; 2148 | if(!set_context(c, expr1, Store, ch)) 2149 | return NULL; 2150 | /* set_context checks that most expressions are not the left side. 2151 | Augmented assignments can only have a name, a subscript, or an 2152 | attribute on the left, though, so we have to explicitly check for 2153 | those. */ 2154 | switch (expr1->kind) { 2155 | case Name_kind: 2156 | case Attribute_kind: 2157 | case Subscript_kind: 2158 | break; 2159 | default: 2160 | ast_error(ch, "illegal expression for augmented assignment"); 2161 | return NULL; 2162 | } 2163 | 2164 | ch = CHILD(n, 2); 2165 | if (TYPE(ch) == testlist) 2166 | expr2 = ast_for_testlist(c, ch); 2167 | else 2168 | expr2 = ast_for_expr(c, ch); 2169 | if (!expr2) 2170 | return NULL; 2171 | 2172 | newoperator = ast_for_augassign(c, CHILD(n, 1)); 2173 | if (!newoperator) 2174 | return NULL; 2175 | 2176 | return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset, c->c_arena); 2177 | } 2178 | else { 2179 | int i; 2180 | asdl_seq *targets; 2181 | node *value; 2182 | expr_ty expression; 2183 | 2184 | /* a normal assignment */ 2185 | REQ(CHILD(n, 1), EQUAL); 2186 | targets = asdl_seq_new(NCH(n) / 2, c->c_arena); 2187 | if (!targets) 2188 | return NULL; 2189 | for (i = 0; i < NCH(n) - 2; i += 2) { 2190 | expr_ty e; 2191 | node *ch = CHILD(n, i); 2192 | if (TYPE(ch) == yield_expr) { 2193 | ast_error(ch, "assignment to yield expression not possible"); 2194 | return NULL; 2195 | } 2196 | e = ast_for_testlist(c, ch); 2197 | if (!e) 2198 | return NULL; 2199 | 2200 | /* set context to assign */ 2201 | if (!set_context(c, e, Store, CHILD(n, i))) 2202 | return NULL; 2203 | 2204 | asdl_seq_SET(targets, i / 2, e); 2205 | } 2206 | value = CHILD(n, NCH(n) - 1); 2207 | if (TYPE(value) == testlist_star_expr) 2208 | expression = ast_for_testlist(c, value); 2209 | else 2210 | expression = ast_for_expr(c, value); 2211 | if (!expression) 2212 | return NULL; 2213 | return Assign(targets, expression, LINENO(n), n->n_col_offset, c->c_arena); 2214 | } 2215 | } 2216 | 2217 | 2218 | static asdl_seq * 2219 | ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context) 2220 | { 2221 | asdl_seq *seq; 2222 | int i; 2223 | expr_ty e; 2224 | 2225 | REQ(n, exprlist); 2226 | 2227 | seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena); 2228 | if (!seq) 2229 | return NULL; 2230 | for (i = 0; i < NCH(n); i += 2) { 2231 | e = ast_for_expr(c, CHILD(n, i)); 2232 | if (!e) 2233 | return NULL; 2234 | asdl_seq_SET(seq, i / 2, e); 2235 | if (context && !set_context(c, e, context, CHILD(n, i))) 2236 | return NULL; 2237 | } 2238 | return seq; 2239 | } 2240 | 2241 | static stmt_ty 2242 | ast_for_del_stmt(struct compiling *c, const node *n) 2243 | { 2244 | asdl_seq *expr_list; 2245 | 2246 | /* del_stmt: 'del' exprlist */ 2247 | REQ(n, del_stmt); 2248 | 2249 | expr_list = ast_for_exprlist(c, CHILD(n, 1), Del); 2250 | if (!expr_list) 2251 | return NULL; 2252 | return Delete(expr_list, LINENO(n), n->n_col_offset, c->c_arena); 2253 | } 2254 | 2255 | static stmt_ty 2256 | ast_for_flow_stmt(struct compiling *c, const node *n) 2257 | { 2258 | /* 2259 | flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt 2260 | | yield_stmt 2261 | break_stmt: 'break' 2262 | continue_stmt: 'continue' 2263 | return_stmt: 'return' [testlist] 2264 | yield_stmt: yield_expr 2265 | yield_expr: 'yield' testlist 2266 | raise_stmt: 'raise' [test [',' test [',' test]]] 2267 | */ 2268 | node *ch; 2269 | 2270 | REQ(n, flow_stmt); 2271 | ch = CHILD(n, 0); 2272 | switch (TYPE(ch)) { 2273 | case break_stmt: 2274 | return Break(LINENO(n), n->n_col_offset, c->c_arena); 2275 | case continue_stmt: 2276 | return Continue(LINENO(n), n->n_col_offset, c->c_arena); 2277 | case yield_stmt: { /* will reduce to yield_expr */ 2278 | expr_ty exp = ast_for_expr(c, CHILD(ch, 0)); 2279 | if (!exp) 2280 | return NULL; 2281 | return Expr(exp, LINENO(n), n->n_col_offset, c->c_arena); 2282 | } 2283 | case return_stmt: 2284 | if (NCH(ch) == 1) 2285 | return Return(NULL, LINENO(n), n->n_col_offset, c->c_arena); 2286 | else { 2287 | expr_ty expression = ast_for_testlist(c, CHILD(ch, 1)); 2288 | if (!expression) 2289 | return NULL; 2290 | return Return(expression, LINENO(n), n->n_col_offset, c->c_arena); 2291 | } 2292 | case raise_stmt: 2293 | if (NCH(ch) == 1) 2294 | return Raise(NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena); 2295 | else if (NCH(ch) >= 2) { 2296 | expr_ty cause = NULL; 2297 | expr_ty expression = ast_for_expr(c, CHILD(ch, 1)); 2298 | if (!expression) 2299 | return NULL; 2300 | if (NCH(ch) == 4) { 2301 | cause = ast_for_expr(c, CHILD(ch, 3)); 2302 | if (!cause) 2303 | return NULL; 2304 | } 2305 | return Raise(expression, cause, LINENO(n), n->n_col_offset, c->c_arena); 2306 | } 2307 | default: 2308 | PyErr_Format(PyExc_SystemError, 2309 | "unexpected flow_stmt: %d", TYPE(ch)); 2310 | return NULL; 2311 | } 2312 | 2313 | PyErr_SetString(PyExc_SystemError, "unhandled flow statement"); 2314 | return NULL; 2315 | } 2316 | 2317 | static alias_ty 2318 | alias_for_import_name(struct compiling *c, const node *n, int store) 2319 | { 2320 | /* 2321 | import_as_name: NAME ['as' NAME] 2322 | dotted_as_name: dotted_name ['as' NAME] 2323 | dotted_name: NAME ('.' NAME)* 2324 | */ 2325 | identifier str, name; 2326 | 2327 | loop: 2328 | switch (TYPE(n)) { 2329 | case import_as_name: { 2330 | node *name_node = CHILD(n, 0); 2331 | str = NULL; 2332 | name = NEW_IDENTIFIER(name_node); 2333 | if (!name) 2334 | return NULL; 2335 | if (NCH(n) == 3) { 2336 | node *str_node = CHILD(n, 2); 2337 | str = NEW_IDENTIFIER(str_node); 2338 | if (!str) 2339 | return NULL; 2340 | if (store && forbidden_name(str, str_node, 0)) 2341 | return NULL; 2342 | } 2343 | else { 2344 | if (forbidden_name(name, name_node, 0)) 2345 | return NULL; 2346 | } 2347 | return alias(name, str, c->c_arena); 2348 | } 2349 | case dotted_as_name: 2350 | if (NCH(n) == 1) { 2351 | n = CHILD(n, 0); 2352 | goto loop; 2353 | } 2354 | else { 2355 | node *asname_node = CHILD(n, 2); 2356 | alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0); 2357 | if (!a) 2358 | return NULL; 2359 | assert(!a->asname); 2360 | a->asname = NEW_IDENTIFIER(asname_node); 2361 | if (!a->asname) 2362 | return NULL; 2363 | if (forbidden_name(a->asname, asname_node, 0)) 2364 | return NULL; 2365 | return a; 2366 | } 2367 | break; 2368 | case dotted_name: 2369 | if (NCH(n) == 1) { 2370 | node *name_node = CHILD(n, 0); 2371 | name = NEW_IDENTIFIER(name_node); 2372 | if (!name) 2373 | return NULL; 2374 | if (store && forbidden_name(name, name_node, 0)) 2375 | return NULL; 2376 | return alias(name, NULL, c->c_arena); 2377 | } 2378 | else { 2379 | /* Create a string of the form "a.b.c" */ 2380 | int i; 2381 | size_t len; 2382 | char *s; 2383 | PyObject *uni; 2384 | 2385 | len = 0; 2386 | for (i = 0; i < NCH(n); i += 2) 2387 | /* length of string plus one for the dot */ 2388 | len += strlen(STR(CHILD(n, i))) + 1; 2389 | len--; /* the last name doesn't have a dot */ 2390 | str = PyBytes_FromStringAndSize(NULL, len); 2391 | if (!str) 2392 | return NULL; 2393 | s = PyBytes_AS_STRING(str); 2394 | if (!s) 2395 | return NULL; 2396 | for (i = 0; i < NCH(n); i += 2) { 2397 | char *sch = STR(CHILD(n, i)); 2398 | strcpy(s, STR(CHILD(n, i))); 2399 | s += strlen(sch); 2400 | *s++ = '.'; 2401 | } 2402 | --s; 2403 | *s = '\0'; 2404 | uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str), 2405 | PyBytes_GET_SIZE(str), 2406 | NULL); 2407 | Py_DECREF(str); 2408 | if (!uni) 2409 | return NULL; 2410 | str = uni; 2411 | PyUnicode_InternInPlace(&str); 2412 | PyArena_AddPyObject(c->c_arena, str); 2413 | return alias(str, NULL, c->c_arena); 2414 | } 2415 | break; 2416 | case STAR: 2417 | str = PyUnicode_InternFromString("*"); 2418 | PyArena_AddPyObject(c->c_arena, str); 2419 | return alias(str, NULL, c->c_arena); 2420 | default: 2421 | PyErr_Format(PyExc_SystemError, 2422 | "unexpected import name: %d", TYPE(n)); 2423 | return NULL; 2424 | } 2425 | 2426 | PyErr_SetString(PyExc_SystemError, "unhandled import name condition"); 2427 | return NULL; 2428 | } 2429 | 2430 | static stmt_ty 2431 | ast_for_import_stmt(struct compiling *c, const node *n) 2432 | { 2433 | /* 2434 | import_stmt: import_name | import_from 2435 | import_name: 'import' dotted_as_names 2436 | import_from: 'from' (('.' | '...')* dotted_name | ('.' | '...')+) 2437 | 'import' ('*' | '(' import_as_names ')' | import_as_names) 2438 | */ 2439 | int lineno; 2440 | int col_offset; 2441 | int i; 2442 | asdl_seq *aliases; 2443 | 2444 | REQ(n, import_stmt); 2445 | lineno = LINENO(n); 2446 | col_offset = n->n_col_offset; 2447 | n = CHILD(n, 0); 2448 | if (TYPE(n) == import_name) { 2449 | n = CHILD(n, 1); 2450 | REQ(n, dotted_as_names); 2451 | aliases = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena); 2452 | if (!aliases) 2453 | return NULL; 2454 | for (i = 0; i < NCH(n); i += 2) { 2455 | alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1); 2456 | if (!import_alias) 2457 | return NULL; 2458 | asdl_seq_SET(aliases, i / 2, import_alias); 2459 | } 2460 | return Import(aliases, lineno, col_offset, c->c_arena); 2461 | } 2462 | else if (TYPE(n) == import_from) { 2463 | int n_children; 2464 | int idx, ndots = 0; 2465 | alias_ty mod = NULL; 2466 | identifier modname = NULL; 2467 | 2468 | /* Count the number of dots (for relative imports) and check for the 2469 | optional module name */ 2470 | for (idx = 1; idx < NCH(n); idx++) { 2471 | if (TYPE(CHILD(n, idx)) == dotted_name) { 2472 | mod = alias_for_import_name(c, CHILD(n, idx), 0); 2473 | if (!mod) 2474 | return NULL; 2475 | idx++; 2476 | break; 2477 | } else if (TYPE(CHILD(n, idx)) == ELLIPSIS) { 2478 | /* three consecutive dots are tokenized as one ELLIPSIS */ 2479 | ndots += 3; 2480 | continue; 2481 | } else if (TYPE(CHILD(n, idx)) != DOT) { 2482 | break; 2483 | } 2484 | ndots++; 2485 | } 2486 | idx++; /* skip over the 'import' keyword */ 2487 | switch (TYPE(CHILD(n, idx))) { 2488 | case STAR: 2489 | /* from ... import * */ 2490 | n = CHILD(n, idx); 2491 | n_children = 1; 2492 | break; 2493 | case LPAR: 2494 | /* from ... import (x, y, z) */ 2495 | n = CHILD(n, idx + 1); 2496 | n_children = NCH(n); 2497 | break; 2498 | case import_as_names: 2499 | /* from ... import x, y, z */ 2500 | n = CHILD(n, idx); 2501 | n_children = NCH(n); 2502 | if (n_children % 2 == 0) { 2503 | ast_error(n, "trailing comma not allowed without" 2504 | " surrounding parentheses"); 2505 | return NULL; 2506 | } 2507 | break; 2508 | default: 2509 | ast_error(n, "Unexpected node-type in from-import"); 2510 | return NULL; 2511 | } 2512 | 2513 | aliases = asdl_seq_new((n_children + 1) / 2, c->c_arena); 2514 | if (!aliases) 2515 | return NULL; 2516 | 2517 | /* handle "from ... import *" special b/c there's no children */ 2518 | if (TYPE(n) == STAR) { 2519 | alias_ty import_alias = alias_for_import_name(c, n, 1); 2520 | if (!import_alias) 2521 | return NULL; 2522 | asdl_seq_SET(aliases, 0, import_alias); 2523 | } 2524 | else { 2525 | for (i = 0; i < NCH(n); i += 2) { 2526 | alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1); 2527 | if (!import_alias) 2528 | return NULL; 2529 | asdl_seq_SET(aliases, i / 2, import_alias); 2530 | } 2531 | } 2532 | if (mod != NULL) 2533 | modname = mod->name; 2534 | return ImportFrom(modname, aliases, ndots, lineno, col_offset, 2535 | c->c_arena); 2536 | } 2537 | PyErr_Format(PyExc_SystemError, 2538 | "unknown import statement: starts with command '%s'", 2539 | STR(CHILD(n, 0))); 2540 | return NULL; 2541 | } 2542 | 2543 | static stmt_ty 2544 | ast_for_global_stmt(struct compiling *c, const node *n) 2545 | { 2546 | /* global_stmt: 'global' NAME (',' NAME)* */ 2547 | identifier name; 2548 | asdl_seq *s; 2549 | int i; 2550 | 2551 | REQ(n, global_stmt); 2552 | s = asdl_seq_new(NCH(n) / 2, c->c_arena); 2553 | if (!s) 2554 | return NULL; 2555 | for (i = 1; i < NCH(n); i += 2) { 2556 | name = NEW_IDENTIFIER(CHILD(n, i)); 2557 | if (!name) 2558 | return NULL; 2559 | asdl_seq_SET(s, i / 2, name); 2560 | } 2561 | return Global(s, LINENO(n), n->n_col_offset, c->c_arena); 2562 | } 2563 | 2564 | static stmt_ty 2565 | ast_for_nonlocal_stmt(struct compiling *c, const node *n) 2566 | { 2567 | /* nonlocal_stmt: 'nonlocal' NAME (',' NAME)* */ 2568 | identifier name; 2569 | asdl_seq *s; 2570 | int i; 2571 | 2572 | REQ(n, nonlocal_stmt); 2573 | s = asdl_seq_new(NCH(n) / 2, c->c_arena); 2574 | if (!s) 2575 | return NULL; 2576 | for (i = 1; i < NCH(n); i += 2) { 2577 | name = NEW_IDENTIFIER(CHILD(n, i)); 2578 | if (!name) 2579 | return NULL; 2580 | asdl_seq_SET(s, i / 2, name); 2581 | } 2582 | return Nonlocal(s, LINENO(n), n->n_col_offset, c->c_arena); 2583 | } 2584 | 2585 | static stmt_ty 2586 | ast_for_assert_stmt(struct compiling *c, const node *n) 2587 | { 2588 | /* assert_stmt: 'assert' test [',' test] */ 2589 | REQ(n, assert_stmt); 2590 | if (NCH(n) == 2) { 2591 | expr_ty expression = ast_for_expr(c, CHILD(n, 1)); 2592 | if (!expression) 2593 | return NULL; 2594 | return Assert(expression, NULL, LINENO(n), n->n_col_offset, c->c_arena); 2595 | } 2596 | else if (NCH(n) == 4) { 2597 | expr_ty expr1, expr2; 2598 | 2599 | expr1 = ast_for_expr(c, CHILD(n, 1)); 2600 | if (!expr1) 2601 | return NULL; 2602 | expr2 = ast_for_expr(c, CHILD(n, 3)); 2603 | if (!expr2) 2604 | return NULL; 2605 | 2606 | return Assert(expr1, expr2, LINENO(n), n->n_col_offset, c->c_arena); 2607 | } 2608 | PyErr_Format(PyExc_SystemError, 2609 | "improper number of parts to 'assert' statement: %d", 2610 | NCH(n)); 2611 | return NULL; 2612 | } 2613 | 2614 | static asdl_seq * 2615 | ast_for_suite(struct compiling *c, const node *n) 2616 | { 2617 | /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */ 2618 | asdl_seq *seq; 2619 | stmt_ty s; 2620 | int i, total, num, end, pos = 0; 2621 | node *ch; 2622 | 2623 | REQ(n, suite); 2624 | 2625 | total = num_stmts(n); 2626 | seq = asdl_seq_new(total, c->c_arena); 2627 | if (!seq) 2628 | return NULL; 2629 | if (TYPE(CHILD(n, 0)) == simple_stmt) { 2630 | n = CHILD(n, 0); 2631 | /* simple_stmt always ends with a NEWLINE, 2632 | and may have a trailing SEMI 2633 | */ 2634 | end = NCH(n) - 1; 2635 | if (TYPE(CHILD(n, end - 1)) == SEMI) 2636 | end--; 2637 | /* loop by 2 to skip semi-colons */ 2638 | for (i = 0; i < end; i += 2) { 2639 | ch = CHILD(n, i); 2640 | s = ast_for_stmt(c, ch); 2641 | if (!s) 2642 | return NULL; 2643 | asdl_seq_SET(seq, pos++, s); 2644 | } 2645 | } 2646 | else { 2647 | for (i = 2; i < (NCH(n) - 1); i++) { 2648 | ch = CHILD(n, i); 2649 | REQ(ch, stmt); 2650 | num = num_stmts(ch); 2651 | if (num == 1) { 2652 | /* small_stmt or compound_stmt with only one child */ 2653 | s = ast_for_stmt(c, ch); 2654 | if (!s) 2655 | return NULL; 2656 | asdl_seq_SET(seq, pos++, s); 2657 | } 2658 | else { 2659 | int j; 2660 | ch = CHILD(ch, 0); 2661 | REQ(ch, simple_stmt); 2662 | for (j = 0; j < NCH(ch); j += 2) { 2663 | /* statement terminates with a semi-colon ';' */ 2664 | if (NCH(CHILD(ch, j)) == 0) { 2665 | assert((j + 1) == NCH(ch)); 2666 | break; 2667 | } 2668 | s = ast_for_stmt(c, CHILD(ch, j)); 2669 | if (!s) 2670 | return NULL; 2671 | asdl_seq_SET(seq, pos++, s); 2672 | } 2673 | } 2674 | } 2675 | } 2676 | assert(pos == seq->size); 2677 | return seq; 2678 | } 2679 | 2680 | 2681 | static stmt_ty 2682 | ast_for_if_stmt(struct compiling *c, const node *n) 2683 | { 2684 | /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)* 2685 | ['else' ':' suite] 2686 | */ 2687 | char *s; 2688 | 2689 | REQ(n, if_stmt); 2690 | 2691 | if (NCH(n) == 4) { 2692 | expr_ty expression; 2693 | asdl_seq *suite_seq; 2694 | 2695 | expression = ast_for_expr(c, CHILD(n, 1)); 2696 | if (!expression) 2697 | return NULL; 2698 | suite_seq = ast_for_suite(c, CHILD(n, 3)); 2699 | if (!suite_seq) 2700 | return NULL; 2701 | 2702 | return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset, 2703 | c->c_arena); 2704 | } 2705 | 2706 | s = STR(CHILD(n, 4)); 2707 | /* s[2], the third character in the string, will be 2708 | 's' for el_s_e, or 2709 | 'i' for el_i_f 2710 | */ 2711 | if (streq( s, else)) { 2712 | expr_ty expression; 2713 | asdl_seq *seq1, *seq2; 2714 | 2715 | expression = ast_for_expr(c, CHILD(n, 1)); 2716 | if (!expression) 2717 | return NULL; 2718 | seq1 = ast_for_suite(c, CHILD(n, 3)); 2719 | if (!seq1) 2720 | return NULL; 2721 | seq2 = ast_for_suite(c, CHILD(n, 6)); 2722 | if (!seq2) 2723 | return NULL; 2724 | 2725 | return If(expression, seq1, seq2, LINENO(n), n->n_col_offset, 2726 | c->c_arena); 2727 | } 2728 | else if (streq( s, elif)) { 2729 | int i, n_elif, has_else = 0; 2730 | expr_ty expression; 2731 | asdl_seq *suite_seq; 2732 | asdl_seq *orelse = NULL; 2733 | n_elif = NCH(n) - 4; 2734 | /* must reference the child n_elif+1 since 'else' token is third, 2735 | not fourth, child from the end. */ 2736 | if (TYPE(CHILD(n, (n_elif + 1))) == NAME 2737 | && streq( STR(CHILD(n, (n_elif + 1))), else)) { 2738 | has_else = 1; 2739 | n_elif -= 3; 2740 | } 2741 | n_elif /= 4; 2742 | 2743 | if (has_else) { 2744 | asdl_seq *suite_seq2; 2745 | 2746 | orelse = asdl_seq_new(1, c->c_arena); 2747 | if (!orelse) 2748 | return NULL; 2749 | expression = ast_for_expr(c, CHILD(n, NCH(n) - 6)); 2750 | if (!expression) 2751 | return NULL; 2752 | suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4)); 2753 | if (!suite_seq) 2754 | return NULL; 2755 | suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1)); 2756 | if (!suite_seq2) 2757 | return NULL; 2758 | 2759 | asdl_seq_SET(orelse, 0, 2760 | If(expression, suite_seq, suite_seq2, 2761 | LINENO(CHILD(n, NCH(n) - 6)), 2762 | CHILD(n, NCH(n) - 6)->n_col_offset, 2763 | c->c_arena)); 2764 | /* the just-created orelse handled the last elif */ 2765 | n_elif--; 2766 | } 2767 | 2768 | for (i = 0; i < n_elif; i++) { 2769 | int off = 5 + (n_elif - i - 1) * 4; 2770 | asdl_seq *newobj = asdl_seq_new(1, c->c_arena); 2771 | if (!newobj) 2772 | return NULL; 2773 | expression = ast_for_expr(c, CHILD(n, off)); 2774 | if (!expression) 2775 | return NULL; 2776 | suite_seq = ast_for_suite(c, CHILD(n, off + 2)); 2777 | if (!suite_seq) 2778 | return NULL; 2779 | 2780 | asdl_seq_SET(newobj, 0, 2781 | If(expression, suite_seq, orelse, 2782 | LINENO(CHILD(n, off)), 2783 | CHILD(n, off)->n_col_offset, c->c_arena)); 2784 | orelse = newobj; 2785 | } 2786 | expression = ast_for_expr(c, CHILD(n, 1)); 2787 | if (!expression) 2788 | return NULL; 2789 | suite_seq = ast_for_suite(c, CHILD(n, 3)); 2790 | if (!suite_seq) 2791 | return NULL; 2792 | return If(expression, suite_seq, orelse, 2793 | LINENO(n), n->n_col_offset, c->c_arena); 2794 | } 2795 | 2796 | PyErr_Format(PyExc_SystemError, 2797 | "unexpected token in 'if' statement: %s", s); 2798 | return NULL; 2799 | } 2800 | 2801 | static stmt_ty 2802 | ast_for_while_stmt(struct compiling *c, const node *n) 2803 | { 2804 | /* while_stmt: 'while' test ':' suite ['else' ':' suite] */ 2805 | REQ(n, while_stmt); 2806 | 2807 | if (NCH(n) == 4) { 2808 | expr_ty expression; 2809 | asdl_seq *suite_seq; 2810 | 2811 | expression = ast_for_expr(c, CHILD(n, 1)); 2812 | if (!expression) 2813 | return NULL; 2814 | suite_seq = ast_for_suite(c, CHILD(n, 3)); 2815 | if (!suite_seq) 2816 | return NULL; 2817 | return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset, c->c_arena); 2818 | } 2819 | else if (NCH(n) == 7) { 2820 | expr_ty expression; 2821 | asdl_seq *seq1, *seq2; 2822 | 2823 | expression = ast_for_expr(c, CHILD(n, 1)); 2824 | if (!expression) 2825 | return NULL; 2826 | seq1 = ast_for_suite(c, CHILD(n, 3)); 2827 | if (!seq1) 2828 | return NULL; 2829 | seq2 = ast_for_suite(c, CHILD(n, 6)); 2830 | if (!seq2) 2831 | return NULL; 2832 | 2833 | return While(expression, seq1, seq2, LINENO(n), n->n_col_offset, c->c_arena); 2834 | } 2835 | 2836 | PyErr_Format(PyExc_SystemError, 2837 | "wrong number of tokens for 'while' statement: %d", 2838 | NCH(n)); 2839 | return NULL; 2840 | } 2841 | 2842 | static stmt_ty 2843 | ast_for_for_stmt(struct compiling *c, const node *n) 2844 | { 2845 | asdl_seq *_target, *seq = NULL, *suite_seq; 2846 | expr_ty expression; 2847 | expr_ty target, first; 2848 | const node *node_target; 2849 | /* for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] */ 2850 | REQ(n, for_stmt); 2851 | 2852 | if (NCH(n) == 9) { 2853 | seq = ast_for_suite(c, CHILD(n, 8)); 2854 | if (!seq) 2855 | return NULL; 2856 | } 2857 | 2858 | node_target = CHILD(n, 1); 2859 | _target = ast_for_exprlist(c, node_target, Store); 2860 | if (!_target) 2861 | return NULL; 2862 | /* Check the # of children rather than the length of _target, since 2863 | for x, in ... has 1 element in _target, but still requires a Tuple. */ 2864 | first = (expr_ty)asdl_seq_GET(_target, 0); 2865 | if (NCH(node_target) == 1) 2866 | target = first; 2867 | else 2868 | target = Tuple(_target, Store, first->lineno, first->col_offset, c->c_arena); 2869 | 2870 | expression = ast_for_testlist(c, CHILD(n, 3)); 2871 | if (!expression) 2872 | return NULL; 2873 | suite_seq = ast_for_suite(c, CHILD(n, 5)); 2874 | if (!suite_seq) 2875 | return NULL; 2876 | 2877 | return For(target, expression, suite_seq, seq, LINENO(n), n->n_col_offset, 2878 | c->c_arena); 2879 | } 2880 | 2881 | static excepthandler_ty 2882 | ast_for_except_clause(struct compiling *c, const node *exc, node *body) 2883 | { 2884 | /* except_clause: 'except' [test ['as' test]] */ 2885 | REQ(exc, except_clause); 2886 | REQ(body, suite); 2887 | 2888 | if (NCH(exc) == 1) { 2889 | asdl_seq *suite_seq = ast_for_suite(c, body); 2890 | if (!suite_seq) 2891 | return NULL; 2892 | 2893 | return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc), 2894 | exc->n_col_offset, c->c_arena); 2895 | } 2896 | else if (NCH(exc) == 2) { 2897 | expr_ty expression; 2898 | asdl_seq *suite_seq; 2899 | 2900 | expression = ast_for_expr(c, CHILD(exc, 1)); 2901 | if (!expression) 2902 | return NULL; 2903 | suite_seq = ast_for_suite(c, body); 2904 | if (!suite_seq) 2905 | return NULL; 2906 | 2907 | return ExceptHandler(expression, NULL, suite_seq, LINENO(exc), 2908 | exc->n_col_offset, c->c_arena); 2909 | } 2910 | else if (NCH(exc) == 4) { 2911 | asdl_seq *suite_seq; 2912 | expr_ty expression; 2913 | identifier e = NEW_IDENTIFIER(CHILD(exc, 3)); 2914 | if (!e) 2915 | return NULL; 2916 | if (forbidden_name(e, CHILD(exc, 3), 0)) 2917 | return NULL; 2918 | expression = ast_for_expr(c, CHILD(exc, 1)); 2919 | if (!expression) 2920 | return NULL; 2921 | suite_seq = ast_for_suite(c, body); 2922 | if (!suite_seq) 2923 | return NULL; 2924 | 2925 | return ExceptHandler(expression, e, suite_seq, LINENO(exc), 2926 | exc->n_col_offset, c->c_arena); 2927 | } 2928 | 2929 | PyErr_Format(PyExc_SystemError, 2930 | "wrong number of children for 'except' clause: %d", 2931 | NCH(exc)); 2932 | return NULL; 2933 | } 2934 | 2935 | static stmt_ty 2936 | ast_for_try_stmt(struct compiling *c, const node *n) 2937 | { 2938 | const int nch = NCH(n); 2939 | int n_except = (nch - 3)/3; 2940 | asdl_seq *body, *orelse = NULL, *finally = NULL; 2941 | 2942 | REQ(n, try_stmt); 2943 | 2944 | body = ast_for_suite(c, CHILD(n, 2)); 2945 | if (body == NULL) 2946 | return NULL; 2947 | 2948 | if (TYPE(CHILD(n, nch - 3)) == NAME) { 2949 | if (streq(STR(CHILD(n, nch - 3)), finally)) { 2950 | if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) { 2951 | /* we can assume it's an "else", 2952 | because nch >= 9 for try-else-finally and 2953 | it would otherwise have a type of except_clause */ 2954 | orelse = ast_for_suite(c, CHILD(n, nch - 4)); 2955 | if (orelse == NULL) 2956 | return NULL; 2957 | n_except--; 2958 | } 2959 | 2960 | finally = ast_for_suite(c, CHILD(n, nch - 1)); 2961 | if (finally == NULL) 2962 | return NULL; 2963 | n_except--; 2964 | } 2965 | else { 2966 | /* we can assume it's an "else", 2967 | otherwise it would have a type of except_clause */ 2968 | orelse = ast_for_suite(c, CHILD(n, nch - 1)); 2969 | if (orelse == NULL) 2970 | return NULL; 2971 | n_except--; 2972 | } 2973 | } 2974 | else if (TYPE(CHILD(n, nch - 3)) != except_clause) { 2975 | ast_error(n, "malformed 'try' statement"); 2976 | return NULL; 2977 | } 2978 | 2979 | if (n_except > 0) { 2980 | int i; 2981 | stmt_ty except_st; 2982 | /* process except statements to create a try ... except */ 2983 | asdl_seq *handlers = asdl_seq_new(n_except, c->c_arena); 2984 | if (handlers == NULL) 2985 | return NULL; 2986 | 2987 | for (i = 0; i < n_except; i++) { 2988 | excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3), 2989 | CHILD(n, 5 + i * 3)); 2990 | if (!e) 2991 | return NULL; 2992 | asdl_seq_SET(handlers, i, e); 2993 | } 2994 | 2995 | except_st = TryExcept(body, handlers, orelse, LINENO(n), 2996 | n->n_col_offset, c->c_arena); 2997 | if (!finally) 2998 | return except_st; 2999 | 3000 | /* if a 'finally' is present too, we nest the TryExcept within a 3001 | TryFinally to emulate try ... except ... finally */ 3002 | body = asdl_seq_new(1, c->c_arena); 3003 | if (body == NULL) 3004 | return NULL; 3005 | asdl_seq_SET(body, 0, except_st); 3006 | } 3007 | 3008 | /* must be a try ... finally (except clauses are in body, if any exist) */ 3009 | assert(finally != NULL); 3010 | return TryFinally(body, finally, LINENO(n), n->n_col_offset, c->c_arena); 3011 | } 3012 | 3013 | /* with_item: test ['as' expr] */ 3014 | static stmt_ty 3015 | ast_for_with_item(struct compiling *c, const node *n, asdl_seq *content) 3016 | { 3017 | expr_ty context_expr, optional_vars = NULL; 3018 | 3019 | REQ(n, with_item); 3020 | context_expr = ast_for_expr(c, CHILD(n, 0)); 3021 | if (!context_expr) 3022 | return NULL; 3023 | if (NCH(n) == 3) { 3024 | optional_vars = ast_for_expr(c, CHILD(n, 2)); 3025 | 3026 | if (!optional_vars) { 3027 | return NULL; 3028 | } 3029 | if (!set_context(c, optional_vars, Store, n)) { 3030 | return NULL; 3031 | } 3032 | } 3033 | 3034 | return With(context_expr, optional_vars, content, LINENO(n), 3035 | n->n_col_offset, c->c_arena); 3036 | } 3037 | 3038 | /* with_stmt: 'with' with_item (',' with_item)* ':' suite */ 3039 | static stmt_ty 3040 | ast_for_with_stmt(struct compiling *c, const node *n) 3041 | { 3042 | int i; 3043 | stmt_ty ret; 3044 | asdl_seq *inner; 3045 | 3046 | REQ(n, with_stmt); 3047 | 3048 | /* process the with items inside-out */ 3049 | i = NCH(n) - 1; 3050 | /* the suite of the innermost with item is the suite of the with stmt */ 3051 | inner = ast_for_suite(c, CHILD(n, i)); 3052 | if (!inner) 3053 | return NULL; 3054 | 3055 | for (;;) { 3056 | i -= 2; 3057 | ret = ast_for_with_item(c, CHILD(n, i), inner); 3058 | if (!ret) 3059 | return NULL; 3060 | /* was this the last item? */ 3061 | if (i == 1) 3062 | break; 3063 | /* if not, wrap the result so far in a new sequence */ 3064 | inner = asdl_seq_new(1, c->c_arena); 3065 | if (!inner) 3066 | return NULL; 3067 | asdl_seq_SET(inner, 0, ret); 3068 | } 3069 | 3070 | return ret; 3071 | } 3072 | 3073 | static stmt_ty 3074 | ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq) 3075 | { 3076 | /* classdef: 'class' NAME ['(' arglist ')'] ':' suite */ 3077 | PyObject *classname; 3078 | asdl_seq *s; 3079 | expr_ty call; 3080 | 3081 | REQ(n, classdef); 3082 | 3083 | if (NCH(n) == 4) { /* class NAME ':' suite */ 3084 | s = ast_for_suite(c, CHILD(n, 3)); 3085 | if (!s) 3086 | return NULL; 3087 | classname = NEW_IDENTIFIER(CHILD(n, 1)); 3088 | if (!classname) 3089 | return NULL; 3090 | if (forbidden_name(classname, CHILD(n, 3), 0)) 3091 | return NULL; 3092 | return ClassDef(classname, NULL, NULL, NULL, NULL, s, decorator_seq, 3093 | LINENO(n), n->n_col_offset, c->c_arena); 3094 | } 3095 | 3096 | if (TYPE(CHILD(n, 3)) == RPAR) { /* class NAME '(' ')' ':' suite */ 3097 | s = ast_for_suite(c, CHILD(n,5)); 3098 | if (!s) 3099 | return NULL; 3100 | classname = NEW_IDENTIFIER(CHILD(n, 1)); 3101 | if (!classname) 3102 | return NULL; 3103 | if (forbidden_name(classname, CHILD(n, 3), 0)) 3104 | return NULL; 3105 | return ClassDef(classname, NULL, NULL, NULL, NULL, s, decorator_seq, 3106 | LINENO(n), n->n_col_offset, c->c_arena); 3107 | } 3108 | 3109 | /* class NAME '(' arglist ')' ':' suite */ 3110 | /* build up a fake Call node so we can extract its pieces */ 3111 | { 3112 | PyObject *dummy_name; 3113 | expr_ty dummy; 3114 | dummy_name = NEW_IDENTIFIER(CHILD(n, 1)); 3115 | if (!dummy_name) 3116 | return NULL; 3117 | dummy = Name(dummy_name, Load, LINENO(n), n->n_col_offset, c->c_arena); 3118 | call = ast_for_call(c, CHILD(n, 3), dummy); 3119 | if (!call) 3120 | return NULL; 3121 | } 3122 | s = ast_for_suite(c, CHILD(n, 6)); 3123 | if (!s) 3124 | return NULL; 3125 | classname = NEW_IDENTIFIER(CHILD(n, 1)); 3126 | if (!classname) 3127 | return NULL; 3128 | if (forbidden_name(classname, CHILD(n, 1), 0)) 3129 | return NULL; 3130 | 3131 | return ClassDef(classname, call->v.Call.args, call->v.Call.keywords, 3132 | call->v.Call.starargs, call->v.Call.kwargs, s, 3133 | decorator_seq, LINENO(n), n->n_col_offset, c->c_arena); 3134 | } 3135 | 3136 | static stmt_ty 3137 | ast_for_stmt(struct compiling *c, const node *n) 3138 | { 3139 | if (TYPE(n) == stmt) { 3140 | assert(NCH(n) == 1); 3141 | n = CHILD(n, 0); 3142 | } 3143 | if (TYPE(n) == simple_stmt) { 3144 | assert(num_stmts(n) == 1); 3145 | n = CHILD(n, 0); 3146 | } 3147 | if (TYPE(n) == small_stmt) { 3148 | n = CHILD(n, 0); 3149 | /* small_stmt: expr_stmt | del_stmt | pass_stmt | flow_stmt 3150 | | import_stmt | global_stmt | nonlocal_stmt | assert_stmt 3151 | */ 3152 | switch (TYPE(n)) { 3153 | case expr_stmt: 3154 | return ast_for_expr_stmt(c, n); 3155 | case del_stmt: 3156 | return ast_for_del_stmt(c, n); 3157 | case pass_stmt: 3158 | return Pass(LINENO(n), n->n_col_offset, c->c_arena); 3159 | case flow_stmt: 3160 | return ast_for_flow_stmt(c, n); 3161 | case import_stmt: 3162 | return ast_for_import_stmt(c, n); 3163 | case global_stmt: 3164 | return ast_for_global_stmt(c, n); 3165 | case nonlocal_stmt: 3166 | return ast_for_nonlocal_stmt(c, n); 3167 | case assert_stmt: 3168 | return ast_for_assert_stmt(c, n); 3169 | default: 3170 | PyErr_Format(PyExc_SystemError, 3171 | "unhandled small_stmt: TYPE=%d NCH=%d\n", 3172 | TYPE(n), NCH(n)); 3173 | return NULL; 3174 | } 3175 | } 3176 | else { 3177 | /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt 3178 | | funcdef | classdef | decorated 3179 | */ 3180 | node *ch = CHILD(n, 0); 3181 | REQ(n, compound_stmt); 3182 | switch (TYPE(ch)) { 3183 | case if_stmt: 3184 | return ast_for_if_stmt(c, ch); 3185 | case while_stmt: 3186 | return ast_for_while_stmt(c, ch); 3187 | case for_stmt: 3188 | return ast_for_for_stmt(c, ch); 3189 | case try_stmt: 3190 | return ast_for_try_stmt(c, ch); 3191 | case with_stmt: 3192 | return ast_for_with_stmt(c, ch); 3193 | case funcdef: 3194 | return ast_for_funcdef(c, ch, NULL); 3195 | case classdef: 3196 | return ast_for_classdef(c, ch, NULL); 3197 | case decorated: 3198 | return ast_for_decorated(c, ch); 3199 | default: 3200 | PyErr_Format(PyExc_SystemError, 3201 | "unhandled small_stmt: TYPE=%d NCH=%d\n", 3202 | TYPE(n), NCH(n)); 3203 | return NULL; 3204 | } 3205 | } 3206 | } 3207 | 3208 | static PyObject * 3209 | parsenumber(struct compiling *c, const char *s) 3210 | { 3211 | const char *end; 3212 | long x; 3213 | double dx; 3214 | Py_complex compl; 3215 | int imflag; 3216 | 3217 | assert(s != NULL); 3218 | errno = 0; 3219 | end = s + strlen(s) - 1; 3220 | imflag = *end == 'j' || *end == 'J'; 3221 | if (s[0] == '0') { 3222 | x = (long) PyOS_strtoul((char *)s, (char **)&end, 0); 3223 | if (x < 0 && errno == 0) { 3224 | return PyLong_FromString((char *)s, 3225 | (char **)0, 3226 | 0); 3227 | } 3228 | } 3229 | else 3230 | x = PyOS_strtol((char *)s, (char **)&end, 0); 3231 | if (*end == '\0') { 3232 | if (errno != 0) 3233 | return PyLong_FromString((char *)s, (char **)0, 0); 3234 | return PyLong_FromLong(x); 3235 | } 3236 | /* XXX Huge floats may silently fail */ 3237 | if (imflag) { 3238 | compl.real = 0.; 3239 | compl.imag = PyOS_string_to_double(s, (char **)&end, NULL); 3240 | if (compl.imag == -1.0 && PyErr_Occurred()) 3241 | return NULL; 3242 | return PyComplex_FromCComplex(compl); 3243 | } 3244 | else 3245 | { 3246 | dx = PyOS_string_to_double(s, NULL, NULL); 3247 | if (dx == -1.0 && PyErr_Occurred()) 3248 | return NULL; 3249 | return PyFloat_FromDouble(dx); 3250 | } 3251 | } 3252 | 3253 | static PyObject * 3254 | decode_utf8(struct compiling *c, const char **sPtr, const char *end, char* encoding) 3255 | { 3256 | PyObject *u, *v; 3257 | char *s, *t; 3258 | t = s = (char *)*sPtr; 3259 | /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */ 3260 | while (s < end && (*s & 0x80)) s++; 3261 | *sPtr = s; 3262 | u = PyUnicode_DecodeUTF8(t, s - t, NULL); 3263 | if (u == NULL) 3264 | return NULL; 3265 | v = PyUnicode_AsEncodedString(u, encoding, NULL); 3266 | Py_DECREF(u); 3267 | return v; 3268 | } 3269 | 3270 | static PyObject * 3271 | decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, const char *encoding) 3272 | { 3273 | PyObject *v, *u; 3274 | char *buf; 3275 | char *p; 3276 | const char *end; 3277 | 3278 | if (encoding == NULL) { 3279 | buf = (char *)s; 3280 | u = NULL; 3281 | } else { 3282 | /* check for integer overflow */ 3283 | if (len > PY_SIZE_MAX / 6) 3284 | return NULL; 3285 | /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5 3286 | "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */ 3287 | u = PyBytes_FromStringAndSize((char *)NULL, len * 6); 3288 | if (u == NULL) 3289 | return NULL; 3290 | p = buf = PyBytes_AsString(u); 3291 | end = s + len; 3292 | while (s < end) { 3293 | if (*s == '\\') { 3294 | *p++ = *s++; 3295 | if (*s & 0x80) { 3296 | strcpy(p, "u005c"); 3297 | p += 5; 3298 | } 3299 | } 3300 | if (*s & 0x80) { /* XXX inefficient */ 3301 | PyObject *w; 3302 | char *r; 3303 | Py_ssize_t rn, i; 3304 | w = decode_utf8(c, &s, end, "utf-32-be"); 3305 | if (w == NULL) { 3306 | Py_DECREF(u); 3307 | return NULL; 3308 | } 3309 | r = PyBytes_AS_STRING(w); 3310 | rn = Py_SIZE(w); 3311 | assert(rn % 4 == 0); 3312 | for (i = 0; i < rn; i += 4) { 3313 | sprintf(p, "\\U%02x%02x%02x%02x", 3314 | r[i + 0] & 0xFF, 3315 | r[i + 1] & 0xFF, 3316 | r[i + 2] & 0xFF, 3317 | r[i + 3] & 0xFF); 3318 | p += 10; 3319 | } 3320 | /* Should be impossible to overflow */ 3321 | assert(p - buf <= Py_SIZE(u)); 3322 | Py_DECREF(w); 3323 | } else { 3324 | *p++ = *s++; 3325 | } 3326 | } 3327 | len = p - buf; 3328 | s = buf; 3329 | } 3330 | if (rawmode) 3331 | v = PyUnicode_DecodeRawUnicodeEscape(s, len, NULL); 3332 | else 3333 | v = PyUnicode_DecodeUnicodeEscape(s, len, NULL); 3334 | Py_XDECREF(u); 3335 | return v; 3336 | } 3337 | 3338 | /* s is a Python string literal, including the bracketing quote characters, 3339 | * and r &/or b prefixes (if any), and embedded escape sequences (if any). 3340 | * parsestr parses it, and returns the decoded Python string object. 3341 | */ 3342 | static PyObject * 3343 | parsestr(struct compiling *c, const node *n, int *bytesmode) 3344 | { 3345 | size_t len; 3346 | const char *s = STR(n); 3347 | int quote = Py_CHARMASK(*s); 3348 | int rawmode = 0; 3349 | int need_encoding; 3350 | if (isalpha(quote)) { 3351 | if (quote == 'b' || quote == 'B') { 3352 | quote = *++s; 3353 | *bytesmode = 1; 3354 | } 3355 | if (quote == 'r' || quote == 'R') { 3356 | quote = *++s; 3357 | rawmode = 1; 3358 | } 3359 | } 3360 | if (quote != '\'' && quote != '\"') { 3361 | PyErr_BadInternalCall(); 3362 | return NULL; 3363 | } 3364 | s++; 3365 | len = strlen(s); 3366 | if (len > INT_MAX) { 3367 | PyErr_SetString(PyExc_OverflowError, 3368 | "string to parse is too long"); 3369 | return NULL; 3370 | } 3371 | if (s[--len] != quote) { 3372 | PyErr_BadInternalCall(); 3373 | return NULL; 3374 | } 3375 | if (len >= 4 && s[0] == quote && s[1] == quote) { 3376 | s += 2; 3377 | len -= 2; 3378 | if (s[--len] != quote || s[--len] != quote) { 3379 | PyErr_BadInternalCall(); 3380 | return NULL; 3381 | } 3382 | } 3383 | if (!*bytesmode && !rawmode) { 3384 | return decode_unicode(c, s, len, rawmode, c->c_encoding); 3385 | } 3386 | if (*bytesmode) { 3387 | /* Disallow non-ascii characters (but not escapes) */ 3388 | const char *c; 3389 | for (c = s; *c; c++) { 3390 | if (Py_CHARMASK(*c) >= 0x80) { 3391 | ast_error(n, "bytes can only contain ASCII " 3392 | "literal characters."); 3393 | return NULL; 3394 | } 3395 | } 3396 | } 3397 | need_encoding = (!*bytesmode && c->c_encoding != NULL && 3398 | strcmp(c->c_encoding, "utf-8") != 0); 3399 | if (rawmode || strchr(s, '\\') == NULL) { 3400 | if (need_encoding) { 3401 | PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL); 3402 | if (u == NULL || !*bytesmode) 3403 | return u; 3404 | v = PyUnicode_AsEncodedString(u, c->c_encoding, NULL); 3405 | Py_DECREF(u); 3406 | return v; 3407 | } else if (*bytesmode) { 3408 | return PyBytes_FromStringAndSize(s, len); 3409 | } else if (strcmp(c->c_encoding, "utf-8") == 0) { 3410 | return PyUnicode_FromStringAndSize(s, len); 3411 | } else { 3412 | return PyUnicode_DecodeLatin1(s, len, NULL); 3413 | } 3414 | } 3415 | return PyBytes_DecodeEscape(s, len, NULL, 1, 3416 | need_encoding ? c->c_encoding : NULL); 3417 | } 3418 | 3419 | /* Build a Python string object out of a STRING+ atom. This takes care of 3420 | * compile-time literal catenation, calling parsestr() on each piece, and 3421 | * pasting the intermediate results together. 3422 | */ 3423 | static PyObject * 3424 | parsestrplus(struct compiling *c, const node *n, int *bytesmode) 3425 | { 3426 | PyObject *v; 3427 | int i; 3428 | REQ(CHILD(n, 0), STRING); 3429 | v = parsestr(c, CHILD(n, 0), bytesmode); 3430 | if (v != NULL) { 3431 | /* String literal concatenation */ 3432 | for (i = 1; i < NCH(n); i++) { 3433 | PyObject *s; 3434 | int subbm = 0; 3435 | s = parsestr(c, CHILD(n, i), &subbm); 3436 | if (s == NULL) 3437 | goto onError; 3438 | if (*bytesmode != subbm) { 3439 | ast_error(n, "cannot mix bytes and nonbytes literals"); 3440 | goto onError; 3441 | } 3442 | if (PyBytes_Check(v) && PyBytes_Check(s)) { 3443 | PyBytes_ConcatAndDel(&v, s); 3444 | if (v == NULL) 3445 | goto onError; 3446 | } 3447 | else { 3448 | PyObject *temp = PyUnicode_Concat(v, s); 3449 | Py_DECREF(s); 3450 | Py_DECREF(v); 3451 | v = temp; 3452 | if (v == NULL) 3453 | goto onError; 3454 | } 3455 | } 3456 | } 3457 | return v; 3458 | 3459 | onError: 3460 | Py_XDECREF(v); 3461 | return NULL; 3462 | } 3463 | --------------------------------------------------------------------------------