├── typed_ast ├── __init__.py └── conversions.py ├── .gitignore ├── CONTRIBUTING.md ├── ast27 ├── Include │ ├── ast.h │ ├── compile.h │ ├── pgenheaders.h │ ├── pyarena.h │ ├── bitset.h │ ├── node.h │ ├── asdl.h │ ├── errcode.h │ ├── parsetok.h │ ├── token.h │ ├── graminit.h │ ├── grammar.h │ └── pycore_pyarena.h ├── Parser │ ├── parser.h │ ├── bitset.c │ ├── grammar1.c │ ├── tokenizer.h │ ├── acceler.c │ ├── node.c │ ├── Python.asdl │ ├── grammar.c │ └── asdl.py ├── Python │ ├── asdl.c │ └── mystrtoul.c ├── Grammar │ └── Grammar └── Custom │ └── typed_ast.c ├── tools ├── update_ast27_asdl ├── update_ast3_asdl ├── update_ast3_grammar ├── update_header_guards ├── find_exported_symbols ├── update_exported_symbols ├── token.patch ├── script ├── tokenizer.patch ├── Python-asdl.patch ├── parsetok.patch ├── Grammar.patch └── asdl_c.patch ├── ast3 ├── Include │ ├── pgenheaders.h │ ├── pyarena.h │ ├── ast.h │ ├── bitset.h │ ├── node.h │ ├── asdl.h │ ├── errcode.h │ ├── graminit.h │ ├── grammar.h │ ├── pycore_pyarena.h │ ├── token.h │ └── parsetok.h ├── Parser │ ├── bitset.c │ ├── parser.h │ ├── grammar1.c │ ├── tokenizer.h │ ├── acceler.c │ ├── node.c │ ├── Python.asdl │ └── grammar.c ├── Python │ └── asdl.c ├── Grammar │ └── Grammar ├── tests │ └── test_basics.py └── Custom │ └── typed_ast.c ├── MANIFEST.in ├── .gitattributes ├── release_process.md ├── .github └── workflows │ └── build.yml ├── README.md └── setup.py /typed_ast/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "1.5.5" 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.pyc 3 | /build/ 4 | __pycache__/ 5 | .DS_Store 6 | /tools/pgen3 7 | /.pytest_cache/ 8 | /typed_ast.egg-info/ 9 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: 
-------------------------------------------------------------------------------- 1 | To contribute code to this project, you'll need to sign the [Python Software Foundation's Contributor License Agreement](https://www.python.org/psf/contrib/contrib-form/). 2 | -------------------------------------------------------------------------------- /ast27/Include/ast.h: -------------------------------------------------------------------------------- 1 | #ifndef Ta27_AST_H 2 | #define Ta27_AST_H 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | mod_ty Ta27AST_FromNode(const node *, PyCompilerFlags *flags, 8 | const char *, PyArena *); 9 | 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | #endif /* !Ta27_AST_H */ 14 | -------------------------------------------------------------------------------- /tools/update_ast27_asdl: -------------------------------------------------------------------------------- 1 | #!/bin/bash -eux 2 | 3 | # Run after changing `Parser/Python.asdl` 4 | 5 | PROJ_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/.." 6 | 7 | python2 ast27/Parser/asdl_c.py -h ast27/Include/ ast27/Parser/Python.asdl 8 | python2 ast27/Parser/asdl_c.py -c ast27/Python/ ast27/Parser/Python.asdl 9 | -------------------------------------------------------------------------------- /tools/update_ast3_asdl: -------------------------------------------------------------------------------- 1 | #!/bin/bash -eux 2 | 3 | # Run after changing `Parser/Python.asdl` 4 | 5 | PROJ_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/.." 
6 | 7 | python3 ast3/Parser/asdl_c.py -h ast3/Include/Python-ast.h ast3/Parser/Python.asdl 8 | python3 ast3/Parser/asdl_c.py -c ast3/Python/Python-ast.c ast3/Parser/Python.asdl 9 | -------------------------------------------------------------------------------- /ast27/Include/compile.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef Ta27_COMPILE_H 3 | #define Ta27_COMPILE_H 4 | 5 | #include "Python.h" 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | /* Public interface */ 12 | PyAPI_FUNC(PyFutureFeatures *) PyFuture_FromAST(struct _mod *, const char *); 13 | 14 | 15 | #ifdef __cplusplus 16 | } 17 | #endif 18 | #endif /* !Ta27_COMPILE_H */ 19 | -------------------------------------------------------------------------------- /ast27/Include/pgenheaders.h: -------------------------------------------------------------------------------- 1 | #ifndef DUMMY_Py_PGENHEADERS_H 2 | #define DUMMY_Py_PGENHEADERS_H 3 | 4 | /* pgenheaders.h is included by a bunch of files but nothing in it is 5 | * used except for the Python.h import, and it was removed in Python 6 | * 3.8. Since some of those files are generated we provide a dummy 7 | * pgenheaders.h. */ 8 | #include "Python.h" 9 | 10 | #endif /* !DUMMY_Py_PGENHEADERS_H */ 11 | -------------------------------------------------------------------------------- /ast3/Include/pgenheaders.h: -------------------------------------------------------------------------------- 1 | #ifndef DUMMY_Py_PGENHEADERS_H 2 | #define DUMMY_Py_PGENHEADERS_H 3 | 4 | /* pgenheaders.h is included by a bunch of files but nothing in it is 5 | * used except for the Python.h import, and it was removed in Python 6 | * 3.8. Since some of those files are generated we provide a dummy 7 | * pgenheaders.h. 
*/ 8 | #include "Python.h" 9 | 10 | #endif /* !DUMMY_Py_PGENHEADERS_H */ 11 | -------------------------------------------------------------------------------- /tools/update_ast3_grammar: -------------------------------------------------------------------------------- 1 | #!/bin/bash -eux 2 | 3 | # Run after changing `Grammar/Grammar` 4 | 5 | PROJ_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/.." 6 | 7 | echo 'Copying pgen' 8 | cp ~/src/cpython37/Parser/pgen tools/pgen3 9 | 10 | echo 'Updating graminit files' 11 | tools/pgen3 ast3/Grammar/Grammar ast3/Include/graminit.h ast3/Python/graminit.c 12 | 13 | echo 'Grammar file update complete' 14 | -------------------------------------------------------------------------------- /tools/update_header_guards: -------------------------------------------------------------------------------- 1 | #!/bin/bash -eux 2 | 3 | # usage: ./update_header_guards VERSION_NUMBER 4 | 5 | PROJ_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/.." 6 | 7 | # only works on OS X due to silly sed incompatibility 8 | FOLDER="ast$1" 9 | PATTERN='s/Py\([A-Z_]*_H\( \*\/\)\{0,1\}\)$/' 10 | PATTERN+="Ta$1" 11 | PATTERN+='\1/' 12 | find "$FOLDER" -type f -name '*.h' | xargs -n 1 sed -i '' "$PATTERN" 13 | -------------------------------------------------------------------------------- /tools/find_exported_symbols: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | PROJ_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/.." 3 | 4 | # This requires GNU binutils (e.g. brew install binutils). 
5 | 6 | /usr/local/opt/binutils/bin/gobjdump -t $PROJ_DIR/build/lib*/_ast${1}.*.so \ 7 | | grep ' g ' \ 8 | | grep -v UND \ 9 | | sed 's/.* _//' \ 10 | | grep -v PyInit__ast \ 11 | | grep 'Py' \ 12 | > "exported_symbols${1}.txt" 13 | -------------------------------------------------------------------------------- /ast27/Include/pyarena.h: -------------------------------------------------------------------------------- 1 | /* An arena-like memory interface for the compiler. 2 | */ 3 | 4 | #ifndef Ta27_PYARENA_H 5 | #define Ta27_PYARENA_H 6 | 7 | #if PY_MINOR_VERSION >= 10 8 | #include "../Include/pycore_pyarena.h" 9 | 10 | #define PyArena_New _PyArena_New 11 | #define PyArena_Free _PyArena_Free 12 | #define PyArena_Malloc _PyArena_Malloc 13 | #define PyArena_AddPyObject _PyArena_AddPyObject 14 | #endif 15 | 16 | #endif /* !Ta27_PYARENA_H */ 17 | -------------------------------------------------------------------------------- /ast3/Include/pyarena.h: -------------------------------------------------------------------------------- 1 | /* An arena-like memory interface for the compiler. 
2 | */ 3 | 4 | #ifndef Ta3_PYARENA_H 5 | #define Ta3_PYARENA_H 6 | 7 | #if PY_MINOR_VERSION >= 10 8 | #include "../Include/pycore_pyarena.h" 9 | 10 | #define PyArena_New _PyArena_New 11 | #define PyArena_Free _PyArena_Free 12 | #define PyArena_Malloc _PyArena_Malloc 13 | #define PyArena_AddPyObject _PyArena_AddPyObject 14 | #endif 15 | 16 | #endif /* !Ta3_PYARENA_H */ 17 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include ast27/Grammar/Grammar 2 | include ast27/Parser/Python.asdl 3 | recursive-include ast27 *.h 4 | recursive-include ast27 *.py 5 | 6 | include ast3/Grammar/Grammar 7 | include ast3/Parser/Python.asdl 8 | recursive-include ast3 *.h 9 | recursive-include ast3 *.py 10 | 11 | recursive-include ast3/tests *.py 12 | include LICENSE 13 | 14 | prune tools 15 | exclude CONTRIBUTING.md 16 | exclude release_process.md 17 | exclude update_process.md 18 | -------------------------------------------------------------------------------- /tools/update_exported_symbols: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | PROJ_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/.." 3 | 4 | for CHANGE in $( cat "$PROJ_DIR/exported_symbols${1}.txt" ); do 5 | if [[ ${CHANGE:0:1} == "_" ]] ; then 6 | NEW="_Ta${1}${CHANGE:3}" 7 | else 8 | NEW="Ta${1}${CHANGE:2}" 9 | fi 10 | find "$PROJ_DIR/ast${1}" -type f -name '*.h' -or -name '*.c' | xargs -n 1 sed -i '' "s/$CHANGE/$NEW/" 11 | done 12 | 13 | echo "Symbols updated. Remember to also update autogeneration code like Parser/asdl_c.py." 
14 | -------------------------------------------------------------------------------- /tools/token.patch: -------------------------------------------------------------------------------- 1 | diff --git a/ast3/Include/token.h b/ast3/Include/token.h 2 | index a657fdd..d0b2b94 100644 3 | --- a/ast3/Include/token.h 4 | +++ b/ast3/Include/token.h 5 | @@ -68,8 +68,10 @@ extern "C" { 6 | /* These aren't used by the C tokenizer but are needed for tokenize.py */ 7 | #define COMMENT 55 8 | #define NL 56 9 | -#define ENCODING 57 10 | -#define N_TOKENS 58 11 | +#define ENCODING 57 12 | +#define TYPE_IGNORE 58 13 | +#define TYPE_COMMENT 59 14 | +#define N_TOKENS 60 15 | 16 | /* Special definitions for cooperation with parser */ 17 | 18 | -------------------------------------------------------------------------------- /ast3/Include/ast.h: -------------------------------------------------------------------------------- 1 | #ifndef Ta3_AST_H 2 | #define Ta3_AST_H 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern int Ta3AST_Validate(mod_ty); 8 | extern mod_ty Ta3AST_FromNode( 9 | const node *n, 10 | PyCompilerFlags *flags, 11 | const char *filename, /* decoded from the filesystem encoding */ 12 | int feature_version, 13 | PyArena *arena); 14 | extern mod_ty Ta3AST_FromNodeObject( 15 | const node *n, 16 | PyCompilerFlags *flags, 17 | PyObject *filename, 18 | int feature_version, 19 | PyArena *arena); 20 | 21 | #ifndef Py_LIMITED_API 22 | 23 | /* _PyAST_ExprAsUnicode is defined in ast_unparse.c */ 24 | extern PyObject * _PyAST_ExprAsUnicode(expr_ty); 25 | 26 | #endif /* !Py_LIMITED_API */ 27 | 28 | #ifdef __cplusplus 29 | } 30 | #endif 31 | #endif /* !Ta3_AST_H */ 32 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Generated files 2 | # https://github.com/github/linguist#generated-code 3 | ast3/Include/graminit.h 
linguist-generated=true 4 | ast3/Python/graminit.h linguist-generated=true 5 | ast3/Include/Python-ast.h linguist-generated=true 6 | ast3/Python/Python-ast.c linguist-generated=true 7 | ast3/Include/token.h linguist-generated=true 8 | ast3/Lib/token.py linguist-generated=true 9 | ast3/Parser/token.c linguist-generated=true 10 | ast27/Include/graminit.h linguist-generated=true 11 | ast27/Python/graminit.h linguist-generated=true 12 | ast27/Include/Python-ast.h linguist-generated=true 13 | ast27/Python/Python-ast.c linguist-generated=true 14 | ast27/Include/token.h linguist-generated=true 15 | ast27/Lib/token.py linguist-generated=true 16 | ast27/Parser/token.c linguist-generated=true 17 | -------------------------------------------------------------------------------- /ast27/Include/bitset.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef Ta27_BITSET_H 3 | #define Ta27_BITSET_H 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | /* Bitset interface */ 9 | 10 | #define BYTE char 11 | 12 | typedef BYTE *bitset; 13 | 14 | bitset newbitset(int nbits); 15 | void delbitset(bitset bs); 16 | #define testbit(ss, ibit) (((ss)[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0) 17 | int addbit(bitset bs, int ibit); /* Returns 0 if already set */ 18 | int samebitset(bitset bs1, bitset bs2, int nbits); 19 | void mergebitset(bitset bs1, bitset bs2, int nbits); 20 | 21 | #define BITSPERBYTE (8*sizeof(BYTE)) 22 | #define NBYTES(nbits) (((nbits) + BITSPERBYTE - 1) / BITSPERBYTE) 23 | 24 | #define BIT2BYTE(ibit) ((ibit) / BITSPERBYTE) 25 | #define BIT2SHIFT(ibit) ((ibit) % BITSPERBYTE) 26 | #define BIT2MASK(ibit) (1 << BIT2SHIFT(ibit)) 27 | #define BYTE2BIT(ibyte) ((ibyte) * BITSPERBYTE) 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | #endif /* !Ta27_BITSET_H */ 33 | -------------------------------------------------------------------------------- /ast3/Include/bitset.h: 
-------------------------------------------------------------------------------- 1 | 2 | #ifndef Ta3_BITSET_H 3 | #define Ta3_BITSET_H 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | /* Bitset interface */ 9 | 10 | #define BYTE char 11 | 12 | typedef BYTE *bitset; 13 | 14 | bitset newbitset(int nbits); 15 | void delbitset(bitset bs); 16 | #define testbit(ss, ibit) (((ss)[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0) 17 | int addbit(bitset bs, int ibit); /* Returns 0 if already set */ 18 | int samebitset(bitset bs1, bitset bs2, int nbits); 19 | void mergebitset(bitset bs1, bitset bs2, int nbits); 20 | 21 | #define BITSPERBYTE (8*sizeof(BYTE)) 22 | #define NBYTES(nbits) (((nbits) + BITSPERBYTE - 1) / BITSPERBYTE) 23 | 24 | #define BIT2BYTE(ibit) ((ibit) / BITSPERBYTE) 25 | #define BIT2SHIFT(ibit) ((ibit) % BITSPERBYTE) 26 | #define BIT2MASK(ibit) (1 << BIT2SHIFT(ibit)) 27 | #define BYTE2BIT(ibyte) ((ibyte) * BITSPERBYTE) 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | #endif /* !Ta3_BITSET_H */ 33 | -------------------------------------------------------------------------------- /ast27/Include/node.h: -------------------------------------------------------------------------------- 1 | 2 | /* Parse tree node interface */ 3 | 4 | #ifndef Ta27_NODE_H 5 | #define Ta27_NODE_H 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | typedef struct _node { 11 | short n_type; 12 | char *n_str; 13 | int n_lineno; 14 | int n_col_offset; 15 | int n_nchildren; 16 | struct _node *n_child; 17 | } node; 18 | 19 | node *Ta27Node_New(int type); 20 | int Ta27Node_AddChild(node *n, int type, 21 | char *str, int lineno, int col_offset); 22 | void Ta27Node_Free(node *n); 23 | Py_ssize_t _Ta27Node_SizeOf(node *n); 24 | 25 | /* Node access functions */ 26 | #define NCH(n) ((n)->n_nchildren) 27 | 28 | #define CHILD(n, i) (&(n)->n_child[i]) 29 | #define RCHILD(n, i) (CHILD(n, NCH(n) + i)) 30 | #define TYPE(n) ((n)->n_type) 31 | #define STR(n) ((n)->n_str) 32 | 33 | /* Assert 
that the type of a node is what we expect */ 34 | #define REQ(n, type) assert(TYPE(n) == (type)) 35 | 36 | PyAPI_FUNC(void) PyNode_ListTree(node *); 37 | 38 | #ifdef __cplusplus 39 | } 40 | #endif 41 | #endif /* !Ta27_NODE_H */ 42 | -------------------------------------------------------------------------------- /release_process.md: -------------------------------------------------------------------------------- 1 | # Typed AST PyPI Release Process 2 | 0. Thoroughly test the prospective release. 3 | 1. Make a commit titled "Release version \[version number\]" which removes the 4 | `-dev0` suffix of the version string in 5 | [`typed_ast/__init__.py`](https://github.com/python/typed_ast/blob/master/typed_ast/__init__.py). 6 | Switch the version to a new minor version if there have been 7 | backwards-incompatible changes. 8 | 2. Make a git tag pointing to this commit with the version number as the name 9 | of the tag. 10 | 3. Push the commit and the tag. 11 | 4. Wait for the GitHub Actions build to complete. 12 | 5. Download all artifacts from the relevant GitHub Actions build. 13 | 6. Compare the wheels produced with the previous release of typed-ast to make sure 14 | you have the full matrix. 15 | 7. If possible, verify the final `typed_ast` wheels work on Windows, macOS, 16 | and Linux platforms. 17 | 8. Upload the sdist and wheels to PyPI with `twine upload dist/*`. 18 | 9. Make a commit which bumps the bugfix version and adds back the `.dev0` 19 | suffix. 
20 | -------------------------------------------------------------------------------- /ast27/Parser/parser.h: -------------------------------------------------------------------------------- 1 | #ifndef Ta27_PARSER_H 2 | #define Ta27_PARSER_H 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | 8 | /* Parser interface */ 9 | 10 | #define MAXSTACK 1500 11 | 12 | typedef struct { 13 | int s_state; /* State in current DFA */ 14 | dfa *s_dfa; /* Current DFA */ 15 | struct _node *s_parent; /* Where to add next node */ 16 | } stackentry; 17 | 18 | typedef struct { 19 | stackentry *s_top; /* Top entry */ 20 | stackentry s_base[MAXSTACK];/* Array of stack entries */ 21 | /* NB The stack grows down */ 22 | } stack; 23 | 24 | typedef struct { 25 | stack p_stack; /* Stack of parser states */ 26 | grammar *p_grammar; /* Grammar to use */ 27 | node *p_tree; /* Top of parse tree */ 28 | #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 29 | unsigned long p_flags; /* see co_flags in Include/code.h */ 30 | #endif 31 | } parser_state; 32 | 33 | parser_state *Ta27Parser_New(grammar *g, int start); 34 | void Ta27Parser_Delete(parser_state *ps); 35 | int Ta27Parser_AddToken(parser_state *ps, int type, char *str, int lineno, int col_offset, 36 | int *expected_ret); 37 | void Ta27Grammar_AddAccelerators(grammar *g); 38 | 39 | #ifdef __cplusplus 40 | } 41 | #endif 42 | #endif /* !Ta27_PARSER_H */ 43 | -------------------------------------------------------------------------------- /ast3/Include/node.h: -------------------------------------------------------------------------------- 1 | 2 | /* Parse tree node interface */ 3 | 4 | #ifndef Ta3_NODE_H 5 | #define Ta3_NODE_H 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | typedef struct _node { 11 | short n_type; 12 | char *n_str; 13 | int n_lineno; 14 | int n_col_offset; 15 | int n_nchildren; 16 | struct _node *n_child; 17 | } node; 18 | 19 | extern node * Ta3Node_New(int type); 20 | extern int Ta3Node_AddChild(node *n, int type, 21 
| char *str, int lineno, int col_offset); 22 | extern void Ta3Node_Free(node *n); 23 | #ifndef Py_LIMITED_API 24 | extern Py_ssize_t _Ta3Node_SizeOf(node *n); 25 | #endif 26 | 27 | /* Node access functions */ 28 | #define NCH(n) ((n)->n_nchildren) 29 | 30 | #define CHILD(n, i) (&(n)->n_child[i]) 31 | #define RCHILD(n, i) (CHILD(n, NCH(n) + i)) 32 | #define TYPE(n) ((n)->n_type) 33 | #define STR(n) ((n)->n_str) 34 | #define LINENO(n) ((n)->n_lineno) 35 | 36 | /* Assert that the type of a node is what we expect */ 37 | #define REQ(n, type) assert(TYPE(n) == (type)) 38 | 39 | extern void PyNode_ListTree(node *); 40 | 41 | #ifdef __cplusplus 42 | } 43 | #endif 44 | #endif /* !Ta3_NODE_H */ 45 | -------------------------------------------------------------------------------- /ast3/Parser/bitset.c: -------------------------------------------------------------------------------- 1 | 2 | /* Bitset primitives used by the parser generator */ 3 | 4 | #include "../Include/pgenheaders.h" 5 | #include "../Include/bitset.h" 6 | 7 | bitset 8 | newbitset(int nbits) 9 | { 10 | int nbytes = NBYTES(nbits); 11 | bitset ss = (char *)PyObject_MALLOC(sizeof(BYTE) * nbytes); 12 | 13 | if (ss == NULL) 14 | Py_FatalError("no mem for bitset"); 15 | 16 | ss += nbytes; 17 | while (--nbytes >= 0) 18 | *--ss = 0; 19 | return ss; 20 | } 21 | 22 | void 23 | delbitset(bitset ss) 24 | { 25 | PyObject_FREE(ss); 26 | } 27 | 28 | int 29 | addbit(bitset ss, int ibit) 30 | { 31 | int ibyte = BIT2BYTE(ibit); 32 | BYTE mask = BIT2MASK(ibit); 33 | 34 | if (ss[ibyte] & mask) 35 | return 0; /* Bit already set */ 36 | ss[ibyte] |= mask; 37 | return 1; 38 | } 39 | 40 | #if 0 /* Now a macro */ 41 | int 42 | testbit(bitset ss, int ibit) 43 | { 44 | return (ss[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0; 45 | } 46 | #endif 47 | 48 | int 49 | samebitset(bitset ss1, bitset ss2, int nbits) 50 | { 51 | int i; 52 | 53 | for (i = NBYTES(nbits); --i >= 0; ) 54 | if (*ss1++ != *ss2++) 55 | return 0; 56 | return 1; 57 | } 58 | 
59 | void 60 | mergebitset(bitset ss1, bitset ss2, int nbits) 61 | { 62 | int i; 63 | 64 | for (i = NBYTES(nbits); --i >= 0; ) 65 | *ss1++ |= *ss2++; 66 | } 67 | -------------------------------------------------------------------------------- /ast27/Parser/bitset.c: -------------------------------------------------------------------------------- 1 | 2 | /* Bitset primitives used by the parser generator */ 3 | 4 | #include "../Include/pgenheaders.h" 5 | #include "../Include/bitset.h" 6 | 7 | bitset 8 | newbitset(int nbits) 9 | { 10 | int nbytes = NBYTES(nbits); 11 | bitset ss = (char *)PyObject_MALLOC(sizeof(BYTE) * nbytes); 12 | 13 | if (ss == NULL) 14 | Py_FatalError("no mem for bitset"); 15 | 16 | ss += nbytes; 17 | while (--nbytes >= 0) 18 | *--ss = 0; 19 | return ss; 20 | } 21 | 22 | void 23 | delbitset(bitset ss) 24 | { 25 | PyObject_FREE(ss); 26 | } 27 | 28 | int 29 | addbit(bitset ss, int ibit) 30 | { 31 | int ibyte = BIT2BYTE(ibit); 32 | BYTE mask = BIT2MASK(ibit); 33 | 34 | if (ss[ibyte] & mask) 35 | return 0; /* Bit already set */ 36 | ss[ibyte] |= mask; 37 | return 1; 38 | } 39 | 40 | #if 0 /* Now a macro */ 41 | int 42 | testbit(bitset ss, int ibit) 43 | { 44 | return (ss[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0; 45 | } 46 | #endif 47 | 48 | int 49 | samebitset(bitset ss1, bitset ss2, int nbits) 50 | { 51 | int i; 52 | 53 | for (i = NBYTES(nbits); --i >= 0; ) 54 | if (*ss1++ != *ss2++) 55 | return 0; 56 | return 1; 57 | } 58 | 59 | void 60 | mergebitset(bitset ss1, bitset ss2, int nbits) 61 | { 62 | int i; 63 | 64 | for (i = NBYTES(nbits); --i >= 0; ) 65 | *ss1++ |= *ss2++; 66 | } 67 | -------------------------------------------------------------------------------- /ast3/Parser/parser.h: -------------------------------------------------------------------------------- 1 | #ifndef Ta3_PARSER_H 2 | #define Ta3_PARSER_H 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | 8 | /* Parser interface */ 9 | 10 | #define MAXSTACK 1500 11 | 12 | typedef struct 
{ 13 | int s_state; /* State in current DFA */ 14 | dfa *s_dfa; /* Current DFA */ 15 | struct _node *s_parent; /* Where to add next node */ 16 | } stackentry; 17 | 18 | typedef struct { 19 | stackentry *s_top; /* Top entry */ 20 | stackentry s_base[MAXSTACK];/* Array of stack entries */ 21 | /* NB The stack grows down */ 22 | } stack; 23 | 24 | typedef struct { 25 | stack p_stack; /* Stack of parser states */ 26 | grammar *p_grammar; /* Grammar to use */ 27 | node *p_tree; /* Top of parse tree */ 28 | #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 29 | unsigned long p_flags; /* see co_flags in Include/code.h */ 30 | #endif 31 | } parser_state; 32 | 33 | parser_state *Ta3Parser_New(grammar *g, int start); 34 | void Ta3Parser_Delete(parser_state *ps); 35 | int Ta3Parser_AddToken(parser_state *ps, int type, char *str, int lineno, int col_offset, 36 | int *expected_ret); 37 | void Ta3Grammar_AddAccelerators(grammar *g); 38 | 39 | #ifdef __cplusplus 40 | } 41 | #endif 42 | #endif /* !Ta3_PARSER_H */ 43 | -------------------------------------------------------------------------------- /ast3/Include/asdl.h: -------------------------------------------------------------------------------- 1 | #ifndef Ta3_ASDL_H 2 | #define Ta3_ASDL_H 3 | 4 | #include "../Include/pyarena.h" 5 | 6 | typedef PyObject * identifier; 7 | typedef PyObject * string; 8 | typedef PyObject * bytes; 9 | typedef PyObject * object; 10 | typedef PyObject * singleton; 11 | typedef PyObject * constant; 12 | 13 | /* It would be nice if the code generated by asdl_c.py was completely 14 | independent of Python, but it is a goal the requires too much work 15 | at this stage. So, for example, I'll represent identifiers as 16 | interned Python strings. 17 | */ 18 | 19 | /* XXX A sequence should be typed so that its use can be typechecked. 
*/ 20 | 21 | typedef struct { 22 | Py_ssize_t size; 23 | void *elements[1]; 24 | } asdl_seq; 25 | 26 | typedef struct { 27 | Py_ssize_t size; 28 | int elements[1]; 29 | } asdl_int_seq; 30 | 31 | asdl_seq *_Ta3_asdl_seq_new(Py_ssize_t size, PyArena *arena); 32 | asdl_int_seq *_Ta3_asdl_int_seq_new(Py_ssize_t size, PyArena *arena); 33 | 34 | #define asdl_seq_GET(S, I) (S)->elements[(I)] 35 | #define asdl_seq_LEN(S) ((S) == NULL ? 0 : (S)->size) 36 | #ifdef Py_DEBUG 37 | #define asdl_seq_SET(S, I, V) \ 38 | do { \ 39 | Py_ssize_t _asdl_i = (I); \ 40 | assert((S) != NULL); \ 41 | assert(_asdl_i < (S)->size); \ 42 | (S)->elements[_asdl_i] = (V); \ 43 | } while (0) 44 | #else 45 | #define asdl_seq_SET(S, I, V) (S)->elements[I] = (V) 46 | #endif 47 | 48 | #endif /* !Ta3_ASDL_H */ 49 | -------------------------------------------------------------------------------- /ast27/Include/asdl.h: -------------------------------------------------------------------------------- 1 | #ifndef Ta27_ASDL_H 2 | #define Ta27_ASDL_H 3 | 4 | #include "../Include/pyarena.h" 5 | 6 | typedef PyObject * identifier; 7 | typedef PyObject * string; 8 | typedef PyObject * object; 9 | 10 | #ifndef __cplusplus 11 | #ifndef __bool_true_false_are_defined 12 | typedef enum {false, true} bool; 13 | #endif 14 | #endif 15 | 16 | /* It would be nice if the code generated by asdl_c.py was completely 17 | independent of Python, but it is a goal the requires too much work 18 | at this stage. So, for example, I'll represent identifiers as 19 | interned Python strings. 20 | */ 21 | 22 | /* XXX A sequence should be typed so that its use can be typechecked. 
*/ 23 | 24 | typedef struct { 25 | int size; 26 | void *elements[1]; 27 | } asdl_seq; 28 | 29 | typedef struct { 30 | int size; 31 | int elements[1]; 32 | } asdl_int_seq; 33 | 34 | #define asdl_seq_new _Ta27_asdl_seq_new 35 | #define asdl_int_seq_new _Ta27_asdl_int_seq_new 36 | asdl_seq *asdl_seq_new(Py_ssize_t size, PyArena *arena); 37 | asdl_int_seq *asdl_int_seq_new(Py_ssize_t size, PyArena *arena); 38 | 39 | #define asdl_seq_GET(S, I) (S)->elements[(I)] 40 | #define asdl_seq_LEN(S) ((S) == NULL ? 0 : (S)->size) 41 | #ifdef Py_DEBUG 42 | #define asdl_seq_SET(S, I, V) { \ 43 | int _asdl_i = (I); \ 44 | assert((S) && _asdl_i < (S)->size); \ 45 | (S)->elements[_asdl_i] = (V); \ 46 | } 47 | #else 48 | #define asdl_seq_SET(S, I, V) (S)->elements[I] = (V) 49 | #endif 50 | 51 | #endif /* !Ta27_ASDL_H */ 52 | -------------------------------------------------------------------------------- /ast27/Parser/grammar1.c: -------------------------------------------------------------------------------- 1 | 2 | /* Grammar subroutines needed by parser */ 3 | 4 | #include "Python.h" 5 | #include "../Include/pgenheaders.h" 6 | #include "../Include/grammar.h" 7 | #include "../Include/token.h" 8 | 9 | /* Return the DFA for the given type */ 10 | 11 | dfa * 12 | Ta27Grammar_FindDFA(grammar *g, register int type) 13 | { 14 | register dfa *d; 15 | #if 1 16 | /* Massive speed-up */ 17 | d = &g->g_dfa[type - NT_OFFSET]; 18 | assert(d->d_type == type); 19 | return d; 20 | #else 21 | /* Old, slow version */ 22 | register int i; 23 | 24 | for (i = g->g_ndfas, d = g->g_dfa; --i >= 0; d++) { 25 | if (d->d_type == type) 26 | return d; 27 | } 28 | assert(0); 29 | /* NOTREACHED */ 30 | #endif 31 | } 32 | 33 | char * 34 | Ta27Grammar_LabelRepr(label *lb) 35 | { 36 | static char buf[100]; 37 | 38 | if (lb->lb_type == ENDMARKER) 39 | return "EMPTY"; 40 | else if (ISNONTERMINAL(lb->lb_type)) { 41 | if (lb->lb_str == NULL) { 42 | PyOS_snprintf(buf, sizeof(buf), "NT%d", lb->lb_type); 43 | return buf; 44 
| } 45 | else 46 | return lb->lb_str; 47 | } 48 | else { 49 | if (lb->lb_str == NULL) 50 | return _Ta27Parser_TokenNames[lb->lb_type]; 51 | else { 52 | PyOS_snprintf(buf, sizeof(buf), "%.32s(%.32s)", 53 | _Ta27Parser_TokenNames[lb->lb_type], lb->lb_str); 54 | return buf; 55 | } 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /ast27/Include/errcode.h: -------------------------------------------------------------------------------- 1 | #ifndef Ta27_ERRCODE_H 2 | #define Ta27_ERRCODE_H 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | 8 | /* Error codes passed around between file input, tokenizer, parser and 9 | interpreter. This is necessary so we can turn them into Python 10 | exceptions at a higher level. Note that some errors have a 11 | slightly different meaning when passed from the tokenizer to the 12 | parser than when passed from the parser to the interpreter; e.g. 13 | the parser only returns E_EOF when it hits EOF immediately, and it 14 | never returns E_OK. 
*/ 15 | 16 | #define E_OK 10 /* No error */ 17 | #define E_EOF 11 /* End Of File */ 18 | #define E_INTR 12 /* Interrupted */ 19 | #define E_TOKEN 13 /* Bad token */ 20 | #define E_SYNTAX 14 /* Syntax error */ 21 | #define E_NOMEM 15 /* Ran out of memory */ 22 | #define E_DONE 16 /* Parsing complete */ 23 | #define E_ERROR 17 /* Execution error */ 24 | #define E_TABSPACE 18 /* Inconsistent mixing of tabs and spaces */ 25 | #define E_OVERFLOW 19 /* Node had too many children */ 26 | #define E_TOODEEP 20 /* Too many indentation levels */ 27 | #define E_DEDENT 21 /* No matching outer block for dedent */ 28 | #define E_DECODE 22 /* Error in decoding into Unicode */ 29 | #define E_EOFS 23 /* EOF in triple-quoted string */ 30 | #define E_EOLS 24 /* EOL in single-quoted string */ 31 | #define E_LINECONT 25 /* Unexpected characters after a line continuation */ 32 | 33 | #ifdef __cplusplus 34 | } 35 | #endif 36 | #endif /* !Ta27_ERRCODE_H */ 37 | -------------------------------------------------------------------------------- /tools/script: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | # Automate steps 1-4 of update_process.md (Mac). 4 | 5 | HERE=$(dirname ${BASH_SOURCE[0]}) 6 | cd $HERE/.. 
7 | pwd 8 | 9 | CPYTHON=~/src/cpython37 10 | 11 | DIRS="Grammar Include Parser Python" 12 | C_FILES="Parser/acceler.c Parser/bitset.c Parser/grammar.c Parser/grammar1.c Parser/node.c Parser/parser.c Parser/parsetok.c Parser/tokenizer.c Python/asdl.c Python/ast.c Python/graminit.c Python/Python-ast.c" 13 | H_FILES="Include/asdl.h Include/ast.h Include/bitset.h Include/errcode.h Include/graminit.h Include/grammar.h Include/node.h Include/parsetok.h Include/Python-ast.h Include/token.h Parser/parser.h Parser/tokenizer.h" 14 | OTHER_FILES="Grammar/Grammar Parser/Python.asdl Parser/asdl.py Parser/asdl_c.py" 15 | 16 | for dir in $DIRS 17 | do 18 | rm -rf ast3/$dir 19 | mkdir -p ast3/$dir 20 | done 21 | 22 | for file in $C_FILES $H_FILES $OTHER_FILES 23 | do 24 | cp $CPYTHON/$file ast3/$file 25 | done 26 | 27 | ./tools/update_header_guards 3 28 | 29 | rm -rf build 30 | grep -v ast3/Custom setup.py | python3.7 - build 31 | 32 | ./tools/find_exported_symbols 3 33 | ./tools/update_exported_symbols 3 34 | 35 | patch ast3/Parser/asdl_c.py g_dfa[type - NT_OFFSET]; 18 | assert(d->d_type == type); 19 | return d; 20 | #else 21 | /* Old, slow version */ 22 | int i; 23 | 24 | for (i = g->g_ndfas, d = g->g_dfa; --i >= 0; d++) { 25 | if (d->d_type == type) 26 | return d; 27 | } 28 | abort(); 29 | #endif 30 | } 31 | 32 | const char * 33 | Ta3Grammar_LabelRepr(label *lb) 34 | { 35 | static char buf[100]; 36 | 37 | if (lb->lb_type == ENDMARKER) 38 | return "EMPTY"; 39 | else if (ISNONTERMINAL(lb->lb_type)) { 40 | if (lb->lb_str == NULL) { 41 | PyOS_snprintf(buf, sizeof(buf), "NT%d", lb->lb_type); 42 | return buf; 43 | } 44 | else 45 | return lb->lb_str; 46 | } 47 | else if (lb->lb_type < N_TOKENS) { 48 | if (lb->lb_str == NULL) 49 | return _Ta3Parser_TokenNames[lb->lb_type]; 50 | else { 51 | PyOS_snprintf(buf, sizeof(buf), "%.32s(%.32s)", 52 | _Ta3Parser_TokenNames[lb->lb_type], lb->lb_str); 53 | return buf; 54 | } 55 | } 56 | else { 57 | Py_FatalError("invalid label"); 58 | return NULL; 
59 | } 60 | } 61 | -------------------------------------------------------------------------------- /ast3/Python/asdl.c: -------------------------------------------------------------------------------- 1 | #include "Python.h" 2 | #include "../Include/asdl.h" 3 | 4 | asdl_seq * 5 | _Ta3_asdl_seq_new(Py_ssize_t size, PyArena *arena) 6 | { 7 | asdl_seq *seq = NULL; 8 | size_t n; 9 | 10 | /* check size is sane */ 11 | if (size < 0 || 12 | (size && (((size_t)size - 1) > (SIZE_MAX / sizeof(void *))))) { 13 | PyErr_NoMemory(); 14 | return NULL; 15 | } 16 | n = (size ? (sizeof(void *) * (size - 1)) : 0); 17 | 18 | /* check if size can be added safely */ 19 | if (n > SIZE_MAX - sizeof(asdl_seq)) { 20 | PyErr_NoMemory(); 21 | return NULL; 22 | } 23 | n += sizeof(asdl_seq); 24 | 25 | seq = (asdl_seq *)PyArena_Malloc(arena, n); 26 | if (!seq) { 27 | PyErr_NoMemory(); 28 | return NULL; 29 | } 30 | memset(seq, 0, n); 31 | seq->size = size; 32 | return seq; 33 | } 34 | 35 | asdl_int_seq * 36 | _Ta3_asdl_int_seq_new(Py_ssize_t size, PyArena *arena) 37 | { 38 | asdl_int_seq *seq = NULL; 39 | size_t n; 40 | 41 | /* check size is sane */ 42 | if (size < 0 || 43 | (size && (((size_t)size - 1) > (SIZE_MAX / sizeof(void *))))) { 44 | PyErr_NoMemory(); 45 | return NULL; 46 | } 47 | n = (size ? 
(sizeof(void *) * (size - 1)) : 0); 48 | 49 | /* check if size can be added safely */ 50 | if (n > SIZE_MAX - sizeof(asdl_seq)) { 51 | PyErr_NoMemory(); 52 | return NULL; 53 | } 54 | n += sizeof(asdl_seq); 55 | 56 | seq = (asdl_int_seq *)PyArena_Malloc(arena, n); 57 | if (!seq) { 58 | PyErr_NoMemory(); 59 | return NULL; 60 | } 61 | memset(seq, 0, n); 62 | seq->size = size; 63 | return seq; 64 | } 65 | -------------------------------------------------------------------------------- /ast27/Python/asdl.c: -------------------------------------------------------------------------------- 1 | #include "Python.h" 2 | #include "../Include/asdl.h" 3 | 4 | asdl_seq * 5 | _Ta27_asdl_seq_new(Py_ssize_t size, PyArena *arena) 6 | { 7 | asdl_seq *seq = NULL; 8 | size_t n; 9 | 10 | /* check size is sane */ 11 | if (size < 0 || 12 | (size && (((size_t)size - 1) > (PY_SIZE_MAX / sizeof(void *))))) { 13 | PyErr_NoMemory(); 14 | return NULL; 15 | } 16 | n = (size ? (sizeof(void *) * (size - 1)) : 0); 17 | 18 | /* check if size can be added safely */ 19 | if (n > PY_SIZE_MAX - sizeof(asdl_seq)) { 20 | PyErr_NoMemory(); 21 | return NULL; 22 | } 23 | n += sizeof(asdl_seq); 24 | 25 | seq = (asdl_seq *)PyArena_Malloc(arena, n); 26 | if (!seq) { 27 | PyErr_NoMemory(); 28 | return NULL; 29 | } 30 | memset(seq, 0, n); 31 | seq->size = size; 32 | return seq; 33 | } 34 | 35 | asdl_int_seq * 36 | _Ta27_asdl_int_seq_new(Py_ssize_t size, PyArena *arena) 37 | { 38 | asdl_int_seq *seq = NULL; 39 | size_t n; 40 | 41 | /* check size is sane */ 42 | if (size < 0 || 43 | (size && (((size_t)size - 1) > (PY_SIZE_MAX / sizeof(void *))))) { 44 | PyErr_NoMemory(); 45 | return NULL; 46 | } 47 | n = (size ? 
(sizeof(void *) * (size - 1)) : 0); 48 | 49 | /* check if size can be added safely */ 50 | if (n > PY_SIZE_MAX - sizeof(asdl_seq)) { 51 | PyErr_NoMemory(); 52 | return NULL; 53 | } 54 | n += sizeof(asdl_seq); 55 | 56 | seq = (asdl_int_seq *)PyArena_Malloc(arena, n); 57 | if (!seq) { 58 | PyErr_NoMemory(); 59 | return NULL; 60 | } 61 | memset(seq, 0, n); 62 | seq->size = size; 63 | return seq; 64 | } 65 | -------------------------------------------------------------------------------- /ast3/Include/errcode.h: -------------------------------------------------------------------------------- 1 | #ifndef Ta3_ERRCODE_H 2 | #define Ta3_ERRCODE_H 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | 8 | /* Error codes passed around between file input, tokenizer, parser and 9 | interpreter. This is necessary so we can turn them into Python 10 | exceptions at a higher level. Note that some errors have a 11 | slightly different meaning when passed from the tokenizer to the 12 | parser than when passed from the parser to the interpreter; e.g. 13 | the parser only returns E_EOF when it hits EOF immediately, and it 14 | never returns E_OK. 
*/ 15 | 16 | #define E_OK 10 /* No error */ 17 | #define E_EOF 11 /* End Of File */ 18 | #define E_INTR 12 /* Interrupted */ 19 | #define E_TOKEN 13 /* Bad token */ 20 | #define E_SYNTAX 14 /* Syntax error */ 21 | #define E_NOMEM 15 /* Ran out of memory */ 22 | #define E_DONE 16 /* Parsing complete */ 23 | #define E_ERROR 17 /* Execution error */ 24 | #define E_TABSPACE 18 /* Inconsistent mixing of tabs and spaces */ 25 | #define E_OVERFLOW 19 /* Node had too many children */ 26 | #define E_TOODEEP 20 /* Too many indentation levels */ 27 | #define E_DEDENT 21 /* No matching outer block for dedent */ 28 | #define E_DECODE 22 /* Error in decoding into Unicode */ 29 | #define E_EOFS 23 /* EOF in triple-quoted string */ 30 | #define E_EOLS 24 /* EOL in single-quoted string */ 31 | #define E_LINECONT 25 /* Unexpected characters after a line continuation */ 32 | #define E_IDENTIFIER 26 /* Invalid characters in identifier */ 33 | #define E_BADSINGLE 27 /* Ill-formed single statement input */ 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | #endif /* !Ta3_ERRCODE_H */ 39 | -------------------------------------------------------------------------------- /ast27/Include/parsetok.h: -------------------------------------------------------------------------------- 1 | 2 | /* Parser-tokenizer link interface */ 3 | 4 | #ifndef Ta27_PARSETOK_H 5 | #define Ta27_PARSETOK_H 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | typedef struct { 11 | int error; 12 | PyObject *filename; 13 | int lineno; 14 | int offset; 15 | char *text; 16 | int token; 17 | int expected; 18 | } perrdetail; 19 | 20 | #if 0 21 | #define PyPARSE_YIELD_IS_KEYWORD 0x0001 22 | #endif 23 | 24 | #define PyPARSE_DONT_IMPLY_DEDENT 0x0002 25 | 26 | #if 0 27 | #define PyPARSE_WITH_IS_KEYWORD 0x0003 28 | #endif 29 | 30 | #define PyPARSE_PRINT_IS_FUNCTION 0x0004 31 | #define PyPARSE_UNICODE_LITERALS 0x0008 32 | 33 | #define PyPARSE_IGNORE_COOKIE 0x0010 34 | 35 | 36 | node *Ta27Parser_ParseString(const char 
*, grammar *, int, 37 | perrdetail *); 38 | node *Ta27Parser_ParseFile (FILE *, const char *, grammar *, int, 39 | char *, char *, perrdetail *); 40 | 41 | node *Ta27Parser_ParseStringFlags(const char *, grammar *, int, 42 | perrdetail *, int); 43 | node *Ta27Parser_ParseFileFlags(FILE *, const char *, grammar *, 44 | int, char *, char *, 45 | perrdetail *, int); 46 | node *Ta27Parser_ParseFileFlagsEx(FILE *, const char *, grammar *, 47 | int, char *, char *, 48 | perrdetail *, int *); 49 | 50 | node *Ta27Parser_ParseStringFlagsFilename(const char *, 51 | const char *, 52 | grammar *, int, 53 | perrdetail *, int); 54 | node *Ta27Parser_ParseStringFlagsFilenameEx(const char *, 55 | const char *, 56 | grammar *, int, 57 | perrdetail *, int *); 58 | 59 | node *Ta27Parser_ParseStringObject( 60 | const char *s, 61 | PyObject *filename, 62 | grammar *g, 63 | int start, 64 | perrdetail *err_ret, 65 | int *flags); 66 | 67 | /* Note that he following function is defined in pythonrun.c not parsetok.c. */ 68 | PyAPI_FUNC(void) PyParser_SetError(perrdetail *); 69 | 70 | #ifdef __cplusplus 71 | } 72 | #endif 73 | #endif /* !Ta27_PARSETOK_H */ 74 | -------------------------------------------------------------------------------- /ast27/Include/token.h: -------------------------------------------------------------------------------- 1 | 2 | /* Token types */ 3 | 4 | #ifndef Ta27_TOKEN_H 5 | #define Ta27_TOKEN_H 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | #undef TILDE /* Prevent clash of our definition with system macro. 
Ex AIX, ioctl.h */ 11 | 12 | #define ENDMARKER 0 13 | #define NAME 1 14 | #define NUMBER 2 15 | #define STRING 3 16 | #define NEWLINE 4 17 | #define INDENT 5 18 | #define DEDENT 6 19 | #define LPAR 7 20 | #define RPAR 8 21 | #define LSQB 9 22 | #define RSQB 10 23 | #define COLON 11 24 | #define COMMA 12 25 | #define SEMI 13 26 | #define PLUS 14 27 | #define MINUS 15 28 | #define STAR 16 29 | #define SLASH 17 30 | #define VBAR 18 31 | #define AMPER 19 32 | #define LESS 20 33 | #define GREATER 21 34 | #define EQUAL 22 35 | #define DOT 23 36 | #define PERCENT 24 37 | #define BACKQUOTE 25 38 | #define LBRACE 26 39 | #define RBRACE 27 40 | #define EQEQUAL 28 41 | #define NOTEQUAL 29 42 | #define LESSEQUAL 30 43 | #define GREATEREQUAL 31 44 | #define TILDE 32 45 | #define CIRCUMFLEX 33 46 | #define LEFTSHIFT 34 47 | #define RIGHTSHIFT 35 48 | #define DOUBLESTAR 36 49 | #define PLUSEQUAL 37 50 | #define MINEQUAL 38 51 | #define STAREQUAL 39 52 | #define SLASHEQUAL 40 53 | #define PERCENTEQUAL 41 54 | #define AMPEREQUAL 42 55 | #define VBAREQUAL 43 56 | #define CIRCUMFLEXEQUAL 44 57 | #define LEFTSHIFTEQUAL 45 58 | #define RIGHTSHIFTEQUAL 46 59 | #define DOUBLESTAREQUAL 47 60 | #define DOUBLESLASH 48 61 | #define DOUBLESLASHEQUAL 49 62 | #define AT 50 63 | /* Don't forget to update the table _Ta27Parser_TokenNames in tokenizer.c! 
*/ 64 | #define OP 51 65 | #define RARROW 52 66 | #define TYPE_IGNORE 53 67 | #define TYPE_COMMENT 54 68 | #define ERRORTOKEN 55 69 | #define N_TOKENS 56 70 | 71 | /* Special definitions for cooperation with parser */ 72 | 73 | #define NT_OFFSET 256 74 | 75 | #define ISTERMINAL(x) ((x) < NT_OFFSET) 76 | #define ISNONTERMINAL(x) ((x) >= NT_OFFSET) 77 | #define ISEOF(x) ((x) == ENDMARKER) 78 | 79 | 80 | extern char *_Ta27Parser_TokenNames[]; /* Token names */ 81 | int Ta27Token_OneChar(int); 82 | int Ta27Token_TwoChars(int, int); 83 | int Ta27Token_ThreeChars(int, int, int); 84 | 85 | #ifdef __cplusplus 86 | } 87 | #endif 88 | #endif /* !Ta27_TOKEN_H */ 89 | -------------------------------------------------------------------------------- /ast27/Include/graminit.h: -------------------------------------------------------------------------------- 1 | /* Generated by Parser/pgen */ 2 | 3 | #define single_input 256 4 | #define file_input 257 5 | #define eval_input 258 6 | #define decorator 259 7 | #define decorators 260 8 | #define decorated 261 9 | #define funcdef 262 10 | #define parameters 263 11 | #define varargslist 264 12 | #define fpdef 265 13 | #define fplist 266 14 | #define stmt 267 15 | #define simple_stmt 268 16 | #define small_stmt 269 17 | #define expr_stmt 270 18 | #define augassign 271 19 | #define print_stmt 272 20 | #define del_stmt 273 21 | #define pass_stmt 274 22 | #define flow_stmt 275 23 | #define break_stmt 276 24 | #define continue_stmt 277 25 | #define return_stmt 278 26 | #define yield_stmt 279 27 | #define raise_stmt 280 28 | #define import_stmt 281 29 | #define import_name 282 30 | #define import_from 283 31 | #define import_as_name 284 32 | #define dotted_as_name 285 33 | #define import_as_names 286 34 | #define dotted_as_names 287 35 | #define dotted_name 288 36 | #define global_stmt 289 37 | #define exec_stmt 290 38 | #define assert_stmt 291 39 | #define compound_stmt 292 40 | #define if_stmt 293 41 | #define while_stmt 294 42 | #define 
for_stmt 295 43 | #define try_stmt 296 44 | #define with_stmt 297 45 | #define with_item 298 46 | #define except_clause 299 47 | #define suite 300 48 | #define testlist_safe 301 49 | #define old_test 302 50 | #define old_lambdef 303 51 | #define test 304 52 | #define or_test 305 53 | #define and_test 306 54 | #define not_test 307 55 | #define comparison 308 56 | #define comp_op 309 57 | #define expr 310 58 | #define xor_expr 311 59 | #define and_expr 312 60 | #define shift_expr 313 61 | #define arith_expr 314 62 | #define term 315 63 | #define factor 316 64 | #define power 317 65 | #define atom 318 66 | #define listmaker 319 67 | #define testlist_comp 320 68 | #define lambdef 321 69 | #define trailer 322 70 | #define subscriptlist 323 71 | #define subscript 324 72 | #define sliceop 325 73 | #define exprlist 326 74 | #define testlist 327 75 | #define dictorsetmaker 328 76 | #define classdef 329 77 | #define arglist 330 78 | #define argument 331 79 | #define list_iter 332 80 | #define list_for 333 81 | #define list_if 334 82 | #define comp_iter 335 83 | #define comp_for 336 84 | #define comp_if 337 85 | #define testlist1 338 86 | #define encoding_decl 339 87 | #define yield_expr 340 88 | #define func_type_input 341 89 | #define func_type 342 90 | #define typelist 343 91 | -------------------------------------------------------------------------------- /ast27/Include/grammar.h: -------------------------------------------------------------------------------- 1 | 2 | /* Grammar interface */ 3 | 4 | #ifndef Ta27_GRAMMAR_H 5 | #define Ta27_GRAMMAR_H 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | #include "../Include/bitset.h" 11 | 12 | /* A label of an arc */ 13 | 14 | typedef struct { 15 | int lb_type; 16 | char *lb_str; 17 | } label; 18 | 19 | #define EMPTY 0 /* Label number 0 is by definition the empty label */ 20 | 21 | /* A list of labels */ 22 | 23 | typedef struct { 24 | int ll_nlabels; 25 | label *ll_label; 26 | } labellist; 27 | 28 | /* An arc from 
one state to another */ 29 | 30 | typedef struct { 31 | short a_lbl; /* Label of this arc */ 32 | short a_arrow; /* State where this arc goes to */ 33 | } arc; 34 | 35 | /* A state in a DFA */ 36 | 37 | typedef struct { 38 | int s_narcs; 39 | arc *s_arc; /* Array of arcs */ 40 | 41 | /* Optional accelerators */ 42 | int s_lower; /* Lowest label index */ 43 | int s_upper; /* Highest label index */ 44 | int *s_accel; /* Accelerator */ 45 | int s_accept; /* Nonzero for accepting state */ 46 | } state; 47 | 48 | /* A DFA */ 49 | 50 | typedef struct { 51 | int d_type; /* Non-terminal this represents */ 52 | char *d_name; /* For printing */ 53 | int d_initial; /* Initial state */ 54 | int d_nstates; 55 | state *d_state; /* Array of states */ 56 | bitset d_first; 57 | } dfa; 58 | 59 | /* A grammar */ 60 | 61 | typedef struct { 62 | int g_ndfas; 63 | dfa *g_dfa; /* Array of DFAs */ 64 | labellist g_ll; 65 | int g_start; /* Start symbol of the grammar */ 66 | int g_accel; /* Set if accelerators present */ 67 | } grammar; 68 | 69 | /* FUNCTIONS */ 70 | 71 | grammar *newgrammar(int start); 72 | dfa *adddfa(grammar *g, int type, char *name); 73 | int addstate(dfa *d); 74 | void addarc(dfa *d, int from, int to, int lbl); 75 | dfa *Ta27Grammar_FindDFA(grammar *g, int type); 76 | 77 | int addlabel(labellist *ll, int type, char *str); 78 | int findlabel(labellist *ll, int type, char *str); 79 | char *Ta27Grammar_LabelRepr(label *lb); 80 | void translatelabels(grammar *g); 81 | 82 | void addfirstsets(grammar *g); 83 | 84 | void Ta27Grammar_AddAccelerators(grammar *g); 85 | void Ta27Grammar_RemoveAccelerators(grammar *); 86 | 87 | void printgrammar(grammar *g, FILE *fp); 88 | void printnonterminals(grammar *g, FILE *fp); 89 | 90 | #ifdef __cplusplus 91 | } 92 | #endif 93 | #endif /* !Ta27_GRAMMAR_H */ 94 | -------------------------------------------------------------------------------- /ast3/Include/graminit.h: 
-------------------------------------------------------------------------------- 1 | /* Generated by Parser/pgen */ 2 | 3 | #define single_input 256 4 | #define file_input 257 5 | #define eval_input 258 6 | #define decorator 259 7 | #define decorators 260 8 | #define decorated 261 9 | #define async_funcdef 262 10 | #define funcdef 263 11 | #define parameters 264 12 | #define typedargslist 265 13 | #define tfpdef 266 14 | #define varargslist 267 15 | #define vfpdef 268 16 | #define stmt 269 17 | #define simple_stmt 270 18 | #define small_stmt 271 19 | #define expr_stmt 272 20 | #define annassign 273 21 | #define testlist_star_expr 274 22 | #define augassign 275 23 | #define del_stmt 276 24 | #define pass_stmt 277 25 | #define flow_stmt 278 26 | #define break_stmt 279 27 | #define continue_stmt 280 28 | #define return_stmt 281 29 | #define yield_stmt 282 30 | #define raise_stmt 283 31 | #define import_stmt 284 32 | #define import_name 285 33 | #define import_from 286 34 | #define import_as_name 287 35 | #define dotted_as_name 288 36 | #define import_as_names 289 37 | #define dotted_as_names 290 38 | #define dotted_name 291 39 | #define global_stmt 292 40 | #define nonlocal_stmt 293 41 | #define assert_stmt 294 42 | #define compound_stmt 295 43 | #define async_stmt 296 44 | #define if_stmt 297 45 | #define while_stmt 298 46 | #define for_stmt 299 47 | #define try_stmt 300 48 | #define with_stmt 301 49 | #define with_item 302 50 | #define except_clause 303 51 | #define suite 304 52 | #define test 305 53 | #define test_nocond 306 54 | #define lambdef 307 55 | #define lambdef_nocond 308 56 | #define or_test 309 57 | #define and_test 310 58 | #define not_test 311 59 | #define comparison 312 60 | #define comp_op 313 61 | #define star_expr 314 62 | #define expr 315 63 | #define xor_expr 316 64 | #define and_expr 317 65 | #define shift_expr 318 66 | #define arith_expr 319 67 | #define term 320 68 | #define factor 321 69 | #define power 322 70 | #define atom_expr 323 71 | 
#define atom 324 72 | #define testlist_comp 325 73 | #define trailer 326 74 | #define subscriptlist 327 75 | #define subscript 328 76 | #define sliceop 329 77 | #define exprlist 330 78 | #define testlist 331 79 | #define dictorsetmaker 332 80 | #define classdef 333 81 | #define arglist 334 82 | #define argument 335 83 | #define comp_iter 336 84 | #define sync_comp_for 337 85 | #define comp_for 338 86 | #define comp_if 339 87 | #define encoding_decl 340 88 | #define yield_expr 341 89 | #define yield_arg 342 90 | #define func_type_input 343 91 | #define func_type 344 92 | #define typelist 345 93 | -------------------------------------------------------------------------------- /ast3/Include/grammar.h: -------------------------------------------------------------------------------- 1 | 2 | /* Grammar interface */ 3 | 4 | #ifndef Ta3_GRAMMAR_H 5 | #define Ta3_GRAMMAR_H 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | #include "../Include/bitset.h" 11 | 12 | /* A label of an arc */ 13 | 14 | typedef struct { 15 | int lb_type; 16 | char *lb_str; 17 | } label; 18 | 19 | #define EMPTY 0 /* Label number 0 is by definition the empty label */ 20 | 21 | /* A list of labels */ 22 | 23 | typedef struct { 24 | int ll_nlabels; 25 | label *ll_label; 26 | } labellist; 27 | 28 | /* An arc from one state to another */ 29 | 30 | typedef struct { 31 | short a_lbl; /* Label of this arc */ 32 | short a_arrow; /* State where this arc goes to */ 33 | } arc; 34 | 35 | /* A state in a DFA */ 36 | 37 | typedef struct { 38 | int s_narcs; 39 | arc *s_arc; /* Array of arcs */ 40 | 41 | /* Optional accelerators */ 42 | int s_lower; /* Lowest label index */ 43 | int s_upper; /* Highest label index */ 44 | int *s_accel; /* Accelerator */ 45 | int s_accept; /* Nonzero for accepting state */ 46 | } state; 47 | 48 | /* A DFA */ 49 | 50 | typedef struct { 51 | int d_type; /* Non-terminal this represents */ 52 | char *d_name; /* For printing */ 53 | int d_initial; /* Initial state */ 54 | int 
d_nstates; 55 | state *d_state; /* Array of states */ 56 | bitset d_first; 57 | } dfa; 58 | 59 | /* A grammar */ 60 | 61 | typedef struct { 62 | int g_ndfas; 63 | dfa *g_dfa; /* Array of DFAs */ 64 | labellist g_ll; 65 | int g_start; /* Start symbol of the grammar */ 66 | int g_accel; /* Set if accelerators present */ 67 | } grammar; 68 | 69 | /* FUNCTIONS */ 70 | 71 | grammar *newgrammar(int start); 72 | void freegrammar(grammar *g); 73 | dfa *adddfa(grammar *g, int type, const char *name); 74 | int addstate(dfa *d); 75 | void addarc(dfa *d, int from, int to, int lbl); 76 | dfa *Ta3Grammar_FindDFA(grammar *g, int type); 77 | 78 | int addlabel(labellist *ll, int type, const char *str); 79 | int findlabel(labellist *ll, int type, const char *str); 80 | const char *Ta3Grammar_LabelRepr(label *lb); 81 | void translatelabels(grammar *g); 82 | 83 | void addfirstsets(grammar *g); 84 | 85 | void Ta3Grammar_AddAccelerators(grammar *g); 86 | void Ta3Grammar_RemoveAccelerators(grammar *); 87 | 88 | void printgrammar(grammar *g, FILE *fp); 89 | void printnonterminals(grammar *g, FILE *fp); 90 | 91 | #ifdef __cplusplus 92 | } 93 | #endif 94 | #endif /* !Ta3_GRAMMAR_H */ 95 | -------------------------------------------------------------------------------- /tools/tokenizer.patch: -------------------------------------------------------------------------------- 1 | diff --git a/ast3/Parser/tokenizer.c b/ast3/Parser/tokenizer.c 2 | index 617a744..667fb4a 100644 3 | --- a/ast3/Parser/tokenizer.c 4 | +++ b/ast3/Parser/tokenizer.c 5 | @@ -105,10 +105,16 @@ const char *_Ta3Parser_TokenNames[] = { 6 | "OP", 7 | "AWAIT", 8 | "ASYNC", 9 | + "TYPE_IGNORE", 10 | + "TYPE_COMMENT", 11 | "", 12 | "" 13 | }; 14 | 15 | +/* Spaces in this constant are treated as "zero or more spaces or tabs" when 16 | + tokenizing. 
*/ 17 | +static const char* type_comment_prefix = "# type: "; 18 | + 19 | 20 | /* Create and initialize a new tok_state structure */ 21 | 22 | @@ -1493,10 +1499,56 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) 23 | /* Set start of current token */ 24 | tok->start = tok->cur - 1; 25 | 26 | - /* Skip comment */ 27 | + /* Skip comment, unless it's a type comment */ 28 | if (c == '#') { 29 | - while (c != EOF && c != '\n') { 30 | + const char *prefix, *p, *type_start; 31 | + 32 | + while (c != EOF && c != '\n') 33 | c = tok_nextc(tok); 34 | + 35 | + p = tok->start; 36 | + prefix = type_comment_prefix; 37 | + while (*prefix && p < tok->cur) { 38 | + if (*prefix == ' ') { 39 | + while (*p == ' ' || *p == '\t') 40 | + p++; 41 | + } else if (*prefix == *p) { 42 | + p++; 43 | + } else { 44 | + break; 45 | + } 46 | + 47 | + prefix++; 48 | + } 49 | + 50 | + /* This is a type comment if we matched all of type_comment_prefix. */ 51 | + if (!*prefix) { 52 | + int is_type_ignore = 1; 53 | + tok_backup(tok, c); /* don't eat the newline or EOF */ 54 | + 55 | + type_start = p; 56 | + 57 | + is_type_ignore = tok->cur >= p + 6 && memcmp(p, "ignore", 6) == 0; 58 | + p += 6; 59 | + while (is_type_ignore && p < tok->cur) { 60 | + if (*p == '#') 61 | + break; 62 | + is_type_ignore = is_type_ignore && (*p == ' ' || *p == '\t'); 63 | + p++; 64 | + } 65 | + 66 | + if (is_type_ignore) { 67 | + /* If this type ignore is the only thing on the line, consume the newline also. 
*/ 68 | + if (blankline) { 69 | + tok_nextc(tok); 70 | + tok->atbol = 1; 71 | + } 72 | + return TYPE_IGNORE; 73 | + } else { 74 | + *p_start = (char *) type_start; /* after type_comment_prefix */ 75 | + *p_end = tok->cur; 76 | + return TYPE_COMMENT; 77 | + } 78 | } 79 | } 80 | 81 | -------------------------------------------------------------------------------- /ast3/Include/pycore_pyarena.h: -------------------------------------------------------------------------------- 1 | /* An arena-like memory interface for the compiler. 2 | */ 3 | 4 | #ifndef Ta3_INTERNAL_PYARENA_H 5 | #define Ta3_INTERNAL_PYARENA_H 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | typedef struct _arena PyArena; 11 | 12 | /* _PyArena_New() and _PyArena_Free() create a new arena and free it, 13 | respectively. Once an arena has been created, it can be used 14 | to allocate memory via _PyArena_Malloc(). Pointers to PyObject can 15 | also be registered with the arena via _PyArena_AddPyObject(), and the 16 | arena will ensure that the PyObjects stay alive at least until 17 | _PyArena_Free() is called. When an arena is freed, all the memory it 18 | allocated is freed, the arena releases internal references to registered 19 | PyObject*, and none of its pointers are valid. 20 | XXX (tim) What does "none of its pointers are valid" mean? Does it 21 | XXX mean that pointers previously obtained via _PyArena_Malloc() are 22 | XXX no longer valid? (That's clearly true, but not sure that's what 23 | XXX the text is trying to say.) 24 | 25 | _PyArena_New() returns an arena pointer. On error, it 26 | returns a negative number and sets an exception. 27 | XXX (tim): Not true. 
On error, _PyArena_New() actually returns NULL, 28 | XXX and looks like it may or may not set an exception (e.g., if the 29 | XXX internal PyList_New(0) returns NULL, _PyArena_New() passes that on 30 | XXX and an exception is set; OTOH, if the internal 31 | XXX block_new(DEFAULT_BLOCK_SIZE) returns NULL, that's passed on but 32 | XXX an exception is not set in that case). 33 | */ 34 | PyAPI_FUNC(PyArena*) _PyArena_New(void); 35 | PyAPI_FUNC(void) _PyArena_Free(PyArena *); 36 | 37 | /* Mostly like malloc(), return the address of a block of memory spanning 38 | * `size` bytes, or return NULL (without setting an exception) if enough 39 | * new memory can't be obtained. Unlike malloc(0), _PyArena_Malloc() with 40 | * size=0 does not guarantee to return a unique pointer (the pointer 41 | * returned may equal one or more other pointers obtained from 42 | * _PyArena_Malloc()). 43 | * Note that pointers obtained via _PyArena_Malloc() must never be passed to 44 | * the system free() or realloc(), or to any of Python's similar memory- 45 | * management functions. _PyArena_Malloc()-obtained pointers remain valid 46 | * until _PyArena_Free(ar) is called, at which point all pointers obtained 47 | * from the arena `ar` become invalid simultaneously. 48 | */ 49 | PyAPI_FUNC(void*) _PyArena_Malloc(PyArena *, size_t size); 50 | 51 | /* This routine isn't a proper arena allocation routine. It takes 52 | * a PyObject* and records it so that it can be DECREFed when the 53 | * arena is freed. 54 | */ 55 | PyAPI_FUNC(int) _PyArena_AddPyObject(PyArena *, PyObject *); 56 | 57 | #ifdef __cplusplus 58 | } 59 | #endif 60 | #endif /* !Ta3_INTERNAL_PYARENA_H */ 61 | -------------------------------------------------------------------------------- /ast27/Include/pycore_pyarena.h: -------------------------------------------------------------------------------- 1 | /* An arena-like memory interface for the compiler. 
2 | */ 3 | 4 | #ifndef Ta27_INTERNAL_PYARENA_H 5 | #define Ta27_INTERNAL_PYARENA_H 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | typedef struct _arena PyArena; 11 | 12 | /* _PyArena_New() and _PyArena_Free() create a new arena and free it, 13 | respectively. Once an arena has been created, it can be used 14 | to allocate memory via _PyArena_Malloc(). Pointers to PyObject can 15 | also be registered with the arena via _PyArena_AddPyObject(), and the 16 | arena will ensure that the PyObjects stay alive at least until 17 | _PyArena_Free() is called. When an arena is freed, all the memory it 18 | allocated is freed, the arena releases internal references to registered 19 | PyObject*, and none of its pointers are valid. 20 | XXX (tim) What does "none of its pointers are valid" mean? Does it 21 | XXX mean that pointers previously obtained via _PyArena_Malloc() are 22 | XXX no longer valid? (That's clearly true, but not sure that's what 23 | XXX the text is trying to say.) 24 | 25 | _PyArena_New() returns an arena pointer. On error, it 26 | returns a negative number and sets an exception. 27 | XXX (tim): Not true. On error, _PyArena_New() actually returns NULL, 28 | XXX and looks like it may or may not set an exception (e.g., if the 29 | XXX internal PyList_New(0) returns NULL, _PyArena_New() passes that on 30 | XXX and an exception is set; OTOH, if the internal 31 | XXX block_new(DEFAULT_BLOCK_SIZE) returns NULL, that's passed on but 32 | XXX an exception is not set in that case). 33 | */ 34 | PyAPI_FUNC(PyArena*) _PyArena_New(void); 35 | PyAPI_FUNC(void) _PyArena_Free(PyArena *); 36 | 37 | /* Mostly like malloc(), return the address of a block of memory spanning 38 | * `size` bytes, or return NULL (without setting an exception) if enough 39 | * new memory can't be obtained. 
Unlike malloc(0), _PyArena_Malloc() with 40 | * size=0 does not guarantee to return a unique pointer (the pointer 41 | * returned may equal one or more other pointers obtained from 42 | * _PyArena_Malloc()). 43 | * Note that pointers obtained via _PyArena_Malloc() must never be passed to 44 | * the system free() or realloc(), or to any of Python's similar memory- 45 | * management functions. _PyArena_Malloc()-obtained pointers remain valid 46 | * until _PyArena_Free(ar) is called, at which point all pointers obtained 47 | * from the arena `ar` become invalid simultaneously. 48 | */ 49 | PyAPI_FUNC(void*) _PyArena_Malloc(PyArena *, size_t size); 50 | 51 | /* This routine isn't a proper arena allocation routine. It takes 52 | * a PyObject* and records it so that it can be DECREFed when the 53 | * arena is freed. 54 | */ 55 | PyAPI_FUNC(int) _PyArena_AddPyObject(PyArena *, PyObject *); 56 | 57 | #ifdef __cplusplus 58 | } 59 | #endif 60 | #endif /* !Ta27_INTERNAL_PYARENA_H */ 61 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build wheels 2 | 3 | on: [push, pull_request, workflow_dispatch] 4 | 5 | jobs: 6 | build_wheels: 7 | name: py${{ matrix.python-version }} on ${{ matrix.os }} 8 | runs-on: ${{ matrix.os }} 9 | strategy: 10 | fail-fast: false 11 | matrix: 12 | # cibuildwheel builds linux wheels inside a manylinux container 13 | # it also takes care of procuring the correct python version for us 14 | os: [ubuntu-latest, windows-latest, macos-latest] 15 | python-version: [36, 37, 38, 39, 310, 311] 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | - name: Build wheels 20 | uses: pypa/cibuildwheel@v2.10.2 21 | env: 22 | CIBW_BUILD: "cp${{ matrix.python-version }}-*" 23 | CIBW_SKIP: "*-manylinux_i686 *-musllinux_i686 *-win32" 24 | CIBW_ARCHS_MACOS: "x86_64 arm64" 25 | CIBW_BUILD_VERBOSITY: 1 26 | 
CIBW_BEFORE_TEST: pip install pytest 27 | CIBW_TEST_COMMAND: pytest {package} 28 | - uses: actions/upload-artifact@v3 29 | with: 30 | name: dist 31 | path: ./wheelhouse/*.whl 32 | 33 | build_wheels_aarch64: 34 | name: py${{ matrix.python-version }} on ${{ matrix.os }} (aarch64) 35 | runs-on: ${{ matrix.os }} 36 | strategy: 37 | fail-fast: false 38 | matrix: 39 | # cibuildwheel builds linux wheels inside a manylinux container 40 | # it also takes care of procuring the correct python version for us 41 | os: [ubuntu-latest] 42 | python-version: [36, 37, 38, 39, 310, 311] 43 | 44 | steps: 45 | - uses: actions/checkout@v3 46 | - name: Setup up QEMU 47 | uses: docker/setup-qemu-action@v2 48 | with: 49 | platforms: arm64 50 | - name: Build wheels 51 | uses: pypa/cibuildwheel@v2.10.2 52 | env: 53 | CIBW_BUILD: "cp${{ matrix.python-version }}-*" 54 | CIBW_ARCHS: aarch64 55 | CIBW_BUILD_VERBOSITY: 1 56 | CIBW_BEFORE_TEST: pip install pytest 57 | CIBW_TEST_COMMAND: pytest {package} 58 | - uses: actions/upload-artifact@v3 59 | with: 60 | name: dist 61 | path: ./wheelhouse/*.whl 62 | 63 | build_sdist_python_wheel: 64 | name: sdist and python wheel 65 | runs-on: ubuntu-latest 66 | steps: 67 | - uses: actions/checkout@v3 68 | - uses: actions/setup-python@v4 69 | name: Install Python 70 | with: 71 | python-version: "3.9" 72 | - name: Run check-manifest 73 | run: | 74 | pip install check-manifest 75 | check-manifest -v 76 | - name: Build sdist and wheel 77 | run: | 78 | pip install --upgrade setuptools pip wheel 79 | python setup.py sdist 80 | - uses: actions/upload-artifact@v3 81 | with: 82 | name: dist 83 | path: | 84 | dist/*.tar.gz 85 | -------------------------------------------------------------------------------- /ast3/Include/token.h: -------------------------------------------------------------------------------- 1 | 2 | /* Token types */ 3 | #ifndef Py_LIMITED_API 4 | #ifndef Ta3_TOKEN_H 5 | #define Ta3_TOKEN_H 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | 
#undef TILDE /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */ 11 | 12 | #define ENDMARKER 0 13 | #define NAME 1 14 | #define NUMBER 2 15 | #define STRING 3 16 | #define NEWLINE 4 17 | #define INDENT 5 18 | #define DEDENT 6 19 | #define LPAR 7 20 | #define RPAR 8 21 | #define LSQB 9 22 | #define RSQB 10 23 | #define COLON 11 24 | #define COMMA 12 25 | #define SEMI 13 26 | #define PLUS 14 27 | #define MINUS 15 28 | #define STAR 16 29 | #define SLASH 17 30 | #define VBAR 18 31 | #define AMPER 19 32 | #define LESS 20 33 | #define GREATER 21 34 | #define EQUAL 22 35 | #define DOT 23 36 | #define PERCENT 24 37 | #define LBRACE 25 38 | #define RBRACE 26 39 | #define EQEQUAL 27 40 | #define NOTEQUAL 28 41 | #define LESSEQUAL 29 42 | #define GREATEREQUAL 30 43 | #define TILDE 31 44 | #define CIRCUMFLEX 32 45 | #define LEFTSHIFT 33 46 | #define RIGHTSHIFT 34 47 | #define DOUBLESTAR 35 48 | #define PLUSEQUAL 36 49 | #define MINEQUAL 37 50 | #define STAREQUAL 38 51 | #define SLASHEQUAL 39 52 | #define PERCENTEQUAL 40 53 | #define AMPEREQUAL 41 54 | #define VBAREQUAL 42 55 | #define CIRCUMFLEXEQUAL 43 56 | #define LEFTSHIFTEQUAL 44 57 | #define RIGHTSHIFTEQUAL 45 58 | #define DOUBLESTAREQUAL 46 59 | #define DOUBLESLASH 47 60 | #define DOUBLESLASHEQUAL 48 61 | #define AT 49 62 | #define ATEQUAL 50 63 | #define RARROW 51 64 | #define ELLIPSIS 52 65 | /* Don't forget to update the table _Ta3Parser_TokenNames in tokenizer.c! 
*/ 66 | #define OP 53 67 | #define AWAIT 54 68 | #define ASYNC 55 69 | #define TYPE_IGNORE 56 70 | #define TYPE_COMMENT 57 71 | #define ERRORTOKEN 58 72 | /* These aren't used by the C tokenizer but are needed for tokenize.py */ 73 | #define COMMENT 59 74 | #define NL 60 75 | #define ENCODING 61 76 | #define N_TOKENS 62 77 | 78 | /* Special definitions for cooperation with parser */ 79 | 80 | #define NT_OFFSET 256 81 | 82 | #define ISTERMINAL(x) ((x) < NT_OFFSET) 83 | #define ISNONTERMINAL(x) ((x) >= NT_OFFSET) 84 | #define ISEOF(x) ((x) == ENDMARKER) 85 | 86 | 87 | extern const char * _Ta3Parser_TokenNames[]; /* Token names */ 88 | extern int Ta3Token_OneChar(int); 89 | extern int Ta3Token_TwoChars(int, int); 90 | extern int Ta3Token_ThreeChars(int, int, int); 91 | 92 | #ifdef __cplusplus 93 | } 94 | #endif 95 | #endif /* !Ta3_TOKEN_H */ 96 | #endif /* Py_LIMITED_API */ 97 | -------------------------------------------------------------------------------- /tools/Python-asdl.patch: -------------------------------------------------------------------------------- 1 | diff --git a/ast3/Parser/Python.asdl b/ast3/Parser/Python.asdl 2 | index f470ad1..7bde99c 100644 3 | --- a/ast3/Parser/Python.asdl 4 | +++ b/ast3/Parser/Python.asdl 5 | @@ -6,17 +6,18 @@ 6 | 7 | module Python 8 | { 9 | - mod = Module(stmt* body) 10 | + mod = Module(stmt* body, type_ignore *type_ignores) 11 | | Interactive(stmt* body) 12 | | Expression(expr body) 13 | + | FunctionType(expr* argtypes, expr returns) 14 | 15 | -- not really an actual node but useful in Jython's typesystem. 16 | | Suite(stmt* body) 17 | 18 | stmt = FunctionDef(identifier name, arguments args, 19 | - stmt* body, expr* decorator_list, expr? returns) 20 | + stmt* body, expr* decorator_list, expr? returns, string? type_comment) 21 | | AsyncFunctionDef(identifier name, arguments args, 22 | - stmt* body, expr* decorator_list, expr? returns) 23 | + stmt* body, expr* decorator_list, expr? returns, string? 
type_comment) 24 | 25 | | ClassDef(identifier name, 26 | expr* bases, 27 | @@ -26,18 +27,18 @@ module Python 28 | | Return(expr? value) 29 | 30 | | Delete(expr* targets) 31 | - | Assign(expr* targets, expr value) 32 | + | Assign(expr* targets, expr value, string? type_comment) 33 | | AugAssign(expr target, operator op, expr value) 34 | -- 'simple' indicates that we annotate simple name without parens 35 | | AnnAssign(expr target, expr annotation, expr? value, int simple) 36 | 37 | -- use 'orelse' because else is a keyword in target languages 38 | - | For(expr target, expr iter, stmt* body, stmt* orelse) 39 | - | AsyncFor(expr target, expr iter, stmt* body, stmt* orelse) 40 | + | For(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment) 41 | + | AsyncFor(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment) 42 | | While(expr test, stmt* body, stmt* orelse) 43 | | If(expr test, stmt* body, stmt* orelse) 44 | - | With(withitem* items, stmt* body) 45 | - | AsyncWith(withitem* items, stmt* body) 46 | + | With(withitem* items, stmt* body, string? type_comment) 47 | + | AsyncWith(withitem* items, stmt* body, string? type_comment) 48 | 49 | | Raise(expr? exc, expr? cause) 50 | | Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody) 51 | @@ -118,7 +119,7 @@ module Python 52 | arguments = (arg* args, arg? vararg, arg* kwonlyargs, expr* kw_defaults, 53 | arg? kwarg, expr* defaults) 54 | 55 | - arg = (identifier arg, expr? annotation) 56 | + arg = (identifier arg, expr? annotation, string? type_comment) 57 | attributes (int lineno, int col_offset) 58 | 59 | -- keyword arguments supplied to call (NULL identifier for **kwargs) 60 | @@ -128,5 +129,7 @@ module Python 61 | alias = (identifier name, identifier? asname) 62 | 63 | withitem = (expr context_expr, expr? 
optional_vars) 64 | + 65 | + type_ignore = TypeIgnore(int lineno) 66 | } 67 | 68 | -------------------------------------------------------------------------------- /tools/parsetok.patch: -------------------------------------------------------------------------------- 1 | diff --git a/ast3/Parser/parsetok.c b/ast3/Parser/parsetok.c 2 | index 9f01a0d..5529feb 100644 3 | --- a/ast3/Parser/parsetok.c 4 | +++ b/ast3/Parser/parsetok.c 5 | @@ -177,6 +177,38 @@ warn(const char *msg, const char *filename, int lineno) 6 | #endif 7 | #endif 8 | 9 | +typedef struct { 10 | + int *items; 11 | + size_t size; 12 | + size_t num_items; 13 | +} growable_int_array; 14 | + 15 | +int growable_int_array_init(growable_int_array *arr, size_t initial_size) { 16 | + assert(initial_size > 0); 17 | + arr->items = malloc(initial_size * sizeof(*arr->items)); 18 | + arr->size = initial_size; 19 | + arr->num_items = 0; 20 | + 21 | + return arr->items != NULL; 22 | +} 23 | + 24 | +int growable_int_array_add(growable_int_array *arr, int item) { 25 | + if (arr->num_items >= arr->size) { 26 | + arr->size *= 2; 27 | + arr->items = realloc(arr->items, arr->size * sizeof(*arr->items)); 28 | + if (!arr->items) 29 | + return 0; 30 | + } 31 | + 32 | + arr->items[arr->num_items] = item; 33 | + arr->num_items++; 34 | + return 1; 35 | +} 36 | + 37 | +void growable_int_array_deallocate(growable_int_array *arr) { 38 | + free(arr->items); 39 | +} 40 | + 41 | /* Parse input coming from the given tokenizer structure. 42 | Return error code. 
*/ 43 | 44 | @@ -188,6 +220,13 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, 45 | node *n; 46 | int started = 0; 47 | 48 | + growable_int_array type_ignores; 49 | + if (!growable_int_array_init(&type_ignores, 10)) { 50 | + err_ret->error = E_NOMEM; 51 | + Ta3Tokenizer_Free(tok); 52 | + return NULL; 53 | + } 54 | + 55 | if ((ps = Ta3Parser_New(g, start)) == NULL) { 56 | err_ret->error = E_NOMEM; 57 | Ta3Tokenizer_Free(tok); 58 | @@ -259,6 +298,14 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, 59 | else 60 | col_offset = -1; 61 | 62 | + if (type == TYPE_IGNORE) { 63 | + if (!growable_int_array_add(&type_ignores, tok->lineno)) { 64 | + err_ret->error = E_NOMEM; 65 | + break; 66 | + } 67 | + continue; 68 | + } 69 | + 70 | if ((err_ret->error = 71 | Ta3Parser_AddToken(ps, (int)type, str, 72 | tok->lineno, col_offset, 73 | @@ -275,6 +322,22 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, 74 | n = ps->p_tree; 75 | ps->p_tree = NULL; 76 | 77 | + if (n->n_type == file_input) { 78 | + /* Put type_ignore nodes in the ENDMARKER of file_input. 
*/ 79 | + int num; 80 | + node *ch; 81 | + size_t i; 82 | + 83 | + num = NCH(n); 84 | + ch = CHILD(n, num - 1); 85 | + REQ(ch, ENDMARKER); 86 | + 87 | + for (i = 0; i < type_ignores.num_items; i++) { 88 | + Ta3Node_AddChild(ch, TYPE_IGNORE, NULL, type_ignores.items[i], 0); 89 | + } 90 | + } 91 | + growable_int_array_deallocate(&type_ignores); 92 | + 93 | #ifndef PGEN 94 | /* Check that the source for a single input statement really 95 | is a single statement by looking at what is left in the 96 | -------------------------------------------------------------------------------- /ast27/Parser/tokenizer.h: -------------------------------------------------------------------------------- 1 | #ifndef Ta27_TOKENIZER_H 2 | #define Ta27_TOKENIZER_H 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | #include "object.h" 8 | 9 | /* Tokenizer interface */ 10 | 11 | #include "../Include/token.h" 12 | 13 | #define MAXINDENT 100 /* Max indentation level */ 14 | 15 | /* Tokenizer state */ 16 | struct tok_state { 17 | /* Input state; buf <= cur <= inp <= end */ 18 | /* NB an entire line is held in the buffer */ 19 | char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */ 20 | char *cur; /* Next character in buffer */ 21 | char *inp; /* End of data in buffer */ 22 | char *end; /* End of input buffer if buf != NULL */ 23 | char *start; /* Start of current token if not NULL */ 24 | int done; /* E_OK normally, E_EOF at EOF, otherwise error code */ 25 | /* NB If done != E_OK, cur must be == inp!!! 
*/ 26 | FILE *fp; /* Rest of input; NULL if tokenizing a string */ 27 | int tabsize; /* Tab spacing */ 28 | int indent; /* Current indentation index */ 29 | int indstack[MAXINDENT]; /* Stack of indents */ 30 | int atbol; /* Nonzero if at begin of new line */ 31 | int pendin; /* Pending indents (if > 0) or dedents (if < 0) */ 32 | char *prompt, *nextprompt; /* For interactive prompting */ 33 | int lineno; /* Current line number */ 34 | int level; /* () [] {} Parentheses nesting level */ 35 | /* Used to allow free continuations inside them */ 36 | /* Stuff for checking on different tab sizes */ 37 | const char *filename; /* For error messages */ 38 | int altwarning; /* Issue warning if alternate tabs don't match */ 39 | int alterror; /* Issue error if alternate tabs don't match */ 40 | int alttabsize; /* Alternate tab spacing */ 41 | int altindstack[MAXINDENT]; /* Stack of alternate indents */ 42 | /* Stuff for PEP 0263 */ 43 | int decoding_state; /* -1:decoding, 0:init, 1:raw */ 44 | int decoding_erred; /* whether erred in decoding */ 45 | int read_coding_spec; /* whether 'coding:...' has been read */ 46 | char *encoding; 47 | int cont_line; /* whether we are in a continuation line. */ 48 | const char* line_start; /* pointer to start of current line */ 49 | #ifndef PGEN 50 | PyObject *decoding_readline; /* codecs.open(...).readline */ 51 | PyObject *decoding_buffer; 52 | #endif 53 | const char* enc; 54 | const char* str; 55 | const char* input; /* Tokenizer's newline translated copy of the string. 
*/ 56 | }; 57 | 58 | extern struct tok_state *Ta27Tokenizer_FromString(const char *, int); 59 | extern struct tok_state *Ta27Tokenizer_FromUTF8(const char *, int); 60 | extern struct tok_state *Ta27Tokenizer_FromFile(FILE *, char *, char *); 61 | extern void Ta27Tokenizer_Free(struct tok_state *); 62 | extern int Ta27Tokenizer_Get(struct tok_state *, char **, char **); 63 | #if defined(PGEN) || defined(Py_USING_UNICODE) 64 | extern char * Ta27Tokenizer_RestoreEncoding(struct tok_state* tok, 65 | int len, int *offset); 66 | #endif 67 | 68 | #ifdef __cplusplus 69 | } 70 | #endif 71 | #endif /* !Ta27_TOKENIZER_H */ 72 | -------------------------------------------------------------------------------- /ast3/Include/parsetok.h: -------------------------------------------------------------------------------- 1 | 2 | /* Parser-tokenizer link interface */ 3 | #ifndef Py_LIMITED_API 4 | #ifndef Ta3_PARSETOK_H 5 | #define Ta3_PARSETOK_H 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | typedef struct { 11 | int error; 12 | #ifndef PGEN 13 | /* The filename is useless for pgen, see comment in tok_state structure */ 14 | PyObject *filename; 15 | #endif 16 | int lineno; 17 | int offset; 18 | char *text; /* UTF-8-encoded string */ 19 | int token; 20 | int expected; 21 | } perrdetail; 22 | 23 | #if 0 24 | #define PyPARSE_YIELD_IS_KEYWORD 0x0001 25 | #endif 26 | 27 | #define PyPARSE_DONT_IMPLY_DEDENT 0x0002 28 | 29 | #if 0 30 | #define PyPARSE_WITH_IS_KEYWORD 0x0003 31 | #define PyPARSE_PRINT_IS_FUNCTION 0x0004 32 | #define PyPARSE_UNICODE_LITERALS 0x0008 33 | #endif 34 | 35 | #define PyPARSE_IGNORE_COOKIE 0x0010 36 | #define PyPARSE_BARRY_AS_BDFL 0x0020 37 | #define PyPARSE_ASYNC_ALWAYS 0x8000 38 | 39 | extern node * Ta3Parser_ParseString(const char *, grammar *, int, 40 | perrdetail *); 41 | extern node * Ta3Parser_ParseFile (FILE *, const char *, grammar *, int, 42 | const char *, const char *, 43 | perrdetail *); 44 | 45 | extern node * 
Ta3Parser_ParseStringFlags(const char *, grammar *, int, 46 | perrdetail *, int); 47 | extern node * Ta3Parser_ParseFileFlags( 48 | FILE *fp, 49 | const char *filename, /* decoded from the filesystem encoding */ 50 | const char *enc, 51 | grammar *g, 52 | int start, 53 | const char *ps1, 54 | const char *ps2, 55 | perrdetail *err_ret, 56 | int flags); 57 | extern node * Ta3Parser_ParseFileFlagsEx( 58 | FILE *fp, 59 | const char *filename, /* decoded from the filesystem encoding */ 60 | const char *enc, 61 | grammar *g, 62 | int start, 63 | const char *ps1, 64 | const char *ps2, 65 | perrdetail *err_ret, 66 | int *flags); 67 | extern node * Ta3Parser_ParseFileObject( 68 | FILE *fp, 69 | PyObject *filename, 70 | const char *enc, 71 | grammar *g, 72 | int start, 73 | const char *ps1, 74 | const char *ps2, 75 | perrdetail *err_ret, 76 | int *flags); 77 | 78 | extern node * Ta3Parser_ParseStringFlagsFilename( 79 | const char *s, 80 | const char *filename, /* decoded from the filesystem encoding */ 81 | grammar *g, 82 | int start, 83 | perrdetail *err_ret, 84 | int flags); 85 | extern node * Ta3Parser_ParseStringFlagsFilenameEx( 86 | const char *s, 87 | const char *filename, /* decoded from the filesystem encoding */ 88 | grammar *g, 89 | int start, 90 | perrdetail *err_ret, 91 | int *flags); 92 | extern node * Ta3Parser_ParseStringObject( 93 | const char *s, 94 | PyObject *filename, 95 | grammar *g, 96 | int start, 97 | perrdetail *err_ret, 98 | int *flags); 99 | 100 | /* Note that the following functions are defined in pythonrun.c, 101 | not in parsetok.c */ 102 | extern void PyParser_SetError(perrdetail *); 103 | extern void PyParser_ClearError(perrdetail *); 104 | 105 | #ifdef __cplusplus 106 | } 107 | #endif 108 | #endif /* !Ta3_PARSETOK_H */ 109 | #endif /* !Py_LIMITED_API */ 110 | -------------------------------------------------------------------------------- /ast3/Parser/tokenizer.h: 
-------------------------------------------------------------------------------- 1 | #ifndef Ta3_TOKENIZER_H 2 | #define Ta3_TOKENIZER_H 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | #include "object.h" 8 | 9 | /* Tokenizer interface */ 10 | 11 | #include "../Include/token.h" 12 | 13 | #define MAXINDENT 100 /* Max indentation level */ 14 | 15 | enum decoding_state { 16 | STATE_INIT, 17 | STATE_RAW, 18 | STATE_NORMAL /* have a codec associated with input */ 19 | }; 20 | 21 | /* Tokenizer state */ 22 | struct tok_state { 23 | /* Input state; buf <= cur <= inp <= end */ 24 | /* NB an entire line is held in the buffer */ 25 | char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */ 26 | char *cur; /* Next character in buffer */ 27 | char *inp; /* End of data in buffer */ 28 | char *end; /* End of input buffer if buf != NULL */ 29 | char *start; /* Start of current token if not NULL */ 30 | int done; /* E_OK normally, E_EOF at EOF, otherwise error code */ 31 | /* NB If done != E_OK, cur must be == inp!!! */ 32 | FILE *fp; /* Rest of input; NULL if tokenizing a string */ 33 | int tabsize; /* Tab spacing */ 34 | int indent; /* Current indentation index */ 35 | int indstack[MAXINDENT]; /* Stack of indents */ 36 | int atbol; /* Nonzero if at begin of new line */ 37 | int pendin; /* Pending indents (if > 0) or dedents (if < 0) */ 38 | const char *prompt, *nextprompt; /* For interactive prompting */ 39 | int lineno; /* Current line number */ 40 | int level; /* () [] {} Parentheses nesting level */ 41 | /* Used to allow free continuations inside them */ 42 | /* Stuff for checking on different tab sizes */ 43 | #ifndef PGEN 44 | /* pgen doesn't have access to Python codecs, it cannot decode the input 45 | filename. The bytes filename might be kept, but it is only used by 46 | indenterror() and it is not really needed: pgen only compiles one file 47 | (Grammar/Grammar). 
*/ 48 | PyObject *filename; 49 | #endif 50 | int altindstack[MAXINDENT]; /* Stack of alternate indents */ 51 | /* Stuff for PEP 0263 */ 52 | enum decoding_state decoding_state; 53 | int decoding_erred; /* whether erred in decoding */ 54 | int read_coding_spec; /* whether 'coding:...' has been read */ 55 | char *encoding; /* Source encoding. */ 56 | int cont_line; /* whether we are in a continuation line. */ 57 | const char* line_start; /* pointer to start of current line */ 58 | #ifndef PGEN 59 | PyObject *decoding_readline; /* open(...).readline */ 60 | PyObject *decoding_buffer; 61 | #endif 62 | const char* enc; /* Encoding for the current str. */ 63 | const char* str; 64 | const char* input; /* Tokenizer's newline translated copy of the string. */ 65 | 66 | /* async/await related fields; can be removed in 3.7 when async and await 67 | become normal keywords. */ 68 | int async_def; /* =1 if tokens are inside an 'async def' body. */ 69 | int async_def_indent; /* Indentation level of the outermost 'async def'. */ 70 | int async_def_nl; /* =1 if the outermost 'async def' had at least one 71 | NEWLINE token after it. */ 72 | int async_always; /* =1 if async/await are always keywords */ 73 | }; 74 | 75 | extern struct tok_state *Ta3Tokenizer_FromString(const char *, int); 76 | extern struct tok_state *Ta3Tokenizer_FromUTF8(const char *, int); 77 | extern struct tok_state *Ta3Tokenizer_FromFile(FILE *, const char*, 78 | const char *, const char *); 79 | extern void Ta3Tokenizer_Free(struct tok_state *); 80 | extern int Ta3Tokenizer_Get(struct tok_state *, char **, char **); 81 | 82 | #ifdef __cplusplus 83 | } 84 | #endif 85 | #endif /* !Ta3_TOKENIZER_H */ 86 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # End of life 2 | 3 | This project is no longer maintained. 4 | 5 | Use the standard library `ast` module instead. 
6 | See https://github.com/python/typed_ast/issues/179. 7 | 8 | # Typed AST 9 | 10 | [![Build Status](https://travis-ci.org/python/typed_ast.svg?branch=master)](https://travis-ci.org/python/typed_ast) 11 | [![Chat at https://gitter.im/python/typed_ast](https://badges.gitter.im/python/typed_ast.svg)](https://gitter.im/python/typed_ast) 12 | 13 | `typed_ast` is a Python 3 package that provides a Python 2.7 and Python 3 14 | parser similar to the standard `ast` library. Unlike `ast` up to Python 3.7, the parsers in 15 | `typed_ast` include [PEP 484](https://www.python.org/dev/peps/pep-0484/) type 16 | comments and are independent of the version of Python under which they are run. 17 | The `typed_ast` parsers produce the standard Python AST (plus type comments), 18 | and are both fast and correct, as they are based on the CPython 2.7 and 3.7 19 | parsers. `typed_ast` runs on CPython 3.6-3.10 on Linux, OS X and Windows. 20 | 21 | **Note:** Starting with Python 3.8, we recommend to use the native `ast` parser 22 | (see below). 23 | 24 | ## Development Philosophy 25 | 26 | This project is a (mostly) drop-in replacement for the builtin `ast` module. It is 27 | intended to be bug-for-bug compatible and behave identically, except for the 28 | presence of a few additional fields on the returned classes and a few 29 | additional optional arguments to the `parse` call. Therefore, `typed_ast` will 30 | not accept any bugfixes for bugs in `ast` -- they should be fixed upstream 31 | instead. To avoid feature bloat, any new features for `typed_ast` should have 32 | the potential to be broadly useful and not be built just for one niche usecase 33 | or in a manner such that only one project can use them. 34 | 35 | ### Incompatibilities 36 | 37 | For the purposes of *consuming* syntax trees, this should be a drop-in replacement. 
38 | It is not a drop-in replacement for users that wish to create or transform ASTs, 39 | as a number of syntax tree classes have additional fields that must be populated 40 | when constructing them. 41 | 42 | Due to reliance on certain C APIs, this library does not build on and there 43 | are [no plans to support PyPy](https://github.com/python/typed_ast/issues/111). 44 | 45 | ### Python 3.8 46 | 47 | `typed_ast` will not be updated to support parsing Python 3.8 and 48 | newer. Instead, it is recommended to use the stdlib `ast` module 49 | there, which has been augmented to support extracting type comments 50 | and has limited support for parsing older versions of Python 3. 51 | 52 | ## Submodules 53 | ### ast3 54 | The `ast3` parser produces the AST from a Python 3 code, up to Python 3.7. 55 | (For rationale and technical 56 | details, see [here](update_process.md).) The AST it currently produces is described in 57 | [ast3/Parser/Python.asdl](ast3/Parser/Python.asdl). If you wish to limit 58 | parsing to older versions of Python 3, `ast3` can be configured to to give a 59 | SyntaxError for new syntax features introduced beyond a given Python version. 60 | For more information, see the module docstring in 61 | [typed\_ast/ast3.py](typed_ast/ast3.py). 62 | 63 | ### ast27 64 | The `ast27` parser tracks the standard Python 2.7 AST, which is expected to 65 | never receive further updates. The AST it produces is described in 66 | [ast27/Parser/Python.asdl](ast27/Parser/Python.asdl). For more information, 67 | see the module docstring in [typed\_ast/ast27.py](typed_ast/ast27.py). 68 | 69 | ### conversions 70 | `typed_ast` also provides a `conversions` module which converts `ast27` ASTs 71 | into `ast3` ASTs. This functionality is somewhat experimental, however. For 72 | more information, see the `py2to3` docstring in 73 | [typed\_ast/conversions](typed_ast/conversions.py). 
74 | 75 | 76 | Note: as these parsers consider type comments part of the grammar, incorrectly 77 | placed type comments are considered syntax errors. 78 | 79 | ## Releases 80 | 81 | To make a new `typed_ast` release, see [`release_process.md`](release_process.md). 82 | -------------------------------------------------------------------------------- /ast27/Parser/acceler.c: -------------------------------------------------------------------------------- 1 | 2 | /* Parser accelerator module */ 3 | 4 | /* The parser as originally conceived had disappointing performance. 5 | This module does some precomputation that speeds up the selection 6 | of a DFA based upon a token, turning a search through an array 7 | into a simple indexing operation. The parser now cannot work 8 | without the accelerators installed. Note that the accelerators 9 | are installed dynamically when the parser is initialized, they 10 | are not part of the static data structure written on graminit.[ch] 11 | by the parser generator. 
*/ 12 | 13 | #include "../Include/pgenheaders.h" 14 | #include "../Include/grammar.h" 15 | #include "../Include/node.h" 16 | #include "../Include/token.h" 17 | #include "parser.h" 18 | 19 | /* Forward references */ 20 | static void fixdfa(grammar *, dfa *); 21 | static void fixstate(grammar *, state *); 22 | 23 | void 24 | Ta27Grammar_AddAccelerators(grammar *g) 25 | { 26 | dfa *d; 27 | int i; 28 | d = g->g_dfa; 29 | for (i = g->g_ndfas; --i >= 0; d++) 30 | fixdfa(g, d); 31 | g->g_accel = 1; 32 | } 33 | 34 | void 35 | Ta27Grammar_RemoveAccelerators(grammar *g) 36 | { 37 | dfa *d; 38 | int i; 39 | g->g_accel = 0; 40 | d = g->g_dfa; 41 | for (i = g->g_ndfas; --i >= 0; d++) { 42 | state *s; 43 | int j; 44 | s = d->d_state; 45 | for (j = 0; j < d->d_nstates; j++, s++) { 46 | if (s->s_accel) 47 | PyObject_FREE(s->s_accel); 48 | s->s_accel = NULL; 49 | } 50 | } 51 | } 52 | 53 | static void 54 | fixdfa(grammar *g, dfa *d) 55 | { 56 | state *s; 57 | int j; 58 | s = d->d_state; 59 | for (j = 0; j < d->d_nstates; j++, s++) 60 | fixstate(g, s); 61 | } 62 | 63 | static void 64 | fixstate(grammar *g, state *s) 65 | { 66 | arc *a; 67 | int k; 68 | int *accel; 69 | int nl = g->g_ll.ll_nlabels; 70 | s->s_accept = 0; 71 | accel = (int *) PyObject_MALLOC(nl * sizeof(int)); 72 | if (accel == NULL) { 73 | fprintf(stderr, "no mem to build parser accelerators\n"); 74 | exit(1); 75 | } 76 | for (k = 0; k < nl; k++) 77 | accel[k] = -1; 78 | a = s->s_arc; 79 | for (k = s->s_narcs; --k >= 0; a++) { 80 | int lbl = a->a_lbl; 81 | label *l = &g->g_ll.ll_label[lbl]; 82 | int type = l->lb_type; 83 | if (a->a_arrow >= (1 << 7)) { 84 | printf("XXX too many states!\n"); 85 | continue; 86 | } 87 | if (ISNONTERMINAL(type)) { 88 | dfa *d1 = Ta27Grammar_FindDFA(g, type); 89 | int ibit; 90 | if (type - NT_OFFSET >= (1 << 7)) { 91 | printf("XXX too high nonterminal number!\n"); 92 | continue; 93 | } 94 | for (ibit = 0; ibit < g->g_ll.ll_nlabels; ibit++) { 95 | if (testbit(d1->d_first, ibit)) { 96 | if 
(accel[ibit] != -1) 97 | printf("XXX ambiguity!\n"); 98 | accel[ibit] = a->a_arrow | (1 << 7) | 99 | ((type - NT_OFFSET) << 8); 100 | } 101 | } 102 | } 103 | else if (lbl == EMPTY) 104 | s->s_accept = 1; 105 | else if (lbl >= 0 && lbl < nl) 106 | accel[lbl] = a->a_arrow; 107 | } 108 | while (nl > 0 && accel[nl-1] == -1) 109 | nl--; 110 | for (k = 0; k < nl && accel[k] == -1;) 111 | k++; 112 | if (k < nl) { 113 | int i; 114 | s->s_accel = (int *) PyObject_MALLOC((nl-k) * sizeof(int)); 115 | if (s->s_accel == NULL) { 116 | fprintf(stderr, "no mem to add parser accelerators\n"); 117 | exit(1); 118 | } 119 | s->s_lower = k; 120 | s->s_upper = nl; 121 | for (i = 0; k < nl; i++, k++) 122 | s->s_accel[i] = accel[k]; 123 | } 124 | PyObject_FREE(accel); 125 | } 126 | -------------------------------------------------------------------------------- /ast3/Parser/acceler.c: -------------------------------------------------------------------------------- 1 | 2 | /* Parser accelerator module */ 3 | 4 | /* The parser as originally conceived had disappointing performance. 5 | This module does some precomputation that speeds up the selection 6 | of a DFA based upon a token, turning a search through an array 7 | into a simple indexing operation. The parser now cannot work 8 | without the accelerators installed. Note that the accelerators 9 | are installed dynamically when the parser is initialized, they 10 | are not part of the static data structure written on graminit.[ch] 11 | by the parser generator. 
*/ 12 | 13 | #include "../Include/pgenheaders.h" 14 | #include "../Include/grammar.h" 15 | #include "../Include/node.h" 16 | #include "../Include/token.h" 17 | #include "parser.h" 18 | 19 | /* Forward references */ 20 | static void fixdfa(grammar *, dfa *); 21 | static void fixstate(grammar *, state *); 22 | 23 | void 24 | Ta3Grammar_AddAccelerators(grammar *g) 25 | { 26 | dfa *d; 27 | int i; 28 | d = g->g_dfa; 29 | for (i = g->g_ndfas; --i >= 0; d++) 30 | fixdfa(g, d); 31 | g->g_accel = 1; 32 | } 33 | 34 | void 35 | Ta3Grammar_RemoveAccelerators(grammar *g) 36 | { 37 | dfa *d; 38 | int i; 39 | g->g_accel = 0; 40 | d = g->g_dfa; 41 | for (i = g->g_ndfas; --i >= 0; d++) { 42 | state *s; 43 | int j; 44 | s = d->d_state; 45 | for (j = 0; j < d->d_nstates; j++, s++) { 46 | if (s->s_accel) 47 | PyObject_FREE(s->s_accel); 48 | s->s_accel = NULL; 49 | } 50 | } 51 | } 52 | 53 | static void 54 | fixdfa(grammar *g, dfa *d) 55 | { 56 | state *s; 57 | int j; 58 | s = d->d_state; 59 | for (j = 0; j < d->d_nstates; j++, s++) 60 | fixstate(g, s); 61 | } 62 | 63 | static void 64 | fixstate(grammar *g, state *s) 65 | { 66 | arc *a; 67 | int k; 68 | int *accel; 69 | int nl = g->g_ll.ll_nlabels; 70 | s->s_accept = 0; 71 | accel = (int *) PyObject_MALLOC(nl * sizeof(int)); 72 | if (accel == NULL) { 73 | fprintf(stderr, "no mem to build parser accelerators\n"); 74 | exit(1); 75 | } 76 | for (k = 0; k < nl; k++) 77 | accel[k] = -1; 78 | a = s->s_arc; 79 | for (k = s->s_narcs; --k >= 0; a++) { 80 | int lbl = a->a_lbl; 81 | label *l = &g->g_ll.ll_label[lbl]; 82 | int type = l->lb_type; 83 | if (a->a_arrow >= (1 << 7)) { 84 | printf("XXX too many states!\n"); 85 | continue; 86 | } 87 | if (ISNONTERMINAL(type)) { 88 | dfa *d1 = Ta3Grammar_FindDFA(g, type); 89 | int ibit; 90 | if (type - NT_OFFSET >= (1 << 7)) { 91 | printf("XXX too high nonterminal number!\n"); 92 | continue; 93 | } 94 | for (ibit = 0; ibit < g->g_ll.ll_nlabels; ibit++) { 95 | if (testbit(d1->d_first, ibit)) { 96 | if 
(accel[ibit] != -1) 97 | printf("XXX ambiguity!\n"); 98 | accel[ibit] = a->a_arrow | (1 << 7) | 99 | ((type - NT_OFFSET) << 8); 100 | } 101 | } 102 | } 103 | else if (lbl == EMPTY) 104 | s->s_accept = 1; 105 | else if (lbl >= 0 && lbl < nl) 106 | accel[lbl] = a->a_arrow; 107 | } 108 | while (nl > 0 && accel[nl-1] == -1) 109 | nl--; 110 | for (k = 0; k < nl && accel[k] == -1;) 111 | k++; 112 | if (k < nl) { 113 | int i; 114 | s->s_accel = (int *) PyObject_MALLOC((nl-k) * sizeof(int)); 115 | if (s->s_accel == NULL) { 116 | fprintf(stderr, "no mem to add parser accelerators\n"); 117 | exit(1); 118 | } 119 | s->s_lower = k; 120 | s->s_upper = nl; 121 | for (i = 0; k < nl; i++, k++) 122 | s->s_accel[i] = accel[k]; 123 | } 124 | PyObject_FREE(accel); 125 | } 126 | -------------------------------------------------------------------------------- /tools/Grammar.patch: -------------------------------------------------------------------------------- 1 | diff --git a/ast3/Grammar/Grammar b/ast3/Grammar/Grammar 2 | index b139e9f..dfd730f 100644 3 | --- a/ast3/Grammar/Grammar 4 | +++ b/ast3/Grammar/Grammar 5 | @@ -14,7 +14,10 @@ 6 | # single_input is a single interactive statement; 7 | # file_input is a module or sequence of commands read from an input file; 8 | # eval_input is the input for the eval() functions. 9 | +# func_type_input is a PEP 484 Python 2 function type comment 10 | # NB: compound_stmt in single_input is followed by extra NEWLINE! 
11 | +# NB: due to the way TYPE_COMMENT is tokenized it will always be followed by a 12 | +# NEWLINE 13 | single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE 14 | file_input: (NEWLINE | stmt)* ENDMARKER 15 | eval_input: testlist NEWLINE* ENDMARKER 16 | @@ -24,14 +27,14 @@ decorators: decorator+ 17 | decorated: decorators (classdef | funcdef | async_funcdef) 18 | 19 | async_funcdef: ASYNC funcdef 20 | -funcdef: 'def' NAME parameters ['->' test] ':' suite 21 | +funcdef: 'def' NAME parameters ['->' test] ':' [TYPE_COMMENT] suite 22 | 23 | parameters: '(' [typedargslist] ')' 24 | -typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [ 25 | - '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] 26 | - | '**' tfpdef [',']]] 27 | - | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]] 28 | - | '**' tfpdef [',']) 29 | +typedargslist: (tfpdef ['=' test] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [ 30 | + '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) 31 | + | '**' tfpdef [','] [TYPE_COMMENT]]]) 32 | + | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) 33 | + | '**' tfpdef [','] [TYPE_COMMENT]) 34 | tfpdef: NAME [':' test] 35 | varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ 36 | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] 37 | @@ -46,7 +49,7 @@ simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE 38 | small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | 39 | import_stmt | global_stmt | nonlocal_stmt | assert_stmt) 40 | expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | 41 | - ('=' (yield_expr|testlist_star_expr))*) 42 | + ('=' (yield_expr|testlist_star_expr))* [TYPE_COMMENT]) 43 | annassign: ':' test ['=' test] 44 | testlist_star_expr: (test|star_expr) (',' (test|star_expr))* 
[','] 45 | augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | 46 | @@ -78,17 +81,18 @@ compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef 47 | async_stmt: ASYNC (funcdef | with_stmt | for_stmt) 48 | if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] 49 | while_stmt: 'while' test ':' suite ['else' ':' suite] 50 | -for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] 51 | +for_stmt: 'for' exprlist 'in' testlist ':' [TYPE_COMMENT] suite ['else' ':' suite] 52 | try_stmt: ('try' ':' suite 53 | ((except_clause ':' suite)+ 54 | ['else' ':' suite] 55 | ['finally' ':' suite] | 56 | 'finally' ':' suite)) 57 | -with_stmt: 'with' with_item (',' with_item)* ':' suite 58 | +with_stmt: 'with' with_item (',' with_item)* ':' [TYPE_COMMENT] suite 59 | with_item: test ['as' expr] 60 | # NB compile.c makes sure that the default except clause is last 61 | except_clause: 'except' [test ['as' NAME]] 62 | -suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT 63 | +# the TYPE_COMMENT in suites is only parsed for funcdefs, but can't go elsewhere due to ambiguity 64 | +suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT 65 | 66 | test: or_test ['if' or_test 'else' test] | lambdef 67 | test_nocond: or_test | lambdef_nocond 68 | @@ -154,3 +158,10 @@ encoding_decl: NAME 69 | 70 | yield_expr: 'yield' [yield_arg] 71 | yield_arg: 'from' test | testlist 72 | + 73 | +func_type_input: func_type NEWLINE* ENDMARKER 74 | +func_type: '(' [typelist] ')' '->' test 75 | +# typelist is a modified typedargslist (see above) 76 | +typelist: (test (',' test)* [',' 77 | + ['*' [test] (',' test)* [',' '**' test] | '**' test]] 78 | + | '*' [test] (',' test)* [',' '**' test] | '**' test) 79 | -------------------------------------------------------------------------------- /tools/asdl_c.patch: -------------------------------------------------------------------------------- 1 | --- 
/Users/guido/src/cpython37/Parser/asdl_c.py 2018-09-10 08:18:23.000000000 -0700 2 | +++ ast3/Parser/asdl_c.py 2019-01-15 16:13:24.000000000 -0800 3 | @@ -270,9 +270,9 @@ 4 | margs = "a0" 5 | for i in range(1, len(args)+1): 6 | margs += ", a%d" % i 7 | - self.emit("#define %s(%s) _Py_%s(%s)" % (name, margs, name, margs), 0, 8 | + self.emit("#define %s(%s) _Ta3_%s(%s)" % (name, margs, name, margs), 0, 9 | reflow=False) 10 | - self.emit("%s _Py_%s(%s);" % (ctype, name, argstr), False) 11 | + self.emit("%s _Ta3_%s(%s);" % (ctype, name, argstr), False) 12 | 13 | def visitProduct(self, prod, name): 14 | self.emit_function(name, get_c_type(name), 15 | @@ -531,9 +531,9 @@ 16 | self.emit("}", depth+1) 17 | self.emit("len = PyList_GET_SIZE(tmp);", depth+1) 18 | if self.isSimpleType(field): 19 | - self.emit("%s = _Py_asdl_int_seq_new(len, arena);" % field.name, depth+1) 20 | + self.emit("%s = _Ta3_asdl_int_seq_new(len, arena);" % field.name, depth+1) 21 | else: 22 | - self.emit("%s = _Py_asdl_seq_new(len, arena);" % field.name, depth+1) 23 | + self.emit("%s = _Ta3_asdl_seq_new(len, arena);" % field.name, depth+1) 24 | self.emit("if (%s == NULL) goto failed;" % field.name, depth+1) 25 | self.emit("for (i = 0; i < len; i++) {", depth+1) 26 | self.emit("%s val;" % ctype, depth+2) 27 | @@ -729,8 +729,8 @@ 28 | }; 29 | 30 | static PyTypeObject AST_type = { 31 | - PyVarObject_HEAD_INIT(&PyType_Type, 0) 32 | - "_ast.AST", 33 | + PyVarObject_HEAD_INIT(NULL, 0) 34 | + "_ast3.AST", 35 | sizeof(AST_object), 36 | 0, 37 | (destructor)ast_dealloc, /* tp_dealloc */ 38 | @@ -774,7 +774,7 @@ 39 | static PyTypeObject* make_type(char *type, PyTypeObject* base, char**fields, int num_fields) 40 | { 41 | _Py_IDENTIFIER(__module__); 42 | - _Py_IDENTIFIER(_ast); 43 | + _Py_IDENTIFIER(_ast3); 44 | PyObject *fnames, *result; 45 | int i; 46 | fnames = PyTuple_New(num_fields); 47 | @@ -791,7 +791,7 @@ 48 | type, base, 49 | _PyUnicode_FromId(&PyId__fields), fnames, 50 | 
_PyUnicode_FromId(&PyId___module__), 51 | - _PyUnicode_FromId(&PyId__ast)); 52 | + _PyUnicode_FromId(&PyId__ast3)); 53 | Py_DECREF(fnames); 54 | return (PyTypeObject*)result; 55 | } 56 | @@ -1010,11 +1010,16 @@ 57 | class ASTModuleVisitor(PickleVisitor): 58 | 59 | def visitModule(self, mod): 60 | + self.emit("PyObject *ast3_parse(PyObject *self, PyObject *args);", 0) 61 | + self.emit("static PyMethodDef ast3_methods[] = {", 0) 62 | + self.emit(' {"_parse", ast3_parse, METH_VARARGS, "Parse string into typed AST."},', 0) 63 | + self.emit(" {NULL, NULL, 0, NULL}", 0) 64 | + self.emit("};", 0) 65 | self.emit("static struct PyModuleDef _astmodule = {", 0) 66 | - self.emit(' PyModuleDef_HEAD_INIT, "_ast"', 0) 67 | + self.emit(' PyModuleDef_HEAD_INIT, "_ast3", NULL, 0, ast3_methods', 0) 68 | self.emit("};", 0) 69 | self.emit("PyMODINIT_FUNC", 0) 70 | - self.emit("PyInit__ast(void)", 0) 71 | + self.emit("PyInit__ast3(void)", 0) 72 | self.emit("{", 0) 73 | self.emit("PyObject *m, *d;", 1) 74 | self.emit("if (!init_types()) return NULL;", 1) 75 | @@ -1199,7 +1204,7 @@ 76 | class PartingShots(StaticVisitor): 77 | 78 | CODE = """ 79 | -PyObject* PyAST_mod2obj(mod_ty t) 80 | +PyObject* Ta3AST_mod2obj(mod_ty t) 81 | { 82 | if (!init_types()) 83 | return NULL; 84 | @@ -1207,7 +1212,7 @@ 85 | } 86 | 87 | /* mode is 0 for "exec", 1 for "eval" and 2 for "single" input */ 88 | -mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode) 89 | +mod_ty Ta3AST_obj2mod(PyObject* ast, PyArena* arena, int mode) 90 | { 91 | mod_ty res; 92 | PyObject *req_type[3]; 93 | @@ -1237,7 +1242,7 @@ 94 | return res; 95 | } 96 | 97 | -int PyAST_Check(PyObject* obj) 98 | +int Ta3AST_Check(PyObject* obj) 99 | { 100 | if (!init_types()) 101 | return -1; 102 | @@ -1276,9 +1281,9 @@ 103 | PrototypeVisitor(f), 104 | ) 105 | c.visit(mod) 106 | - f.write("PyObject* PyAST_mod2obj(mod_ty t);\n") 107 | - f.write("mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode);\n") 108 | - f.write("int 
PyAST_Check(PyObject* obj);\n") 109 | + f.write("PyObject* Ta3AST_mod2obj(mod_ty t);\n") 110 | + f.write("mod_ty Ta3AST_obj2mod(PyObject* ast, PyArena* arena, int mode);\n") 111 | + f.write("int Ta3AST_Check(PyObject* obj);\n") 112 | 113 | if C_FILE: 114 | with open(C_FILE, "w") as f: 115 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import re 3 | import sys 4 | if sys.version_info[0] < 3 or sys.version_info[1] < 3: 5 | sys.exit('Error: typed_ast only runs on Python 3.3 and above.') 6 | 7 | try: 8 | from setuptools import setup, Extension 9 | except ImportError: 10 | from distutils.core import setup, Extension 11 | 12 | _ast27 = Extension( 13 | '_ast27', 14 | include_dirs = ['ast27/Include'], 15 | sources = [ 16 | 'ast27/Parser/acceler.c', 17 | 'ast27/Parser/bitset.c', 18 | 'ast27/Parser/grammar.c', 19 | 'ast27/Parser/grammar1.c', 20 | 'ast27/Parser/node.c', 21 | 'ast27/Parser/parser.c', 22 | 'ast27/Parser/parsetok.c', 23 | 'ast27/Parser/tokenizer.c', 24 | 'ast27/Python/asdl.c', 25 | 'ast27/Python/ast.c', 26 | 'ast27/Python/graminit.c', 27 | 'ast27/Python/mystrtoul.c', 28 | 'ast27/Python/Python-ast.c', 29 | 'ast27/Custom/typed_ast.c', 30 | ], 31 | depends = [ 32 | 'ast27/Include/asdl.h', 33 | 'ast27/Include/ast.h', 34 | 'ast27/Include/bitset.h', 35 | 'ast27/Include/compile.h', 36 | 'ast27/Include/errcode.h', 37 | 'ast27/Include/graminit.h', 38 | 'ast27/Include/grammar.h', 39 | 'ast27/Include/node.h', 40 | 'ast27/Include/parsetok.h', 41 | 'ast27/Include/pgenheaders.h', 42 | 'ast27/Include/Python-ast.h', 43 | 'ast27/Include/token.h', 44 | 'ast27/Parser/parser.h', 45 | 'ast27/Parser/tokenizer.h', 46 | ]) 47 | 48 | 49 | _ast3 = Extension( 50 | '_ast3', 51 | include_dirs = ['ast3/Include'], 52 | sources = [ 53 | 'ast3/Parser/acceler.c', 54 | 'ast3/Parser/bitset.c', 55 | 'ast3/Parser/grammar.c', 56 | 'ast3/Parser/grammar1.c', 
57 | 'ast3/Parser/node.c', 58 | 'ast3/Parser/parser.c', 59 | 'ast3/Parser/parsetok.c', 60 | 'ast3/Parser/tokenizer.c', 61 | 'ast3/Python/asdl.c', 62 | 'ast3/Python/ast.c', 63 | 'ast3/Python/graminit.c', 64 | 'ast3/Python/Python-ast.c', 65 | 'ast3/Custom/typed_ast.c', 66 | ], 67 | depends = [ 68 | 'ast3/Include/asdl.h', 69 | 'ast3/Include/ast.h', 70 | 'ast3/Include/bitset.h', 71 | 'ast3/Include/compile-ast3.h', 72 | 'ast3/Include/errcode.h', 73 | 'ast3/Include/graminit.h', 74 | 'ast3/Include/grammar.h', 75 | 'ast3/Include/node.h', 76 | 'ast3/Include/parsetok.h', 77 | 'ast3/Include/pgenheaders.h', 78 | 'ast3/Include/Python-ast.h', 79 | 'ast3/Include/token.h', 80 | 'ast3/Parser/parser.h', 81 | 'ast3/Parser/tokenizer.h', 82 | ]) 83 | 84 | long_description = """ 85 | =========== 86 | End of life 87 | =========== 88 | 89 | This project is no longer maintained. 90 | 91 | Use the standard library `ast` module instead. 92 | See https://github.com/python/typed_ast/issues/179. 93 | 94 | =========== 95 | Description 96 | =========== 97 | 98 | `typed_ast` is a Python 3 package that provides a Python 2.7 and Python 3 99 | parser similar to the standard `ast` library. Unlike `ast` below Python 3.8, 100 | the parsers in 101 | `typed_ast` include PEP 484 type comments and are independent of the version of 102 | Python under which they are run. The `typed_ast` parsers produce the standard 103 | Python AST (plus type comments), and are both fast and correct, as they are 104 | based on the CPython 2.7 and 3.7 parsers. 105 | 106 | **Note:** The `ast` module of Python 3.8+ supports all features of `typed_ast`. 107 | `typed_ast` does not support parsing code that uses syntax introduced in 108 | Python 3.8 onwards. 109 | We recommend using `ast` on Python 3.8 or above. 
110 | """.strip() 111 | 112 | _version_re = re.compile(r'__version__\s+=\s+(?P.*)') 113 | 114 | with open('typed_ast/__init__.py', 'r', encoding='utf8') as f: 115 | version = _version_re.search(f.read()).group('version') 116 | version = str(ast.literal_eval(version)) 117 | 118 | setup (name = 'typed_ast', 119 | version = version, 120 | description = 'a fork of Python 2 and 3 ast modules with type comment support', 121 | long_description = long_description, 122 | author = 'David Fisher', 123 | url = 'https://github.com/python/typed_ast', 124 | license='Apache License 2.0', 125 | platforms = ['POSIX', 'Windows'], 126 | classifiers = [ 127 | 'Development Status :: 7 - Inactive', 128 | 'Environment :: Console', 129 | 'Intended Audience :: Developers', 130 | 'Operating System :: POSIX', 131 | 'Operating System :: Microsoft', 132 | 'Programming Language :: Python :: 3.6', 133 | 'Programming Language :: Python :: 3.7', 134 | 'Programming Language :: Python :: 3.8', 135 | 'Programming Language :: Python :: 3.9', 136 | 'Programming Language :: Python :: 3.10', 137 | 'Programming Language :: Python :: 3.11', 138 | 'Topic :: Software Development', 139 | ], 140 | python_requires=">=3.6", 141 | packages = ['typed_ast', 'typed_ast.tests'], 142 | package_dir={ 'typed_ast.tests': 'ast3/tests' }, 143 | ext_package='typed_ast', 144 | ext_modules = [_ast27, _ast3]) 145 | -------------------------------------------------------------------------------- /ast27/Parser/node.c: -------------------------------------------------------------------------------- 1 | /* Parse tree node implementation */ 2 | 3 | #include "Python.h" 4 | #include "../Include/node.h" 5 | #include "../Include/errcode.h" 6 | 7 | node * 8 | Ta27Node_New(int type) 9 | { 10 | node *n = (node *) PyObject_MALLOC(1 * sizeof(node)); 11 | if (n == NULL) 12 | return NULL; 13 | n->n_type = type; 14 | n->n_str = NULL; 15 | n->n_lineno = 0; 16 | n->n_nchildren = 0; 17 | n->n_child = NULL; 18 | return n; 19 | } 20 | 21 | /* See 
comments at XXXROUNDUP below. Returns -1 on overflow. */ 22 | static int 23 | fancy_roundup(int n) 24 | { 25 | /* Round up to the closest power of 2 >= n. */ 26 | int result = 256; 27 | assert(n > 128); 28 | while (result < n) { 29 | result <<= 1; 30 | if (result <= 0) 31 | return -1; 32 | } 33 | return result; 34 | } 35 | 36 | /* A gimmick to make massive numbers of reallocs quicker. The result is 37 | * a number >= the input. In Ta27Node_AddChild, it's used like so, when 38 | * we're about to add child number current_size + 1: 39 | * 40 | * if XXXROUNDUP(current_size) < XXXROUNDUP(current_size + 1): 41 | * allocate space for XXXROUNDUP(current_size + 1) total children 42 | * else: 43 | * we already have enough space 44 | * 45 | * Since a node starts out empty, we must have 46 | * 47 | * XXXROUNDUP(0) < XXXROUNDUP(1) 48 | * 49 | * so that we allocate space for the first child. One-child nodes are very 50 | * common (presumably that would change if we used a more abstract form 51 | * of syntax tree), so to avoid wasting memory it's desirable that 52 | * XXXROUNDUP(1) == 1. That in turn forces XXXROUNDUP(0) == 0. 53 | * 54 | * Else for 2 <= n <= 128, we round up to the closest multiple of 4. Why 4? 55 | * Rounding up to a multiple of an exact power of 2 is very efficient, and 56 | * most nodes with more than one child have <= 4 kids. 57 | * 58 | * Else we call fancy_roundup() to grow proportionately to n. We've got an 59 | * extreme case then (like test_longexp.py), and on many platforms doing 60 | * anything less than proportional growth leads to exorbitant runtime 61 | * (e.g., MacPython), or extreme fragmentation of user address space (e.g., 62 | * Win98). 63 | * 64 | * In a run of compileall across the 2.3a0 Lib directory, Andrew MacIntyre 65 | * reported that, with this scheme, 89% of PyObject_REALLOC calls in 66 | * Ta27Node_AddChild passed 1 for the size, and 9% passed 4. 
So this usually 67 | * wastes very little memory, but is very effective at sidestepping 68 | * platform-realloc disasters on vulnerable platforms. 69 | * 70 | * Note that this would be straightforward if a node stored its current 71 | * capacity. The code is tricky to avoid that. 72 | */ 73 | #define XXXROUNDUP(n) ((n) <= 1 ? (n) : \ 74 | (n) <= 128 ? (((n) + 3) & ~3) : \ 75 | fancy_roundup(n)) 76 | 77 | 78 | int 79 | Ta27Node_AddChild(register node *n1, int type, char *str, int lineno, int col_offset) 80 | { 81 | const int nch = n1->n_nchildren; 82 | int current_capacity; 83 | int required_capacity; 84 | node *n; 85 | 86 | if (nch == INT_MAX || nch < 0) 87 | return E_OVERFLOW; 88 | 89 | current_capacity = XXXROUNDUP(nch); 90 | required_capacity = XXXROUNDUP(nch + 1); 91 | if (current_capacity < 0 || required_capacity < 0) 92 | return E_OVERFLOW; 93 | if (current_capacity < required_capacity) { 94 | if ((size_t)required_capacity > PY_SIZE_MAX / sizeof(node)) { 95 | return E_NOMEM; 96 | } 97 | n = n1->n_child; 98 | n = (node *) PyObject_REALLOC(n, 99 | required_capacity * sizeof(node)); 100 | if (n == NULL) 101 | return E_NOMEM; 102 | n1->n_child = n; 103 | } 104 | 105 | n = &n1->n_child[n1->n_nchildren++]; 106 | n->n_type = type; 107 | n->n_str = str; 108 | n->n_lineno = lineno; 109 | n->n_col_offset = col_offset; 110 | n->n_nchildren = 0; 111 | n->n_child = NULL; 112 | return 0; 113 | } 114 | 115 | /* Forward */ 116 | static void freechildren(node *); 117 | static Py_ssize_t sizeofchildren(node *n); 118 | 119 | 120 | void 121 | Ta27Node_Free(node *n) 122 | { 123 | if (n != NULL) { 124 | freechildren(n); 125 | PyObject_FREE(n); 126 | } 127 | } 128 | 129 | Py_ssize_t 130 | _Ta27Node_SizeOf(node *n) 131 | { 132 | Py_ssize_t res = 0; 133 | 134 | if (n != NULL) 135 | res = sizeof(node) + sizeofchildren(n); 136 | return res; 137 | } 138 | 139 | static void 140 | freechildren(node *n) 141 | { 142 | int i; 143 | for (i = NCH(n); --i >= 0; ) 144 | freechildren(CHILD(n, 
i)); 145 | if (n->n_child != NULL) 146 | PyObject_FREE(n->n_child); 147 | if (STR(n) != NULL) 148 | PyObject_FREE(STR(n)); 149 | } 150 | 151 | static Py_ssize_t 152 | sizeofchildren(node *n) 153 | { 154 | Py_ssize_t res = 0; 155 | int i; 156 | for (i = NCH(n); --i >= 0; ) 157 | res += sizeofchildren(CHILD(n, i)); 158 | if (n->n_child != NULL) 159 | /* allocated size of n->n_child array */ 160 | res += XXXROUNDUP(NCH(n)) * sizeof(node); 161 | if (STR(n) != NULL) 162 | res += strlen(STR(n)) + 1; 163 | return res; 164 | } 165 | -------------------------------------------------------------------------------- /ast3/Parser/node.c: -------------------------------------------------------------------------------- 1 | /* Parse tree node implementation */ 2 | 3 | #include "Python.h" 4 | #include "../Include/node.h" 5 | #include "../Include/errcode.h" 6 | 7 | node * 8 | Ta3Node_New(int type) 9 | { 10 | node *n = (node *) PyObject_MALLOC(1 * sizeof(node)); 11 | if (n == NULL) 12 | return NULL; 13 | n->n_type = type; 14 | n->n_str = NULL; 15 | n->n_lineno = 0; 16 | n->n_nchildren = 0; 17 | n->n_child = NULL; 18 | return n; 19 | } 20 | 21 | /* See comments at XXXROUNDUP below. Returns -1 on overflow. */ 22 | static int 23 | fancy_roundup(int n) 24 | { 25 | /* Round up to the closest power of 2 >= n. */ 26 | int result = 256; 27 | assert(n > 128); 28 | while (result < n) { 29 | result <<= 1; 30 | if (result <= 0) 31 | return -1; 32 | } 33 | return result; 34 | } 35 | 36 | /* A gimmick to make massive numbers of reallocs quicker. The result is 37 | * a number >= the input. 
In Ta3Node_AddChild, it's used like so, when 38 | * we're about to add child number current_size + 1: 39 | * 40 | * if XXXROUNDUP(current_size) < XXXROUNDUP(current_size + 1): 41 | * allocate space for XXXROUNDUP(current_size + 1) total children 42 | * else: 43 | * we already have enough space 44 | * 45 | * Since a node starts out empty, we must have 46 | * 47 | * XXXROUNDUP(0) < XXXROUNDUP(1) 48 | * 49 | * so that we allocate space for the first child. One-child nodes are very 50 | * common (presumably that would change if we used a more abstract form 51 | * of syntax tree), so to avoid wasting memory it's desirable that 52 | * XXXROUNDUP(1) == 1. That in turn forces XXXROUNDUP(0) == 0. 53 | * 54 | * Else for 2 <= n <= 128, we round up to the closest multiple of 4. Why 4? 55 | * Rounding up to a multiple of an exact power of 2 is very efficient, and 56 | * most nodes with more than one child have <= 4 kids. 57 | * 58 | * Else we call fancy_roundup() to grow proportionately to n. We've got an 59 | * extreme case then (like test_longexp.py), and on many platforms doing 60 | * anything less than proportional growth leads to exorbitant runtime 61 | * (e.g., MacPython), or extreme fragmentation of user address space (e.g., 62 | * Win98). 63 | * 64 | * In a run of compileall across the 2.3a0 Lib directory, Andrew MacIntyre 65 | * reported that, with this scheme, 89% of PyObject_REALLOC calls in 66 | * Ta3Node_AddChild passed 1 for the size, and 9% passed 4. So this usually 67 | * wastes very little memory, but is very effective at sidestepping 68 | * platform-realloc disasters on vulnerable platforms. 69 | * 70 | * Note that this would be straightforward if a node stored its current 71 | * capacity. The code is tricky to avoid that. 72 | */ 73 | #define XXXROUNDUP(n) ((n) <= 1 ? (n) : \ 74 | (n) <= 128 ? 
(int)_Py_SIZE_ROUND_UP((n), 4) : \ 75 | fancy_roundup(n)) 76 | 77 | 78 | int 79 | Ta3Node_AddChild(node *n1, int type, char *str, int lineno, int col_offset) 80 | { 81 | const int nch = n1->n_nchildren; 82 | int current_capacity; 83 | int required_capacity; 84 | node *n; 85 | 86 | if (nch == INT_MAX || nch < 0) 87 | return E_OVERFLOW; 88 | 89 | current_capacity = XXXROUNDUP(nch); 90 | required_capacity = XXXROUNDUP(nch + 1); 91 | if (current_capacity < 0 || required_capacity < 0) 92 | return E_OVERFLOW; 93 | if (current_capacity < required_capacity) { 94 | if ((size_t)required_capacity > SIZE_MAX / sizeof(node)) { 95 | return E_NOMEM; 96 | } 97 | n = n1->n_child; 98 | n = (node *) PyObject_REALLOC(n, 99 | required_capacity * sizeof(node)); 100 | if (n == NULL) 101 | return E_NOMEM; 102 | n1->n_child = n; 103 | } 104 | 105 | n = &n1->n_child[n1->n_nchildren++]; 106 | n->n_type = type; 107 | n->n_str = str; 108 | n->n_lineno = lineno; 109 | n->n_col_offset = col_offset; 110 | n->n_nchildren = 0; 111 | n->n_child = NULL; 112 | return 0; 113 | } 114 | 115 | /* Forward */ 116 | static void freechildren(node *); 117 | static Py_ssize_t sizeofchildren(node *n); 118 | 119 | 120 | void 121 | Ta3Node_Free(node *n) 122 | { 123 | if (n != NULL) { 124 | freechildren(n); 125 | PyObject_FREE(n); 126 | } 127 | } 128 | 129 | Py_ssize_t 130 | _Ta3Node_SizeOf(node *n) 131 | { 132 | Py_ssize_t res = 0; 133 | 134 | if (n != NULL) 135 | res = sizeof(node) + sizeofchildren(n); 136 | return res; 137 | } 138 | 139 | static void 140 | freechildren(node *n) 141 | { 142 | int i; 143 | for (i = NCH(n); --i >= 0; ) 144 | freechildren(CHILD(n, i)); 145 | if (n->n_child != NULL) 146 | PyObject_FREE(n->n_child); 147 | if (STR(n) != NULL) 148 | PyObject_FREE(STR(n)); 149 | } 150 | 151 | static Py_ssize_t 152 | sizeofchildren(node *n) 153 | { 154 | Py_ssize_t res = 0; 155 | int i; 156 | for (i = NCH(n); --i >= 0; ) 157 | res += sizeofchildren(CHILD(n, i)); 158 | if (n->n_child != NULL) 159 | /* 
allocated size of n->n_child array */ 160 | res += XXXROUNDUP(NCH(n)) * sizeof(node); 161 | if (STR(n) != NULL) 162 | res += strlen(STR(n)) + 1; 163 | return res; 164 | } 165 | -------------------------------------------------------------------------------- /ast27/Parser/Python.asdl: -------------------------------------------------------------------------------- 1 | -- ASDL's five builtin types are identifier, int, string, object, bool 2 | 3 | module Python version "$Revision$" 4 | { 5 | mod = Module(stmt* body, type_ignore *type_ignores) 6 | | Interactive(stmt* body) 7 | | Expression(expr body) 8 | | FunctionType(expr* argtypes, expr returns) 9 | 10 | -- not really an actual node but useful in Jython's typesystem. 11 | | Suite(stmt* body) 12 | 13 | stmt = FunctionDef(identifier name, arguments args, 14 | stmt* body, expr* decorator_list, string? type_comment) 15 | | ClassDef(identifier name, expr* bases, stmt* body, expr* decorator_list) 16 | | Return(expr? value) 17 | 18 | | Delete(expr* targets) 19 | | Assign(expr* targets, expr value, string? type_comment) 20 | | AugAssign(expr target, operator op, expr value) 21 | 22 | -- not sure if bool is allowed, can always use int 23 | | Print(expr? dest, expr* values, bool nl) 24 | 25 | -- use 'orelse' because else is a keyword in target languages 26 | | For(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment) 27 | | While(expr test, stmt* body, stmt* orelse) 28 | | If(expr test, stmt* body, stmt* orelse) 29 | | With(expr context_expr, expr? optional_vars, stmt* body, string? type_comment) 30 | 31 | -- 'type' is a bad name 32 | | Raise(expr? type, expr? inst, expr? tback) 33 | | TryExcept(stmt* body, excepthandler* handlers, stmt* orelse) 34 | | TryFinally(stmt* body, stmt* finalbody) 35 | | Assert(expr test, expr? msg) 36 | 37 | | Import(alias* names) 38 | | ImportFrom(identifier? module, alias* names, int? 
level) 39 | 40 | -- Doesn't capture requirement that locals must be 41 | -- defined if globals is 42 | -- still supports use as a function! 43 | | Exec(expr body, expr? globals, expr? locals) 44 | 45 | | Global(identifier* names) 46 | | Expr(expr value) 47 | | Pass | Break | Continue 48 | 49 | -- XXX Jython will be different 50 | -- col_offset is the byte offset in the utf8 string the parser uses 51 | attributes (int lineno, int col_offset) 52 | 53 | -- BoolOp() can use left & right? 54 | expr = BoolOp(boolop op, expr* values) 55 | | BinOp(expr left, operator op, expr right) 56 | | UnaryOp(unaryop op, expr operand) 57 | | Lambda(arguments args, expr body) 58 | | IfExp(expr test, expr body, expr orelse) 59 | | Dict(expr* keys, expr* values) 60 | | Set(expr* elts) 61 | | ListComp(expr elt, comprehension* generators) 62 | | SetComp(expr elt, comprehension* generators) 63 | | DictComp(expr key, expr value, comprehension* generators) 64 | | GeneratorExp(expr elt, comprehension* generators) 65 | -- the grammar constrains where yield expressions can occur 66 | | Yield(expr? value) 67 | -- need sequences for compare to distinguish between 68 | -- x < 4 < 3 and (x < 4) < 3 69 | | Compare(expr left, cmpop* ops, expr* comparators) 70 | | Call(expr func, expr* args, keyword* keywords, 71 | expr? starargs, expr? kwargs) 72 | | Repr(expr value) 73 | | Num(object n) -- a number as a PyObject. 74 | | Str(string s, string kind) 75 | -- other literals? bools? 
76 | 77 | -- the following expression can appear in assignment context 78 | | Attribute(expr value, identifier attr, expr_context ctx) 79 | | Subscript(expr value, slice slice, expr_context ctx) 80 | | Name(identifier id, expr_context ctx) 81 | | List(expr* elts, expr_context ctx) 82 | | Tuple(expr* elts, expr_context ctx) 83 | 84 | -- col_offset is the byte offset in the utf8 string the parser uses 85 | attributes (int lineno, int col_offset) 86 | 87 | expr_context = Load | Store | Del | AugLoad | AugStore | Param 88 | 89 | slice = Ellipsis | Slice(expr? lower, expr? upper, expr? step) 90 | | ExtSlice(slice* dims) 91 | | Index(expr value) 92 | 93 | boolop = And | Or 94 | 95 | operator = Add | Sub | Mult | Div | Mod | Pow | LShift 96 | | RShift | BitOr | BitXor | BitAnd | FloorDiv 97 | 98 | unaryop = Invert | Not | UAdd | USub 99 | 100 | cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn 101 | 102 | comprehension = (expr target, expr iter, expr* ifs) 103 | 104 | -- not sure what to call the first argument for raise and except 105 | excepthandler = ExceptHandler(expr? type, expr? name, stmt* body) 106 | attributes (int lineno, int col_offset) 107 | 108 | -- type_comments is used to support the per-argument type comment syntax. 109 | -- It is either an empty list or a list with length equal to the number of 110 | -- args (including varargs and kwargs, if present) and with members set to the 111 | -- string of each arg's type comment, if present, or None otherwise. 112 | arguments = (expr* args, identifier? vararg, 113 | identifier? kwarg, expr* defaults, string* type_comments) 114 | 115 | -- keyword arguments supplied to call 116 | keyword = (identifier arg, expr value) 117 | 118 | -- import name with optional 'as' alias. 119 | alias = (identifier name, identifier? 
asname) 120 | 121 | type_ignore = TypeIgnore(int lineno, string tag) 122 | } 123 | -------------------------------------------------------------------------------- /ast3/Parser/Python.asdl: -------------------------------------------------------------------------------- 1 | -- ASDL's 7 builtin types are: 2 | -- identifier, int, string, bytes, object, singleton, constant 3 | -- 4 | -- singleton: None, True or False 5 | -- constant can be None, whereas None means "no value" for object. 6 | 7 | module Python 8 | { 9 | mod = Module(stmt* body, type_ignore *type_ignores) 10 | | Interactive(stmt* body) 11 | | Expression(expr body) 12 | | FunctionType(expr* argtypes, expr returns) 13 | 14 | -- not really an actual node but useful in Jython's typesystem. 15 | | Suite(stmt* body) 16 | 17 | stmt = FunctionDef(identifier name, arguments args, 18 | stmt* body, expr* decorator_list, expr? returns, string? type_comment) 19 | | AsyncFunctionDef(identifier name, arguments args, 20 | stmt* body, expr* decorator_list, expr? returns, string? type_comment) 21 | 22 | | ClassDef(identifier name, 23 | expr* bases, 24 | keyword* keywords, 25 | stmt* body, 26 | expr* decorator_list) 27 | | Return(expr? value) 28 | 29 | | Delete(expr* targets) 30 | | Assign(expr* targets, expr value, string? type_comment) 31 | | AugAssign(expr target, operator op, expr value) 32 | -- 'simple' indicates that we annotate simple name without parens 33 | | AnnAssign(expr target, expr annotation, expr? value, int simple) 34 | 35 | -- use 'orelse' because else is a keyword in target languages 36 | | For(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment) 37 | | AsyncFor(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment) 38 | | While(expr test, stmt* body, stmt* orelse) 39 | | If(expr test, stmt* body, stmt* orelse) 40 | | With(withitem* items, stmt* body, string? type_comment) 41 | | AsyncWith(withitem* items, stmt* body, string? type_comment) 42 | 43 | | Raise(expr? 
exc, expr? cause) 44 | | Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody) 45 | | Assert(expr test, expr? msg) 46 | 47 | | Import(alias* names) 48 | | ImportFrom(identifier? module, alias* names, int? level) 49 | 50 | | Global(identifier* names) 51 | | Nonlocal(identifier* names) 52 | | Expr(expr value) 53 | | Pass | Break | Continue 54 | 55 | -- XXX Jython will be different 56 | -- col_offset is the byte offset in the utf8 string the parser uses 57 | attributes (int lineno, int col_offset) 58 | 59 | -- BoolOp() can use left & right? 60 | expr = BoolOp(boolop op, expr* values) 61 | | BinOp(expr left, operator op, expr right) 62 | | UnaryOp(unaryop op, expr operand) 63 | | Lambda(arguments args, expr body) 64 | | IfExp(expr test, expr body, expr orelse) 65 | | Dict(expr* keys, expr* values) 66 | | Set(expr* elts) 67 | | ListComp(expr elt, comprehension* generators) 68 | | SetComp(expr elt, comprehension* generators) 69 | | DictComp(expr key, expr value, comprehension* generators) 70 | | GeneratorExp(expr elt, comprehension* generators) 71 | -- the grammar constrains where yield expressions can occur 72 | | Await(expr value) 73 | | Yield(expr? value) 74 | | YieldFrom(expr value) 75 | -- need sequences for compare to distinguish between 76 | -- x < 4 < 3 and (x < 4) < 3 77 | | Compare(expr left, cmpop* ops, expr* comparators) 78 | | Call(expr func, expr* args, keyword* keywords) 79 | | Num(object n) -- a number as a PyObject. 80 | | Str(string s, string kind) 81 | | FormattedValue(expr value, int? conversion, expr? 
format_spec) 82 | | JoinedStr(expr* values) 83 | | Bytes(bytes s, string kind) 84 | | NameConstant(singleton value) 85 | | Ellipsis 86 | | Constant(constant value) 87 | 88 | -- the following expression can appear in assignment context 89 | | Attribute(expr value, identifier attr, expr_context ctx) 90 | | Subscript(expr value, slice slice, expr_context ctx) 91 | | Starred(expr value, expr_context ctx) 92 | | Name(identifier id, expr_context ctx) 93 | | List(expr* elts, expr_context ctx) 94 | | Tuple(expr* elts, expr_context ctx) 95 | 96 | -- col_offset is the byte offset in the utf8 string the parser uses 97 | attributes (int lineno, int col_offset) 98 | 99 | expr_context = Load | Store | Del | AugLoad | AugStore | Param 100 | 101 | slice = Slice(expr? lower, expr? upper, expr? step) 102 | | ExtSlice(slice* dims) 103 | | Index(expr value) 104 | 105 | boolop = And | Or 106 | 107 | operator = Add | Sub | Mult | MatMult | Div | Mod | Pow | LShift 108 | | RShift | BitOr | BitXor | BitAnd | FloorDiv 109 | 110 | unaryop = Invert | Not | UAdd | USub 111 | 112 | cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn 113 | 114 | comprehension = (expr target, expr iter, expr* ifs, int is_async) 115 | 116 | excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body) 117 | attributes (int lineno, int col_offset) 118 | 119 | arguments = (arg* args, arg? vararg, arg* kwonlyargs, expr* kw_defaults, 120 | arg? kwarg, expr* defaults) 121 | 122 | arg = (identifier arg, expr? annotation, string? type_comment) 123 | attributes (int lineno, int col_offset) 124 | 125 | -- keyword arguments supplied to call (NULL identifier for **kwargs) 126 | keyword = (identifier? arg, expr value) 127 | 128 | -- import name with optional 'as' alias. 129 | alias = (identifier name, identifier? asname) 130 | 131 | withitem = (expr context_expr, expr? 
optional_vars) 132 | 133 | type_ignore = TypeIgnore(int lineno, string tag) 134 | } 135 | -------------------------------------------------------------------------------- /ast27/Grammar/Grammar: -------------------------------------------------------------------------------- 1 | # Grammar for Python 2 | 3 | # Note: Changing the grammar specified in this file will most likely 4 | # require corresponding changes in the parser module 5 | # (../Modules/parsermodule.c). If you can't make the changes to 6 | # that module yourself, please co-ordinate the required changes 7 | # with someone who can; ask around on python-dev for help. Fred 8 | # Drake will probably be listening there. 9 | 10 | # NOTE WELL: You should also follow all the steps listed in PEP 306, 11 | # "How to Change Python's Grammar" 12 | 13 | # Start symbols for the grammar: 14 | # single_input is a single interactive statement; 15 | # file_input is a module or sequence of commands read from an input file; 16 | # eval_input is the input for the eval() and input() functions. 17 | # func_type_input is a PEP 484 Python 2 function type comment 18 | # NB: compound_stmt in single_input is followed by extra NEWLINE! 
19 | single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE 20 | file_input: (NEWLINE | stmt)* ENDMARKER 21 | eval_input: testlist NEWLINE* ENDMARKER 22 | 23 | decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE 24 | decorators: decorator+ 25 | decorated: decorators (classdef | funcdef) 26 | funcdef: 'def' NAME parameters ':' [TYPE_COMMENT] suite 27 | parameters: '(' [varargslist] ')' 28 | varargslist: ((fpdef ['=' test] ',' [TYPE_COMMENT])* 29 | ('*' NAME [',' [TYPE_COMMENT] '**' NAME] [TYPE_COMMENT] | '**' NAME [TYPE_COMMENT]) | 30 | fpdef ['=' test] (',' [TYPE_COMMENT] fpdef ['=' test])* [','] [TYPE_COMMENT]) 31 | fpdef: NAME | '(' fplist ')' 32 | fplist: fpdef (',' fpdef)* [','] 33 | 34 | stmt: simple_stmt | compound_stmt 35 | simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE 36 | small_stmt: (expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | 37 | import_stmt | global_stmt | exec_stmt | assert_stmt) 38 | expr_stmt: testlist (augassign (yield_expr|testlist) | 39 | ('=' (yield_expr|testlist))* [TYPE_COMMENT]) 40 | augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | 41 | '<<=' | '>>=' | '**=' | '//=') 42 | # For normal assignments, additional restrictions enforced by the interpreter 43 | print_stmt: 'print' ( [ test (',' test)* [','] ] | 44 | '>>' test [ (',' test)+ [','] ] ) 45 | del_stmt: 'del' exprlist 46 | pass_stmt: 'pass' 47 | flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt 48 | break_stmt: 'break' 49 | continue_stmt: 'continue' 50 | return_stmt: 'return' [testlist] 51 | yield_stmt: yield_expr 52 | raise_stmt: 'raise' [test [',' test [',' test]]] 53 | import_stmt: import_name | import_from 54 | import_name: 'import' dotted_as_names 55 | import_from: ('from' ('.'* dotted_name | '.'+) 56 | 'import' ('*' | '(' import_as_names ')' | import_as_names)) 57 | import_as_name: NAME ['as' NAME] 58 | dotted_as_name: dotted_name ['as' NAME] 59 | import_as_names: import_as_name (',' import_as_name)* 
[','] 60 | dotted_as_names: dotted_as_name (',' dotted_as_name)* 61 | dotted_name: NAME ('.' NAME)* 62 | global_stmt: 'global' NAME (',' NAME)* 63 | exec_stmt: 'exec' expr ['in' test [',' test]] 64 | assert_stmt: 'assert' test [',' test] 65 | 66 | compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated 67 | if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] 68 | while_stmt: 'while' test ':' suite ['else' ':' suite] 69 | for_stmt: 'for' exprlist 'in' testlist ':' [TYPE_COMMENT] suite ['else' ':' suite] 70 | try_stmt: ('try' ':' suite 71 | ((except_clause ':' suite)+ 72 | ['else' ':' suite] 73 | ['finally' ':' suite] | 74 | 'finally' ':' suite)) 75 | with_stmt: 'with' with_item (',' with_item)* ':' [TYPE_COMMENT] suite 76 | with_item: test ['as' expr] 77 | # NB compile.c makes sure that the default except clause is last 78 | except_clause: 'except' [test [('as' | ',') test]] 79 | # the TYPE_COMMENT in suites is only parsed for funcdefs, but can't go elsewhere due to ambiguity 80 | suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT 81 | 82 | # Backward compatibility cruft to support: 83 | # [ x for x in lambda: True, lambda: False if x() ] 84 | # even while also allowing: 85 | # lambda x: 5 if x else 2 86 | # (But not a mix of the two) 87 | testlist_safe: old_test [(',' old_test)+ [',']] 88 | old_test: or_test | old_lambdef 89 | old_lambdef: 'lambda' [varargslist] ':' old_test 90 | 91 | test: or_test ['if' or_test 'else' test] | lambdef 92 | or_test: and_test ('or' and_test)* 93 | and_test: not_test ('and' not_test)* 94 | not_test: 'not' not_test | comparison 95 | comparison: expr (comp_op expr)* 96 | comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' 97 | expr: xor_expr ('|' xor_expr)* 98 | xor_expr: and_expr ('^' and_expr)* 99 | and_expr: shift_expr ('&' shift_expr)* 100 | shift_expr: arith_expr (('<<'|'>>') arith_expr)* 101 | arith_expr: term (('+'|'-') 
term)* 102 | term: factor (('*'|'/'|'%'|'//') factor)* 103 | factor: ('+'|'-'|'~') factor | power 104 | power: atom trailer* ['**' factor] 105 | atom: ('(' [yield_expr|testlist_comp] ')' | 106 | '[' [listmaker] ']' | 107 | '{' [dictorsetmaker] '}' | 108 | '`' testlist1 '`' | 109 | NAME | NUMBER | STRING+) 110 | listmaker: test ( list_for | (',' test)* [','] ) 111 | testlist_comp: test ( comp_for | (',' test)* [','] ) 112 | lambdef: 'lambda' [varargslist] ':' test 113 | trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME 114 | subscriptlist: subscript (',' subscript)* [','] 115 | subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop] 116 | sliceop: ':' [test] 117 | exprlist: expr (',' expr)* [','] 118 | testlist: test (',' test)* [','] 119 | dictorsetmaker: ( (test ':' test (comp_for | (',' test ':' test)* [','])) | 120 | (test (comp_for | (',' test)* [','])) ) 121 | 122 | classdef: 'class' NAME ['(' [testlist] ')'] ':' suite 123 | 124 | arglist: (argument ',')* (argument [','] 125 | |'*' test (',' argument)* [',' '**' test] 126 | |'**' test) 127 | # The reason that keywords are test nodes instead of NAME is that using NAME 128 | # results in an ambiguity. ast.c makes sure it's a NAME. 
129 | argument: test [comp_for] | test '=' test 130 | 131 | list_iter: list_for | list_if 132 | list_for: 'for' exprlist 'in' testlist_safe [list_iter] 133 | list_if: 'if' old_test [list_iter] 134 | 135 | comp_iter: comp_for | comp_if 136 | comp_for: 'for' exprlist 'in' or_test [comp_iter] 137 | comp_if: 'if' old_test [comp_iter] 138 | 139 | testlist1: test (',' test)* 140 | 141 | # not used in grammar, but may appear in "node" passed from Parser to Compiler 142 | encoding_decl: NAME 143 | 144 | yield_expr: 'yield' [testlist] 145 | 146 | func_type_input: func_type NEWLINE* ENDMARKER 147 | func_type: '(' [typelist] ')' '->' test 148 | # typelist is a modified typedargslist (see above) 149 | typelist: (test (',' test)* [',' 150 | ['*' [test] (',' test)* [',' '**' test] | '**' test]] 151 | | '*' [test] (',' test)* [',' '**' test] | '**' test) 152 | -------------------------------------------------------------------------------- /ast3/Grammar/Grammar: -------------------------------------------------------------------------------- 1 | # Grammar for Python 2 | 3 | # NOTE WELL: You should also follow all the steps listed at 4 | # https://devguide.python.org/grammar/ 5 | 6 | # Start symbols for the grammar: 7 | # single_input is a single interactive statement; 8 | # file_input is a module or sequence of commands read from an input file; 9 | # eval_input is the input for the eval() functions. 10 | # func_type_input is a PEP 484 Python 2 function type comment 11 | # NB: compound_stmt in single_input is followed by extra NEWLINE! 
12 | # NB: due to the way TYPE_COMMENT is tokenized it will always be followed by a 13 | # NEWLINE 14 | single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE 15 | file_input: (NEWLINE | stmt)* ENDMARKER 16 | eval_input: testlist NEWLINE* ENDMARKER 17 | 18 | decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE 19 | decorators: decorator+ 20 | decorated: decorators (classdef | funcdef | async_funcdef) 21 | 22 | async_funcdef: ASYNC funcdef 23 | funcdef: 'def' NAME parameters ['->' test] ':' [TYPE_COMMENT] suite 24 | 25 | parameters: '(' [typedargslist] ')' 26 | typedargslist: (tfpdef ['=' test] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [ 27 | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) 28 | | '**' tfpdef [','] [TYPE_COMMENT]]]) 29 | | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]]) 30 | | '**' tfpdef [','] [TYPE_COMMENT]) 31 | tfpdef: NAME [':' test] 32 | varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [ 33 | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] 34 | | '**' vfpdef [',']]] 35 | | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]] 36 | | '**' vfpdef [','] 37 | ) 38 | vfpdef: NAME 39 | 40 | stmt: simple_stmt | compound_stmt 41 | simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE 42 | small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | 43 | import_stmt | global_stmt | nonlocal_stmt | assert_stmt) 44 | expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | 45 | ('=' (yield_expr|testlist_star_expr))* [TYPE_COMMENT]) 46 | annassign: ':' test ['=' test] 47 | testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [','] 48 | augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | 49 | '<<=' | '>>=' | '**=' | '//=') 50 | # For normal and annotated assignments, 
additional restrictions enforced by the interpreter 51 | del_stmt: 'del' exprlist 52 | pass_stmt: 'pass' 53 | flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt 54 | break_stmt: 'break' 55 | continue_stmt: 'continue' 56 | return_stmt: 'return' [testlist] 57 | yield_stmt: yield_expr 58 | raise_stmt: 'raise' [test ['from' test]] 59 | import_stmt: import_name | import_from 60 | import_name: 'import' dotted_as_names 61 | # note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS 62 | import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) 63 | 'import' ('*' | '(' import_as_names ')' | import_as_names)) 64 | import_as_name: NAME ['as' NAME] 65 | dotted_as_name: dotted_name ['as' NAME] 66 | import_as_names: import_as_name (',' import_as_name)* [','] 67 | dotted_as_names: dotted_as_name (',' dotted_as_name)* 68 | dotted_name: NAME ('.' NAME)* 69 | global_stmt: 'global' NAME (',' NAME)* 70 | nonlocal_stmt: 'nonlocal' NAME (',' NAME)* 71 | assert_stmt: 'assert' test [',' test] 72 | 73 | compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt 74 | async_stmt: ASYNC (funcdef | with_stmt | for_stmt) 75 | if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite] 76 | while_stmt: 'while' test ':' suite ['else' ':' suite] 77 | for_stmt: 'for' exprlist 'in' testlist ':' [TYPE_COMMENT] suite ['else' ':' suite] 78 | try_stmt: ('try' ':' suite 79 | ((except_clause ':' suite)+ 80 | ['else' ':' suite] 81 | ['finally' ':' suite] | 82 | 'finally' ':' suite)) 83 | with_stmt: 'with' with_item (',' with_item)* ':' [TYPE_COMMENT] suite 84 | with_item: test ['as' expr] 85 | # NB compile.c makes sure that the default except clause is last 86 | except_clause: 'except' [test ['as' NAME]] 87 | # the TYPE_COMMENT in suites is only parsed for funcdefs, but can't go elsewhere due to ambiguity 88 | suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ 
DEDENT 89 | 90 | test: or_test ['if' or_test 'else' test] | lambdef 91 | test_nocond: or_test | lambdef_nocond 92 | lambdef: 'lambda' [varargslist] ':' test 93 | lambdef_nocond: 'lambda' [varargslist] ':' test_nocond 94 | or_test: and_test ('or' and_test)* 95 | and_test: not_test ('and' not_test)* 96 | not_test: 'not' not_test | comparison 97 | comparison: expr (comp_op expr)* 98 | # <> isn't actually a valid comparison operator in Python. It's here for the 99 | # sake of a __future__ import described in PEP 401 (which really works :-) 100 | comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' 101 | star_expr: '*' expr 102 | expr: xor_expr ('|' xor_expr)* 103 | xor_expr: and_expr ('^' and_expr)* 104 | and_expr: shift_expr ('&' shift_expr)* 105 | shift_expr: arith_expr (('<<'|'>>') arith_expr)* 106 | arith_expr: term (('+'|'-') term)* 107 | term: factor (('*'|'@'|'/'|'%'|'//') factor)* 108 | factor: ('+'|'-'|'~') factor | power 109 | power: atom_expr ['**' factor] 110 | atom_expr: [AWAIT] atom trailer* 111 | atom: ('(' [yield_expr|testlist_comp] ')' | 112 | '[' [testlist_comp] ']' | 113 | '{' [dictorsetmaker] '}' | 114 | NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False') 115 | testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] ) 116 | trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' 
NAME 117 | subscriptlist: subscript (',' subscript)* [','] 118 | subscript: test | [test] ':' [test] [sliceop] 119 | sliceop: ':' [test] 120 | exprlist: (expr|star_expr) (',' (expr|star_expr))* [','] 121 | testlist: test (',' test)* [','] 122 | dictorsetmaker: ( ((test ':' test | '**' expr) 123 | (comp_for | (',' (test ':' test | '**' expr))* [','])) | 124 | ((test | star_expr) 125 | (comp_for | (',' (test | star_expr))* [','])) ) 126 | 127 | classdef: 'class' NAME ['(' [arglist] ')'] ':' suite 128 | 129 | arglist: argument (',' argument)* [','] 130 | 131 | # The reason that keywords are test nodes instead of NAME is that using NAME 132 | # results in an ambiguity. ast.c makes sure it's a NAME. 133 | # "test '=' test" is really "keyword '=' test", but we have no such token. 134 | # These need to be in a single rule to avoid grammar that is ambiguous 135 | # to our LL(1) parser. Even though 'test' includes '*expr' in star_expr, 136 | # we explicitly match '*' here, too, to give it proper precedence. 137 | # Illegal combinations and orderings are blocked in ast.c: 138 | # multiple (test comp_for) arguments are blocked; keyword unpackings 139 | # that precede iterable unpackings are blocked; etc. 
140 | argument: ( test [comp_for] | 141 | test '=' test | 142 | '**' test | 143 | '*' test ) 144 | 145 | comp_iter: comp_for | comp_if 146 | sync_comp_for: 'for' exprlist 'in' or_test [comp_iter] 147 | comp_for: [ASYNC] sync_comp_for 148 | comp_if: 'if' test_nocond [comp_iter] 149 | 150 | # not used in grammar, but may appear in "node" passed from Parser to Compiler 151 | encoding_decl: NAME 152 | 153 | yield_expr: 'yield' [yield_arg] 154 | yield_arg: 'from' test | testlist 155 | 156 | func_type_input: func_type NEWLINE* ENDMARKER 157 | func_type: '(' [typelist] ')' '->' test 158 | # typelist is a modified typedargslist (see above) 159 | typelist: (test (',' test)* [',' 160 | ['*' [test] (',' test)* [',' '**' test] | '**' test]] 161 | | '*' [test] (',' test)* [',' '**' test] | '**' test) 162 | -------------------------------------------------------------------------------- /ast27/Parser/grammar.c: -------------------------------------------------------------------------------- 1 | 2 | /* Grammar implementation */ 3 | 4 | #include "Python.h" 5 | #include "../Include/pgenheaders.h" 6 | 7 | #include 8 | 9 | #include "../Include/token.h" 10 | #include "../Include/grammar.h" 11 | 12 | #ifdef RISCOS 13 | #include 14 | #endif 15 | 16 | PyAPI_DATA(int) Py_DebugFlag; 17 | 18 | grammar * 19 | newgrammar(int start) 20 | { 21 | grammar *g; 22 | 23 | g = (grammar *)PyObject_MALLOC(sizeof(grammar)); 24 | if (g == NULL) 25 | Py_FatalError("no mem for new grammar"); 26 | g->g_ndfas = 0; 27 | g->g_dfa = NULL; 28 | g->g_start = start; 29 | g->g_ll.ll_nlabels = 0; 30 | g->g_ll.ll_label = NULL; 31 | g->g_accel = 0; 32 | return g; 33 | } 34 | 35 | dfa * 36 | adddfa(grammar *g, int type, char *name) 37 | { 38 | dfa *d; 39 | 40 | g->g_dfa = (dfa *)PyObject_REALLOC(g->g_dfa, 41 | sizeof(dfa) * (g->g_ndfas + 1)); 42 | if (g->g_dfa == NULL) 43 | Py_FatalError("no mem to resize dfa in adddfa"); 44 | d = &g->g_dfa[g->g_ndfas++]; 45 | d->d_type = type; 46 | d->d_name = strdup(name); 47 | 
d->d_nstates = 0; 48 | d->d_state = NULL; 49 | d->d_initial = -1; 50 | d->d_first = NULL; 51 | return d; /* Only use while fresh! */ 52 | } 53 | 54 | int 55 | addstate(dfa *d) 56 | { 57 | state *s; 58 | 59 | d->d_state = (state *)PyObject_REALLOC(d->d_state, 60 | sizeof(state) * (d->d_nstates + 1)); 61 | if (d->d_state == NULL) 62 | Py_FatalError("no mem to resize state in addstate"); 63 | s = &d->d_state[d->d_nstates++]; 64 | s->s_narcs = 0; 65 | s->s_arc = NULL; 66 | s->s_lower = 0; 67 | s->s_upper = 0; 68 | s->s_accel = NULL; 69 | s->s_accept = 0; 70 | return s - d->d_state; 71 | } 72 | 73 | void 74 | addarc(dfa *d, int from, int to, int lbl) 75 | { 76 | state *s; 77 | arc *a; 78 | 79 | assert(0 <= from && from < d->d_nstates); 80 | assert(0 <= to && to < d->d_nstates); 81 | 82 | s = &d->d_state[from]; 83 | s->s_arc = (arc *)PyObject_REALLOC(s->s_arc, sizeof(arc) * (s->s_narcs + 1)); 84 | if (s->s_arc == NULL) 85 | Py_FatalError("no mem to resize arc list in addarc"); 86 | a = &s->s_arc[s->s_narcs++]; 87 | a->a_lbl = lbl; 88 | a->a_arrow = to; 89 | } 90 | 91 | int 92 | addlabel(labellist *ll, int type, char *str) 93 | { 94 | int i; 95 | label *lb; 96 | 97 | for (i = 0; i < ll->ll_nlabels; i++) { 98 | if (ll->ll_label[i].lb_type == type && 99 | strcmp(ll->ll_label[i].lb_str, str) == 0) 100 | return i; 101 | } 102 | ll->ll_label = (label *)PyObject_REALLOC(ll->ll_label, 103 | sizeof(label) * (ll->ll_nlabels + 1)); 104 | if (ll->ll_label == NULL) 105 | Py_FatalError("no mem to resize labellist in addlabel"); 106 | lb = &ll->ll_label[ll->ll_nlabels++]; 107 | lb->lb_type = type; 108 | lb->lb_str = strdup(str); 109 | if (Py_DebugFlag) 110 | printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels, 111 | Ta27Grammar_LabelRepr(lb)); 112 | return lb - ll->ll_label; 113 | } 114 | 115 | /* Same, but rather dies than adds */ 116 | 117 | int 118 | findlabel(labellist *ll, int type, char *str) 119 | { 120 | int i; 121 | 122 | for (i = 0; i < ll->ll_nlabels; i++) { 123 | if 
(ll->ll_label[i].lb_type == type /*&& 124 | strcmp(ll->ll_label[i].lb_str, str) == 0*/) 125 | return i; 126 | } 127 | fprintf(stderr, "Label %d/'%s' not found\n", type, str); 128 | Py_FatalError("grammar.c:findlabel()"); 129 | return 0; /* Make gcc -Wall happy */ 130 | } 131 | 132 | /* Forward */ 133 | static void translabel(grammar *, label *); 134 | 135 | void 136 | translatelabels(grammar *g) 137 | { 138 | int i; 139 | 140 | #ifdef Py_DEBUG 141 | printf("Translating labels ...\n"); 142 | #endif 143 | /* Don't translate EMPTY */ 144 | for (i = EMPTY+1; i < g->g_ll.ll_nlabels; i++) 145 | translabel(g, &g->g_ll.ll_label[i]); 146 | } 147 | 148 | static void 149 | translabel(grammar *g, label *lb) 150 | { 151 | int i; 152 | 153 | if (Py_DebugFlag) 154 | printf("Translating label %s ...\n", Ta27Grammar_LabelRepr(lb)); 155 | 156 | if (lb->lb_type == NAME) { 157 | for (i = 0; i < g->g_ndfas; i++) { 158 | if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) { 159 | if (Py_DebugFlag) 160 | printf( 161 | "Label %s is non-terminal %d.\n", 162 | lb->lb_str, 163 | g->g_dfa[i].d_type); 164 | lb->lb_type = g->g_dfa[i].d_type; 165 | free(lb->lb_str); 166 | lb->lb_str = NULL; 167 | return; 168 | } 169 | } 170 | for (i = 0; i < (int)N_TOKENS; i++) { 171 | if (strcmp(lb->lb_str, _Ta27Parser_TokenNames[i]) == 0) { 172 | if (Py_DebugFlag) 173 | printf("Label %s is terminal %d.\n", 174 | lb->lb_str, i); 175 | lb->lb_type = i; 176 | free(lb->lb_str); 177 | lb->lb_str = NULL; 178 | return; 179 | } 180 | } 181 | printf("Can't translate NAME label '%s'\n", lb->lb_str); 182 | return; 183 | } 184 | 185 | if (lb->lb_type == STRING) { 186 | if (isalpha(Py_CHARMASK(lb->lb_str[1])) || 187 | lb->lb_str[1] == '_') { 188 | char *p; 189 | char *src; 190 | char *dest; 191 | size_t name_len; 192 | if (Py_DebugFlag) 193 | printf("Label %s is a keyword\n", lb->lb_str); 194 | lb->lb_type = NAME; 195 | src = lb->lb_str + 1; 196 | p = strchr(src, '\''); 197 | if (p) 198 | name_len = p - src; 199 | else 200 | 
name_len = strlen(src); 201 | dest = (char *)malloc(name_len + 1); 202 | if (!dest) { 203 | printf("Can't alloc dest '%s'\n", src); 204 | return; 205 | } 206 | strncpy(dest, src, name_len); 207 | dest[name_len] = '\0'; 208 | free(lb->lb_str); 209 | lb->lb_str = dest; 210 | } 211 | else if (lb->lb_str[2] == lb->lb_str[0]) { 212 | int type = (int) Ta27Token_OneChar(lb->lb_str[1]); 213 | if (type != OP) { 214 | lb->lb_type = type; 215 | free(lb->lb_str); 216 | lb->lb_str = NULL; 217 | } 218 | else 219 | printf("Unknown OP label %s\n", 220 | lb->lb_str); 221 | } 222 | else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) { 223 | int type = (int) Ta27Token_TwoChars(lb->lb_str[1], 224 | lb->lb_str[2]); 225 | if (type != OP) { 226 | lb->lb_type = type; 227 | free(lb->lb_str); 228 | lb->lb_str = NULL; 229 | } 230 | else 231 | printf("Unknown OP label %s\n", 232 | lb->lb_str); 233 | } 234 | else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) { 235 | int type = (int) Ta27Token_ThreeChars(lb->lb_str[1], 236 | lb->lb_str[2], 237 | lb->lb_str[3]); 238 | if (type != OP) { 239 | lb->lb_type = type; 240 | free(lb->lb_str); 241 | lb->lb_str = NULL; 242 | } 243 | else 244 | printf("Unknown OP label %s\n", 245 | lb->lb_str); 246 | } 247 | else 248 | printf("Can't translate STRING label %s\n", 249 | lb->lb_str); 250 | } 251 | else 252 | printf("Can't translate label '%s'\n", 253 | Ta27Grammar_LabelRepr(lb)); 254 | } 255 | -------------------------------------------------------------------------------- /ast3/Parser/grammar.c: -------------------------------------------------------------------------------- 1 | 2 | /* Grammar implementation */ 3 | 4 | #include "Python.h" 5 | #include "../Include/pgenheaders.h" 6 | 7 | #include 8 | 9 | #include "../Include/token.h" 10 | #include "../Include/grammar.h" 11 | 12 | PyAPI_DATA(int) Py_DebugFlag; 13 | 14 | grammar * 15 | newgrammar(int start) 16 | { 17 | grammar *g; 18 | 19 | g = (grammar 
*)PyObject_MALLOC(sizeof(grammar)); 20 | if (g == NULL) 21 | Py_FatalError("no mem for new grammar"); 22 | g->g_ndfas = 0; 23 | g->g_dfa = NULL; 24 | g->g_start = start; 25 | g->g_ll.ll_nlabels = 0; 26 | g->g_ll.ll_label = NULL; 27 | g->g_accel = 0; 28 | return g; 29 | } 30 | 31 | void 32 | freegrammar(grammar *g) 33 | { 34 | int i, j; 35 | for (i = 0; i < g->g_ndfas; i++) { 36 | free(g->g_dfa[i].d_name); 37 | for (j = 0; j < g->g_dfa[i].d_nstates; j++) 38 | PyObject_FREE(g->g_dfa[i].d_state[j].s_arc); 39 | PyObject_FREE(g->g_dfa[i].d_state); 40 | } 41 | PyObject_FREE(g->g_dfa); 42 | for (i = 0; i < g->g_ll.ll_nlabels; i++) 43 | free(g->g_ll.ll_label[i].lb_str); 44 | PyObject_FREE(g->g_ll.ll_label); 45 | PyObject_FREE(g); 46 | } 47 | 48 | dfa * 49 | adddfa(grammar *g, int type, const char *name) 50 | { 51 | dfa *d; 52 | 53 | g->g_dfa = (dfa *)PyObject_REALLOC(g->g_dfa, 54 | sizeof(dfa) * (g->g_ndfas + 1)); 55 | if (g->g_dfa == NULL) 56 | Py_FatalError("no mem to resize dfa in adddfa"); 57 | d = &g->g_dfa[g->g_ndfas++]; 58 | d->d_type = type; 59 | d->d_name = strdup(name); 60 | d->d_nstates = 0; 61 | d->d_state = NULL; 62 | d->d_initial = -1; 63 | d->d_first = NULL; 64 | return d; /* Only use while fresh! 
*/ 65 | } 66 | 67 | int 68 | addstate(dfa *d) 69 | { 70 | state *s; 71 | 72 | d->d_state = (state *)PyObject_REALLOC(d->d_state, 73 | sizeof(state) * (d->d_nstates + 1)); 74 | if (d->d_state == NULL) 75 | Py_FatalError("no mem to resize state in addstate"); 76 | s = &d->d_state[d->d_nstates++]; 77 | s->s_narcs = 0; 78 | s->s_arc = NULL; 79 | s->s_lower = 0; 80 | s->s_upper = 0; 81 | s->s_accel = NULL; 82 | s->s_accept = 0; 83 | return Py_SAFE_DOWNCAST(s - d->d_state, intptr_t, int); 84 | } 85 | 86 | void 87 | addarc(dfa *d, int from, int to, int lbl) 88 | { 89 | state *s; 90 | arc *a; 91 | 92 | assert(0 <= from && from < d->d_nstates); 93 | assert(0 <= to && to < d->d_nstates); 94 | 95 | s = &d->d_state[from]; 96 | s->s_arc = (arc *)PyObject_REALLOC(s->s_arc, sizeof(arc) * (s->s_narcs + 1)); 97 | if (s->s_arc == NULL) 98 | Py_FatalError("no mem to resize arc list in addarc"); 99 | a = &s->s_arc[s->s_narcs++]; 100 | a->a_lbl = lbl; 101 | a->a_arrow = to; 102 | } 103 | 104 | int 105 | addlabel(labellist *ll, int type, const char *str) 106 | { 107 | int i; 108 | label *lb; 109 | 110 | for (i = 0; i < ll->ll_nlabels; i++) { 111 | if (ll->ll_label[i].lb_type == type && 112 | strcmp(ll->ll_label[i].lb_str, str) == 0) 113 | return i; 114 | } 115 | ll->ll_label = (label *)PyObject_REALLOC(ll->ll_label, 116 | sizeof(label) * (ll->ll_nlabels + 1)); 117 | if (ll->ll_label == NULL) 118 | Py_FatalError("no mem to resize labellist in addlabel"); 119 | lb = &ll->ll_label[ll->ll_nlabels++]; 120 | lb->lb_type = type; 121 | lb->lb_str = strdup(str); 122 | if (Py_DebugFlag) 123 | printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels, 124 | Ta3Grammar_LabelRepr(lb)); 125 | return Py_SAFE_DOWNCAST(lb - ll->ll_label, intptr_t, int); 126 | } 127 | 128 | /* Same, but rather dies than adds */ 129 | 130 | int 131 | findlabel(labellist *ll, int type, const char *str) 132 | { 133 | int i; 134 | 135 | for (i = 0; i < ll->ll_nlabels; i++) { 136 | if (ll->ll_label[i].lb_type == type /*&& 137 | 
strcmp(ll->ll_label[i].lb_str, str) == 0*/) 138 | return i; 139 | } 140 | fprintf(stderr, "Label %d/'%s' not found\n", type, str); 141 | Py_FatalError("grammar.c:findlabel()"); 142 | 143 | /* Py_FatalError() is declared with __attribute__((__noreturn__)). 144 | GCC emits a warning without "return 0;" (compiler bug!), but Clang is 145 | smarter and emits a warning on the return... */ 146 | #ifndef __clang__ 147 | return 0; /* Make gcc -Wall happy */ 148 | #endif 149 | } 150 | 151 | /* Forward */ 152 | static void translabel(grammar *, label *); 153 | 154 | void 155 | translatelabels(grammar *g) 156 | { 157 | int i; 158 | 159 | #ifdef Py_DEBUG 160 | printf("Translating labels ...\n"); 161 | #endif 162 | /* Don't translate EMPTY */ 163 | for (i = EMPTY+1; i < g->g_ll.ll_nlabels; i++) 164 | translabel(g, &g->g_ll.ll_label[i]); 165 | } 166 | 167 | static void 168 | translabel(grammar *g, label *lb) 169 | { 170 | int i; 171 | 172 | if (Py_DebugFlag) 173 | printf("Translating label %s ...\n", Ta3Grammar_LabelRepr(lb)); 174 | 175 | if (lb->lb_type == NAME) { 176 | for (i = 0; i < g->g_ndfas; i++) { 177 | if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) { 178 | if (Py_DebugFlag) 179 | printf( 180 | "Label %s is non-terminal %d.\n", 181 | lb->lb_str, 182 | g->g_dfa[i].d_type); 183 | lb->lb_type = g->g_dfa[i].d_type; 184 | free(lb->lb_str); 185 | lb->lb_str = NULL; 186 | return; 187 | } 188 | } 189 | for (i = 0; i < (int)N_TOKENS; i++) { 190 | if (strcmp(lb->lb_str, _Ta3Parser_TokenNames[i]) == 0) { 191 | if (Py_DebugFlag) 192 | printf("Label %s is terminal %d.\n", 193 | lb->lb_str, i); 194 | lb->lb_type = i; 195 | free(lb->lb_str); 196 | lb->lb_str = NULL; 197 | return; 198 | } 199 | } 200 | printf("Can't translate NAME label '%s'\n", lb->lb_str); 201 | return; 202 | } 203 | 204 | if (lb->lb_type == STRING) { 205 | if (isalpha(Py_CHARMASK(lb->lb_str[1])) || 206 | lb->lb_str[1] == '_') { 207 | char *p; 208 | char *src; 209 | char *dest; 210 | size_t name_len; 211 | if 
(Py_DebugFlag) 212 | printf("Label %s is a keyword\n", lb->lb_str); 213 | lb->lb_type = NAME; 214 | src = lb->lb_str + 1; 215 | p = strchr(src, '\''); 216 | if (p) 217 | name_len = p - src; 218 | else 219 | name_len = strlen(src); 220 | dest = (char *)malloc(name_len + 1); 221 | if (!dest) { 222 | printf("Can't alloc dest '%s'\n", src); 223 | return; 224 | } 225 | strncpy(dest, src, name_len); 226 | dest[name_len] = '\0'; 227 | free(lb->lb_str); 228 | lb->lb_str = dest; 229 | } 230 | else if (lb->lb_str[2] == lb->lb_str[0]) { 231 | int type = (int) Ta3Token_OneChar(lb->lb_str[1]); 232 | if (type != OP) { 233 | lb->lb_type = type; 234 | free(lb->lb_str); 235 | lb->lb_str = NULL; 236 | } 237 | else 238 | printf("Unknown OP label %s\n", 239 | lb->lb_str); 240 | } 241 | else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) { 242 | int type = (int) Ta3Token_TwoChars(lb->lb_str[1], 243 | lb->lb_str[2]); 244 | if (type != OP) { 245 | lb->lb_type = type; 246 | free(lb->lb_str); 247 | lb->lb_str = NULL; 248 | } 249 | else 250 | printf("Unknown OP label %s\n", 251 | lb->lb_str); 252 | } 253 | else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) { 254 | int type = (int) Ta3Token_ThreeChars(lb->lb_str[1], 255 | lb->lb_str[2], 256 | lb->lb_str[3]); 257 | if (type != OP) { 258 | lb->lb_type = type; 259 | free(lb->lb_str); 260 | lb->lb_str = NULL; 261 | } 262 | else 263 | printf("Unknown OP label %s\n", 264 | lb->lb_str); 265 | } 266 | else 267 | printf("Can't translate STRING label %s\n", 268 | lb->lb_str); 269 | } 270 | else 271 | printf("Can't translate label '%s'\n", 272 | Ta3Grammar_LabelRepr(lb)); 273 | } 274 | -------------------------------------------------------------------------------- /ast3/tests/test_basics.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | from typed_ast import _ast3 6 | from typed_ast import _ast27 7 | import typed_ast.conversions 8 | 9 | # 
Lowest and highest supported Python 3 minor version (inclusive) 10 | MIN_VER = 4 11 | MAX_VER = 7 12 | NEXT_VER = MAX_VER + 1 13 | 14 | 15 | basics = """\ 16 | def foo(): 17 | # type: () -> int 18 | pass 19 | 20 | def bar(): # type: () -> None 21 | pass 22 | """ 23 | def test_basics(): 24 | for version in range(MIN_VER, NEXT_VER): 25 | tree = _ast3._parse(basics, "", "exec", version) 26 | assert tree.body[0].type_comment == "() -> int" 27 | assert tree.body[1].type_comment == "() -> None" 28 | 29 | 30 | redundantdef = """\ 31 | def foo(): # type: () -> int 32 | # type: () -> str 33 | return '' 34 | """ 35 | def test_redundantdef(): 36 | for version in range(MIN_VER, NEXT_VER): 37 | with pytest.raises(SyntaxError): 38 | t = _ast3._parse(redundantdef, "", "exec", version) 39 | 40 | 41 | vardecl = """\ 42 | a = 0 # type: int 43 | a # type: int 44 | """ 45 | def test_vardecl(): 46 | for version in range(MIN_VER, NEXT_VER): 47 | tree = _ast3._parse(vardecl, "", "exec", version) 48 | assert tree.body[0].type_comment == "int" 49 | # Curious fact: an expression can have a type comment 50 | # but it is lost in the AST. 51 | 52 | 53 | forstmt = """\ 54 | for a in []: # type: int 55 | pass 56 | """ 57 | def test_forstmt(): 58 | for version in range(MIN_VER, NEXT_VER): 59 | tree = _ast3._parse(forstmt, "", "exec", version) 60 | assert tree.body[0].type_comment == "int" 61 | 62 | 63 | withstmt = """\ 64 | with context(): # type: int 65 | pass 66 | """ 67 | def test_withstmt(): 68 | for version in range(MIN_VER, NEXT_VER): 69 | tree = _ast3._parse(withstmt, "", "exec", version) 70 | assert tree.body[0].type_comment == "int" 71 | 72 | 73 | # A test function named 'fabvk' would have two positional args, a and b, 74 | # plus a var-arg *v, plus a kw-arg **k. It is verified in test_longargs() 75 | # that it has exactly these arguments, no more, no fewer. 
76 | longargs = """\ 77 | def fa( 78 | a = 1, # type: A 79 | ): 80 | pass 81 | 82 | def fa( 83 | a = 1 # type: A 84 | ): 85 | pass 86 | 87 | def fab( 88 | a, # type: A 89 | b, # type: B 90 | ): 91 | pass 92 | 93 | def fab( 94 | a, # type: A 95 | b # type: B 96 | ): 97 | pass 98 | 99 | def fv( 100 | *v, # type: V 101 | ): 102 | pass 103 | 104 | def fv( 105 | *v # type: V 106 | ): 107 | pass 108 | 109 | def fk( 110 | **k, # type: K 111 | ): 112 | pass 113 | 114 | def fk( 115 | **k # type: K 116 | ): 117 | pass 118 | 119 | def fvk( 120 | *v, # type: V 121 | **k, # type: K 122 | ): 123 | pass 124 | 125 | def fvk( 126 | *v, # type: V 127 | **k # type: K 128 | ): 129 | pass 130 | 131 | def fav( 132 | a, # type: A 133 | *v, # type: V 134 | ): 135 | pass 136 | 137 | def fav( 138 | a, # type: A 139 | *v # type: V 140 | ): 141 | pass 142 | 143 | def fak( 144 | a, # type: A 145 | **k, # type: K 146 | ): 147 | pass 148 | 149 | def fak( 150 | a, # type: A 151 | **k # type: K 152 | ): 153 | pass 154 | 155 | def favk( 156 | a, # type: A 157 | *v, # type: V 158 | **k, # type: K 159 | ): 160 | pass 161 | 162 | def favk( 163 | a, # type: A 164 | *v, # type: V 165 | **k # type: K 166 | ): 167 | pass 168 | 169 | """ 170 | def test_longargs(): 171 | for version in range(MIN_VER, NEXT_VER): 172 | tree = _ast3._parse(longargs, "", "exec", version) 173 | for t in tree.body: 174 | # The expected args are encoded in the function name 175 | todo = set(t.name[1:]) 176 | assert len(t.args.args) == len(todo) - bool(t.args.vararg) - bool(t.args.kwarg) 177 | assert t.name.startswith('f') 178 | for c in t.name[1:]: 179 | todo.remove(c) 180 | if c == 'v': 181 | arg = t.args.vararg 182 | elif c == 'k': 183 | arg = t.args.kwarg 184 | else: 185 | assert 0 <= ord(c) - ord('a') < len(t.args.args) 186 | arg = t.args.args[ord(c) - ord('a')] 187 | assert arg.arg == c # That's the argument name 188 | assert arg.type_comment == arg.arg.upper() 189 | assert not todo 190 | 191 | 192 | ignores = """\ 193 | def 
ignores = """\
def foo():
    pass  # type: ignore

def bar():
    x = 1  # type: ignore

def baz():
    pass  # type: ignore[excuse]
    pass  # type: ignore=excuse
    pass  # type: ignore [excuse]
    x = 1  # type: ignore whatever
"""
def test_ignores():
    # (lineno, tag) pairs for every "# type: ignore" comment in `ignores`.
    expected = [
        (2, ''),
        (5, ''),
        (8, '[excuse]'),
        (9, '=excuse'),
        (10, ' [excuse]'),
        (11, ' whatever'),
    ]

    def tags(tree):
        return [(ti.lineno, ti.tag) for ti in tree.type_ignores]

    for version in range(MIN_VER, NEXT_VER):
        assert tags(_ast3._parse(ignores, "", "exec", version)) == expected
        with pytest.raises(SyntaxError):
            # a non-ASCII continuation of "ignore" is not a valid tag
            _ast3._parse("pass # type: ignoreé\n", "", "exec", version)

    # The Python 2.7 parser must agree.
    assert tags(_ast27.parse(ignores, "", "exec")) == expected
    with pytest.raises(SyntaxError):
        _ast27.parse("pass # type: ignoreé\n", "", "exec")



asyncfunc = """\
async def foo():
    # type: () -> int
    return await bar()
"""
def test_asyncfunc():
    # 'async def' only parses from feature version 5 onward.
    for v in range(3, 5):
        with pytest.raises(SyntaxError):
            _ast3._parse(asyncfunc, "", "exec", v)
    for v in range(5, NEXT_VER):
        module = _ast3._parse(asyncfunc, "", "exec", v)
        assert module.body[0].type_comment == "() -> int"


asyncvar = """\
async = 12
await = 13
"""
def test_asyncvar():
    # 'async'/'await' are ordinary identifiers before 3.7, keywords after.
    for v in range(3, 7):
        _ast3._parse(asyncvar, "", "exec", v)
    for v in range(7, NEXT_VER):
        with pytest.raises(SyntaxError):
            _ast3._parse(asyncvar, "", "exec", v)


asynccomp = """\
async def foo(xs):
    [x async for x in xs]
"""
def test_asynccomp():
    # Async comprehensions appeared in 3.6.
    for v in range(3, 6):
        with pytest.raises(SyntaxError):
            _ast3._parse(asynccomp, "", "exec", v)
    for v in range(6, NEXT_VER):
        _ast3._parse(asynccomp, "", "exec", v)


matmul = """\
a = b @ c
"""
def test_matmul():
    # The '@' matrix-multiplication operator appeared in 3.5.
    for v in range(3, 5):
        with pytest.raises(SyntaxError):
            _ast3._parse(matmul, "", "exec", v)
    for v in range(5, NEXT_VER):
        _ast3._parse(matmul, "", "exec", v)


strkind = """\
plain = 'abc'
raw = r'abc'
plain_bytes = b'abc'
raw_bytes = br'abc'
"""
def test_strkind():
    # Str() objects carry a `kind` argument/attribute recording the prefix.
    node = _ast3.Str("foo", "r")
    assert node.s == "foo"
    assert node.kind == "r"
    for v in range(MIN_VER, NEXT_VER):
        module = _ast3._parse(strkind, "", "exec", v)
        assert [stmt.value.kind for stmt in module.body] == ["", "r", "b", "br"]


basic_py2 = """\
a = 'hello'
b = u'hello'
c = b'hello'
"""
def test_convert_strs():
    # py2to3 must preserve each literal's string prefix as `kind`.
    converted = typed_ast.conversions.py2to3(_ast27.parse(basic_py2, "", "exec"))
    assert converted.body[0].value.kind == ""
    assert converted.body[1].value.kind == "u"
    assert converted.body[2].value.kind == "b"

simple_fstring = """\
f'{5}'
"""
def test_simple_fstring():
    for v in range(6, NEXT_VER):
        module = _ast3._parse(simple_fstring, "", "exec", v)
        joined = module.body[0].value
        assert isinstance(joined, _ast3.JoinedStr)
        assert isinstance(joined.values[0].value, _ast3.Num)

# Test the interaction between versions and f strings
await_fstring = """\
f'1 + {f"{await}"}'
"""
def test_await_fstring():
    # Should work on 6 but fail on 7 ('await' becomes a keyword).
    _ast3._parse(await_fstring, "", "exec", 6)
    with pytest.raises(SyntaxError):
        _ast3._parse(await_fstring, "", "exec", 7)
from typed_ast import ast27
from typed_ast import ast3

def py2to3(ast):
    """Converts a typed Python 2.7 ast to a typed Python 3.5 ast.  The returned
    ast is a valid Python 3 ast with two exceptions:

    - `arg` objects may contain Tuple objects instead of just identifiers
       in the case of Python 2 function definitions/lambdas that use the tuple
       unpacking syntax.
    - `Raise` objects will have a `traceback` attribute added if the 3
       argument version of the Python 2 raise is used.


    Strange and Rare Uncovered Edge Cases:
    - Raise: if the second argument to a raise statement is a tuple, its
      contents are unpacked as arguments to the exception constructor.  This
      case is handled correctly if it's a literal tuple, but not if it's any
      other sort of tuple expression.
    """
    return _AST2To3().visit(ast)

def _copy_attributes(new_value, old_value):
    # Copy the node attributes listed in `_attributes` (source-location
    # metadata such as lineno/col_offset) from the old (py2) node onto the
    # freshly built (py3) node, so converted nodes keep their positions.
    attrs = getattr(old_value, '_attributes', None)
    if attrs is not None:
        for attr in attrs:
            setattr(new_value, attr, getattr(old_value, attr))
    return new_value

class _AST2To3(ast27.NodeTransformer):
    # Converts an ast27 tree into the equivalent ast3 tree, node by node.
    # note: None, True, and False are *not* translated into NameConstants.
    def __init__(self):
        pass

    def visit(self, node):
        """Visit a node."""
        # Dispatch on the node's class name, falling back to generic_visit,
        # and always carry the source-location attributes across.
        method = 'visit_' + node.__class__.__name__
        visitor = getattr(self, method, self.generic_visit)
        ret = _copy_attributes(visitor(node), node)
        return ret

    def maybe_visit(self, node):
        # Like visit(), but tolerates optional (None) children.
        if node is not None:
            return self.visit(node)
        else:
            return None

    def generic_visit(self, node):
        # Default conversion: build the ast3 node of the same class name and
        # copy every field, recursing into child nodes and lists.
        class_name = node.__class__.__name__
        converted_class = getattr(ast3, class_name)
        new_node = converted_class()
        for field, old_value in ast27.iter_fields(node):
            if isinstance(old_value, (ast27.AST, list)):
                setattr(new_node, field, self.visit(old_value))
            else:
                setattr(new_node, field, old_value)
        return new_node


    def visit_list(self, l):
        # Convert each AST element of a list; non-AST values pass through.
        return [self.visit(e) if isinstance(e, (ast27.AST, list)) else e for e in l]

    def visit_FunctionDef(self, n):
        # Python 2 has no return annotations; ast3 requires the field.
        new = self.generic_visit(n)
        new.returns = None
        return new

    def visit_ClassDef(self, n):
        # Python 2 class statements have no keyword arguments (metaclass=...).
        new = self.generic_visit(n)
        new.keywords = []
        return new

    def visit_TryExcept(self, n):
        # py2 TryExcept -> py3 Try with an empty finalbody.
        return ast3.Try(self.visit(n.body),
                        self.visit(n.handlers),
                        self.visit(n.orelse),
                        [])

    def visit_TryFinally(self, n):
        # py2 try/except/finally parses as TryFinally wrapping a lone
        # TryExcept; merge the two into a single py3 Try when that shape
        # is detected, otherwise emit a finally-only Try.
        if len(n.body) == 1 and isinstance(n.body[0], ast27.TryExcept):
            new = self.visit(n.body[0])
            new.finalbody = self.visit(n.finalbody)
            return new
        else:
            return ast3.Try(self.visit(n.body),
                            [],
                            [],
                            self.visit(n.finalbody))


    def visit_ExceptHandler(self, n):
        # py2 binds the exception to an arbitrary target node; py3 allows
        # only a plain identifier.
        if n.name is None:
            name = None
        elif isinstance(n.name, ast27.Name):
            name = n.name.id
        else:
            raise RuntimeError("'{}' has non-Name name.".format(ast27.dump(n)))

        return ast3.ExceptHandler(self.maybe_visit(n.type),
                                  name,
                                  self.visit(n.body))

    def visit_Print(self, n):
        # py2 print statement -> py3 print() call, mapping the >>dest
        # redirection to file=... and a trailing comma to end=" ".
        keywords = []
        if n.dest is not None:
            keywords.append(ast3.keyword("file", self.visit(n.dest)))

        if not n.nl:
            keywords.append(ast3.keyword("end",
                                         ast3.Str(s=" ", kind='', lineno=n.lineno, col_offset=-1)))

        return ast3.Expr(ast3.Call(ast3.Name("print", ast3.Load(), lineno=n.lineno, col_offset=-1),
                                   self.visit(n.values),
                                   keywords,
                                   lineno=n.lineno, col_offset=-1))

    def visit_Raise(self, n):
        # py2 `raise type, inst, tback` -> py3 `raise type(inst...)`; a tuple
        # inst is unpacked as constructor arguments (see module docstring).
        e = None
        if n.type is not None:
            e = self.visit(n.type)

        if n.inst is not None and not (isinstance(n.inst, ast27.Name) and n.inst.id == "None"):
            inst = self.visit(n.inst)
            if isinstance(inst, ast3.Tuple):
                args = inst.elts
            else:
                args = [inst]
            e = ast3.Call(e, args, [], lineno=e.lineno, col_offset=-1)

        ret = ast3.Raise(e, None)
        if n.tback is not None:
            # Non-standard attribute: py3 Raise has no traceback slot.
            ret.traceback = self.visit(n.tback)
        return ret

    def visit_Exec(self, n):
        # py2 exec statement -> py3 exec() call; missing globals/locals
        # become explicit None name nodes.
        new_globals = self.maybe_visit(n.globals)
        if new_globals is None:
            new_globals = ast3.Name("None", ast3.Load(), lineno=-1, col_offset=-1)
        new_locals = self.maybe_visit(n.locals)
        if new_locals is None:
            new_locals = ast3.Name("None", ast3.Load(), lineno=-1, col_offset=-1)

        return ast3.Expr(ast3.Call(ast3.Name("exec", ast3.Load(), lineno=n.lineno, col_offset=-1),
                                   [self.visit(n.body), new_globals, new_locals],
                                   [],
                                   lineno=n.lineno, col_offset=-1))

    # TODO(ddfisher): the name repr could be used locally as something else; disambiguate
    def visit_Repr(self, n):
        # py2 backtick expression -> repr() call.
        return ast3.Call(ast3.Name("repr", ast3.Load(), lineno=n.lineno, col_offset=-1),
                         [self.visit(n.value)],
                         [])

    # TODO(ddfisher): this will cause strange behavior on multi-item with statements with type comments
    def visit_With(self, n):
        # py2 nests multi-item with statements; each becomes one withitem.
        return ast3.With([ast3.withitem(self.visit(n.context_expr), self.maybe_visit(n.optional_vars))],
                         self.visit(n.body),
                         n.type_comment)

    def visit_Call(self, n):
        # py2 keeps *args/**kwargs in dedicated fields; py3 folds them into
        # the args (as Starred) and keywords (with a None arg) lists.
        args = self.visit(n.args)
        if n.starargs is not None:
            args.append(ast3.Starred(self.visit(n.starargs), ast3.Load(), lineno=n.starargs.lineno, col_offset=n.starargs.col_offset))

        keywords = self.visit(n.keywords)
        if n.kwargs is not None:
            keywords.append(ast3.keyword(None, self.visit(n.kwargs)))

        return ast3.Call(self.visit(n.func),
                         args,
                         keywords)

    # TODO(ddfisher): find better attributes to give Ellipses
    def visit_Ellipsis(self, n):
        # ellipses in Python 2 only exist as a slice index
        return ast3.Index(ast3.Ellipsis(lineno=-1, col_offset=-1))

    def visit_arguments(self, n):
        # py2 stores argument names as expression nodes and type comments in
        # a parallel list; py3 uses ast3.arg objects with an attached
        # type_comment.  Tuple-unpacking parameters are passed through as
        # Tuple nodes (see module docstring).
        def convert_arg(arg, type_comment):
            if isinstance(arg, ast27.Name):
                v = arg.id
            elif isinstance(arg, ast27.Tuple):
                v = self.visit(arg)
            else:
                raise RuntimeError("'{}' is not a valid argument.".format(ast27.dump(arg)))
            return ast3.arg(v, None, type_comment, lineno=arg.lineno, col_offset=arg.col_offset)

        def get_type_comment(i):
            # Per-argument type comments are positional: args first, then
            # vararg, then kwarg.
            if i < len(n.type_comments) and n.type_comments[i] is not None:
                return n.type_comments[i]
            return None

        args = [convert_arg(arg, get_type_comment(i)) for i, arg in enumerate(n.args)]

        vararg = None
        if n.vararg is not None:
            vararg = ast3.arg(n.vararg,
                              None,
                              get_type_comment(len(args)),
                              lineno=-1, col_offset=-1)

        kwarg = None
        if n.kwarg is not None:
            kwarg = ast3.arg(n.kwarg,
                             None,
                             get_type_comment(len(args) + (0 if n.vararg is None else 1)),
                             lineno=-1, col_offset=-1)

        defaults = self.visit(n.defaults)

        # py2 has no keyword-only arguments, hence the two empty lists.
        return ast3.arguments(args,
                              vararg,
                              [],
                              [],
                              kwarg,
                              defaults)

    def visit_Str(self, s):
        # py2 Str holds both text and bytes; split into py3 Str/Bytes,
        # preserving the literal's prefix in `kind`.
        if isinstance(s.s, bytes):
            return ast3.Bytes(s.s, s.kind)
        else:
            return ast3.Str(s.s, s.kind)

    def visit_Num(self, n):
        new = self.generic_visit(n)
        if new.n < 0:
            # Python 3 uses a unary - operator for negative literals.
            new.n = -new.n
            return ast3.UnaryOp(op=ast3.USub(),
                                operand=_copy_attributes(new, n))
        else:
            return new
// from Python/bltinmodule.c
/* Return the source text of `cmd` as a NUL-terminated UTF-8 C string.
 * Accepts str, bytes, bytearray, or any buffer-protocol object; in the
 * buffer case a NUL-terminated copy is stored in *cmd_copy (a new
 * reference owned by the caller; NULL otherwise).  Returns NULL with an
 * exception set on error, including embedded NUL bytes in the source. */
static const char *
source_as_string(PyObject *cmd, const char *funcname, const char *what, PyCompilerFlags *cf, PyObject **cmd_copy)
{
    const char *str;
    Py_ssize_t size;
    Py_buffer view;

    *cmd_copy = NULL;
    if (PyUnicode_Check(cmd)) {
        cf->cf_flags |= PyCF_IGNORE_COOKIE;
        str = PyUnicode_AsUTF8AndSize(cmd, &size);
        if (str == NULL)
            return NULL;
    }
    else if (PyBytes_Check(cmd)) {
        str = PyBytes_AS_STRING(cmd);
        size = PyBytes_GET_SIZE(cmd);
    }
    else if (PyByteArray_Check(cmd)) {
        str = PyByteArray_AS_STRING(cmd);
        size = PyByteArray_GET_SIZE(cmd);
    }
    else if (PyObject_GetBuffer(cmd, &view, PyBUF_SIMPLE) == 0) {
        /* Copy to NUL-terminated buffer. */
        *cmd_copy = PyBytes_FromStringAndSize(
            (const char *)view.buf, view.len);
        PyBuffer_Release(&view);
        if (*cmd_copy == NULL) {
            return NULL;
        }
        str = PyBytes_AS_STRING(*cmd_copy);
        size = PyBytes_GET_SIZE(*cmd_copy);
    }
    else {
        PyErr_Format(PyExc_TypeError,
          "%s() arg 1 must be a %s object",
          funcname, what);
        return NULL;
    }

    /* A NUL before `size` means the source contained an embedded NUL. */
    if (strlen(str) != (size_t)size) {
        PyErr_SetString(PyExc_ValueError,
                        "source code string cannot contain null bytes");
        Py_CLEAR(*cmd_copy);
        return NULL;
    }
    return str;
}

// from Python/pythonrun.c
/* compute parser flags based on compiler flags */
static int PARSER_FLAGS(PyCompilerFlags *flags)
{
    int parser_flags = 0;
    if (!flags)
        return 0;
    if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT)
        parser_flags |= PyPARSE_DONT_IMPLY_DEDENT;
    if (flags->cf_flags & PyCF_IGNORE_COOKIE)
        parser_flags |= PyPARSE_IGNORE_COOKIE;
    return parser_flags;
}

// from Python/pythonrun.c
/* Set the error appropriate to the given input error code (see errcode.h) */
/* Translates the perrdetail filled in by the tokenizer/parser into a raised
 * Python exception (SyntaxError, IndentationError, or TabError) carrying a
 * (msg, (filename, lineno, offset, text)) payload, and frees err->text. */
static void
err_input(perrdetail *err)
{
    PyObject *v, *w, *errtype, *errtext;
    PyObject *msg_obj = NULL;
    char *msg = NULL;
    int offset = err->offset;

    errtype = PyExc_SyntaxError;
    switch (err->error) {
    case E_ERROR:
        /* An exception is already set; nothing to translate. */
        return;
    case E_SYNTAX:
        errtype = PyExc_IndentationError;
        if (err->expected == INDENT)
            msg = "expected an indented block";
        else if (err->token == INDENT)
            msg = "unexpected indent";
        else if (err->token == DEDENT)
            msg = "unexpected unindent";
        else {
            errtype = PyExc_SyntaxError;
            if (err->token == TYPE_COMMENT)
                msg = "misplaced type annotation";
            else
                msg = "invalid syntax";
        }
        break;
    case E_TOKEN:
        msg = "invalid token";
        break;
    case E_EOFS:
        msg = "EOF while scanning triple-quoted string literal";
        break;
    case E_EOLS:
        msg = "EOL while scanning string literal";
        break;
    case E_INTR:
        if (!PyErr_Occurred())
            PyErr_SetNone(PyExc_KeyboardInterrupt);
        goto cleanup;
    case E_NOMEM:
        PyErr_NoMemory();
        goto cleanup;
    case E_EOF:
        msg = "unexpected EOF while parsing";
        break;
    case E_TABSPACE:
        errtype = PyExc_TabError;
        msg = "inconsistent use of tabs and spaces in indentation";
        break;
    case E_OVERFLOW:
        msg = "expression too long";
        break;
    case E_DEDENT:
        errtype = PyExc_IndentationError;
        msg = "unindent does not match any outer indentation level";
        break;
    case E_TOODEEP:
        errtype = PyExc_IndentationError;
        msg = "too many levels of indentation";
        break;
    case E_DECODE: {
        /* Reuse the message of the decode error already set, if any. */
        PyObject *type, *value, *tb;
        PyErr_Fetch(&type, &value, &tb);
        msg = "unknown decode error";
        if (value != NULL)
            msg_obj = PyObject_Str(value);
        Py_XDECREF(type);
        Py_XDECREF(value);
        Py_XDECREF(tb);
        break;
    }
    case E_LINECONT:
        msg = "unexpected character after line continuation character";
        break;
    default:
        fprintf(stderr, "error=%d\n", err->error);
        msg = "unknown parsing error";
        break;
    }
    /* err->text may not be UTF-8 in case of decoding errors.
       Explicitly convert to an object. */
    if (!err->text) {
        errtext = Py_None;
        Py_INCREF(Py_None);
    } else {
        errtext = PyUnicode_DecodeUTF8(err->text, err->offset,
                                       "replace");
        if (errtext != NULL) {
            Py_ssize_t len = strlen(err->text);
            offset = (int)PyUnicode_GET_LENGTH(errtext);
            if (len != err->offset) {
                /* offset did not point at the end of the text: re-decode
                   the whole line instead of the truncated prefix. */
                Py_DECREF(errtext);
                errtext = PyUnicode_DecodeUTF8(err->text, len,
                                               "replace");
            }
        }
    }
    /* "N" steals the errtext reference. */
    v = Py_BuildValue("(OiiN)", err->filename,
                      err->lineno, offset, errtext);
    if (v != NULL) {
        if (msg_obj)
            w = Py_BuildValue("(OO)", msg_obj, v);
        else
            w = Py_BuildValue("(sO)", msg, v);
    } else
        w = NULL;
    Py_XDECREF(v);
    PyErr_SetObject(errtype, w);
    Py_XDECREF(w);
cleanup:
    Py_XDECREF(msg_obj);
    if (err->text != NULL) {
        PyObject_FREE(err->text);
        err->text = NULL;
    }
}

// from Python/pythonrun.c
/* Release the perrdetail's owned filename reference. */
static void
err_free(perrdetail *err)
{
    Py_CLEAR(err->filename);
}
*/ 208 | static mod_ty 209 | string_object_to_c_ast(const char *s, PyObject *filename, int start, 210 | PyCompilerFlags *flags, PyArena *arena) 211 | { 212 | mod_ty mod; 213 | PyCompilerFlags localflags; 214 | perrdetail err; 215 | int iflags = PARSER_FLAGS(flags); 216 | 217 | node *n = Ta27Parser_ParseStringObject(s, filename, 218 | &_Ta27Parser_Grammar, start, &err, 219 | &iflags); 220 | if (flags == NULL) { 221 | localflags.cf_flags = 0; 222 | flags = &localflags; 223 | } 224 | if (n) { 225 | flags->cf_flags |= iflags & PyCF_MASK; 226 | mod = Ta27AST_FromNode(n, flags, PyUnicode_AsUTF8(filename), arena); 227 | Ta27Node_Free(n); 228 | } 229 | else { 230 | err_input(&err); 231 | mod = NULL; 232 | } 233 | err_free(&err); 234 | return mod; 235 | } 236 | 237 | // adapted from Py_CompileStringObject in Python/pythonrun.c 238 | static PyObject * 239 | string_object_to_py_ast(const char *str, PyObject *filename, int start, 240 | PyCompilerFlags *flags) 241 | { 242 | mod_ty mod; 243 | PyObject *result; 244 | PyArena *arena = PyArena_New(); 245 | if (arena == NULL) 246 | return NULL; 247 | 248 | mod = string_object_to_c_ast(str, filename, start, flags, arena); 249 | if (mod == NULL) { 250 | PyArena_Free(arena); 251 | return NULL; 252 | } 253 | 254 | result = Ta27AST_mod2obj(mod); 255 | PyArena_Free(arena); 256 | return result; 257 | } 258 | 259 | // adapted from builtin_compile_impl in Python/bltinmodule.c 260 | static PyObject * 261 | ast27_parse_impl(PyObject *source, 262 | PyObject *filename, const char *mode) 263 | { 264 | PyObject *source_copy; 265 | const char *str; 266 | int compile_mode = -1; 267 | PyCompilerFlags cf; 268 | int start[] = {file_input, eval_input, single_input, func_type_input }; 269 | PyObject *result; 270 | 271 | cf.cf_flags = PyCF_ONLY_AST | PyCF_SOURCE_IS_UTF8; 272 | 273 | if (strcmp(mode, "exec") == 0) 274 | compile_mode = 0; 275 | else if (strcmp(mode, "eval") == 0) 276 | compile_mode = 1; 277 | else if (strcmp(mode, "single") == 0) 278 | 
compile_mode = 2; 279 | else if (strcmp(mode, "func_type") == 0) 280 | compile_mode = 3; 281 | else { 282 | PyErr_SetString(PyExc_ValueError, 283 | "parse() mode must be 'exec', 'eval', 'single', for 'func_type'"); 284 | goto error; 285 | } 286 | 287 | str = source_as_string(source, "parse", "string or bytes", &cf, &source_copy); 288 | if (str == NULL) 289 | goto error; 290 | 291 | result = string_object_to_py_ast(str, filename, start[compile_mode], &cf); 292 | Py_XDECREF(source_copy); 293 | goto finally; 294 | 295 | error: 296 | result = NULL; 297 | finally: 298 | Py_DECREF(filename); 299 | return result; 300 | } 301 | 302 | // adapted from builtin_compile in Python/clinic/bltinmodule.c.h 303 | PyObject * 304 | ast27_parse(PyObject *self, PyObject *args) 305 | { 306 | PyObject *return_value = NULL; 307 | PyObject *source; 308 | PyObject *filename; 309 | const char *mode; 310 | 311 | if (PyArg_ParseTuple(args, "OO&s:parse", &source, PyUnicode_FSDecoder, &filename, &mode)) 312 | return_value = ast27_parse_impl(source, filename, mode); 313 | 314 | return return_value; 315 | } 316 | -------------------------------------------------------------------------------- /ast27/Python/mystrtoul.c: -------------------------------------------------------------------------------- 1 | 2 | #include "Python.h" 3 | 4 | #if defined(__sgi) && defined(WITH_THREAD) && !defined(_SGI_MP_SOURCE) 5 | #define _SGI_MP_SOURCE 6 | #endif 7 | 8 | /* strtol and strtoul, renamed to avoid conflicts */ 9 | 10 | 11 | #include 12 | #ifdef HAVE_ERRNO_H 13 | #include 14 | #endif 15 | 16 | /* Static overflow check values for bases 2 through 36. 17 | * smallmax[base] is the largest unsigned long i such that 18 | * i * base doesn't overflow unsigned long. 
/* Static overflow check values for bases 2 through 36.
 * smallmax[base] is the largest unsigned long i such that
 * i * base doesn't overflow unsigned long.
 */
static unsigned long smallmax[] = {
    0, /* bases 0 and 1 are invalid */
    0,
    ULONG_MAX / 2,
    ULONG_MAX / 3,
    ULONG_MAX / 4,
    ULONG_MAX / 5,
    ULONG_MAX / 6,
    ULONG_MAX / 7,
    ULONG_MAX / 8,
    ULONG_MAX / 9,
    ULONG_MAX / 10,
    ULONG_MAX / 11,
    ULONG_MAX / 12,
    ULONG_MAX / 13,
    ULONG_MAX / 14,
    ULONG_MAX / 15,
    ULONG_MAX / 16,
    ULONG_MAX / 17,
    ULONG_MAX / 18,
    ULONG_MAX / 19,
    ULONG_MAX / 20,
    ULONG_MAX / 21,
    ULONG_MAX / 22,
    ULONG_MAX / 23,
    ULONG_MAX / 24,
    ULONG_MAX / 25,
    ULONG_MAX / 26,
    ULONG_MAX / 27,
    ULONG_MAX / 28,
    ULONG_MAX / 29,
    ULONG_MAX / 30,
    ULONG_MAX / 31,
    ULONG_MAX / 32,
    ULONG_MAX / 33,
    ULONG_MAX / 34,
    ULONG_MAX / 35,
    ULONG_MAX / 36,
};

/* maximum digits that can't ever overflow for bases 2 through 36,
 * calculated by [int(math.floor(math.log(2**32, i))) for i in range(2, 37)].
 * Note that this is pessimistic if sizeof(long) > 4.
 */
#if SIZEOF_LONG == 4
static int digitlimit[] = {
    0,  0, 32, 20, 16, 13, 12, 11, 10, 10, /*  0 -  9 */
    9,  9,  8,  8,  8,  8,  8,  7,  7,  7, /* 10 - 19 */
    7,  7,  7,  7,  6,  6,  6,  6,  6,  6, /* 20 - 29 */
    6,  6,  6,  6,  6,  6,  6};            /* 30 - 36 */
#elif SIZEOF_LONG == 8
/* [int(math.floor(math.log(2**64, i))) for i in range(2, 37)] */
static int digitlimit[] = {
         0,   0, 64, 40, 32, 27, 24, 22, 21, 20, /*  0 -  9 */
    19, 18, 17, 17, 16, 16, 16, 15, 15, 15,      /* 10 - 19 */
    14, 14, 14, 14, 13, 13, 13, 13, 13, 13,      /* 20 - 29 */
    13, 12, 12, 12, 12, 12, 12};                 /* 30 - 36 */
#else
#error "Need table for SIZEOF_LONG"
#endif

/* Table of digit values for 8-bit string -> integer conversion.
 * '0' maps to 0, ..., '9' maps to 9.
 * 'a' and 'A' map to 10, ..., 'z' and 'Z' map to 35.
 * All other indices map to 37.
 * Note that when converting a base B string, a char c is a legitimate
 * base B digit iff digitvalue[Py_CHARPyLong_MASK(c)] < B.
 */
/* NOTE(review): this symbol is not declared static, so it is exported from
 * the extension — presumably intentional (CPython heritage); confirm it does
 * not clash with a host-interpreter symbol. */
unsigned char digitvalue[256] = {
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 37, 37, 37, 37, 37, 37,
    37, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
    25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 37, 37, 37, 37,
    37, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
    25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 37, 37, 37, 37,
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
};
/*
** strtoul
**      This is a general purpose routine for converting
**      an ascii string to an integer in an arbitrary base.
**      Leading white space is ignored.  If 'base' is zero
**      it looks for a leading 0, 0b, 0B, 0o, 0O, 0x or 0X
**      to tell which base.  If these are absent it defaults
**      to 10.  Base must be 0 or between 2 and 36 (inclusive).
**      If 'ptr' is non-NULL it will contain a pointer to
**      the end of the scan.
**      Errors due to bad pointers will probably result in
**      exceptions - we don't check for them.
**
**      On overflow, sets errno to ERANGE and returns (unsigned long)-1.
*/
unsigned long
Ta27OS_strtoul(register char *str, char **ptr, int base)
{
    register unsigned long result = 0; /* return value of the function */
    register int c;             /* current input character */
    register int ovlimit;       /* required digits to overflow */

    /* skip leading white space */
    while (*str && isspace(Py_CHARMASK(*str)))
        ++str;

    /* check for leading 0 or 0x for auto-base or base 16 */
    switch (base) {
    case 0:             /* look for leading 0, 0b, 0o or 0x */
        if (*str == '0') {
            ++str;
            if (*str == 'x' || *str == 'X') {
                /* there must be at least one digit after 0x */
                if (digitvalue[Py_CHARMASK(str[1])] >= 16) {
                    if (ptr)
                        *ptr = str;
                    return 0;
                }
                ++str;
                base = 16;
            } else if (*str == 'o' || *str == 'O') {
                /* there must be at least one digit after 0o */
                if (digitvalue[Py_CHARMASK(str[1])] >= 8) {
                    if (ptr)
                        *ptr = str;
                    return 0;
                }
                ++str;
                base = 8;
            } else if (*str == 'b' || *str == 'B') {
                /* there must be at least one digit after 0b */
                if (digitvalue[Py_CHARMASK(str[1])] >= 2) {
                    if (ptr)
                        *ptr = str;
                    return 0;
                }
                ++str;
                base = 2;
            } else {
                /* a bare leading 0 selects octal (Python 2 semantics) */
                base = 8;
            }
        }
        else
            base = 10;
        break;

    case 2:             /* skip leading 0b or 0B */
        if (*str == '0') {
            ++str;
            if (*str == 'b' || *str == 'B') {
                /* there must be at least one digit after 0b */
                if (digitvalue[Py_CHARMASK(str[1])] >= 2) {
                    if (ptr)
                        *ptr = str;
                    return 0;
                }
                ++str;
            }
        }
        break;

    case 8:             /* skip leading 0o or 0O */
        if (*str == '0') {
            ++str;
            if (*str == 'o' || *str == 'O') {
                /* there must be at least one digit after 0o */
                if (digitvalue[Py_CHARMASK(str[1])] >= 8) {
                    if (ptr)
                        *ptr = str;
                    return 0;
                }
                ++str;
            }
        }
        break;

    case 16:            /* skip leading 0x or 0X */
        if (*str == '0') {
            ++str;
            if (*str == 'x' || *str == 'X') {
                /* there must be at least one digit after 0x */
                if (digitvalue[Py_CHARMASK(str[1])] >= 16) {
                    if (ptr)
                        *ptr = str;
                    return 0;
                }
                ++str;
            }
        }
        break;
    }

    /* catch silly bases */
    if (base < 2 || base > 36) {
        if (ptr)
            *ptr = str;
        return 0;
    }

    /* skip leading zeroes (so the digit count below only counts
       significant digits) */
    while (*str == '0')
        ++str;

    /* base is guaranteed to be in [2, 36] at this point */
    ovlimit = digitlimit[base];

    /* do the conversion until non-digit character encountered */
    while ((c = digitvalue[Py_CHARMASK(*str)]) < base) {
        if (ovlimit > 0) /* no overflow check required */
            result = result * base + c;
        else { /* requires overflow check */
            register unsigned long temp_result;

            if (ovlimit < 0) /* guaranteed overflow */
                goto overflowed;

            /* there could be an overflow */
            /* check overflow just from shifting */
            if (result > smallmax[base])
                goto overflowed;

            result *= base;

            /* check overflow from the digit's value */
            temp_result = result + c;
            if (temp_result < result)
                goto overflowed;

            result = temp_result;
        }

        ++str;
        --ovlimit;
    }

    /* set pointer to point to the last character scanned */
    if (ptr)
        *ptr = str;

    return result;

overflowed:
    if (ptr) {
        /* spool through remaining digit characters */
        while (digitvalue[Py_CHARMASK(*str)] < base)
            ++str;
        *ptr = str;
    }
    errno = ERANGE;
    return (unsigned long)-1;
}
279 | */ 280 | #define PY_ABS_LONG_MIN (0-(unsigned long)LONG_MIN) 281 | 282 | long 283 | Ta27OS_strtol(char *str, char **ptr, int base) 284 | { 285 | long result; 286 | unsigned long uresult; 287 | char sign; 288 | 289 | while (*str && isspace(Py_CHARMASK(*str))) 290 | str++; 291 | 292 | sign = *str; 293 | if (sign == '+' || sign == '-') 294 | str++; 295 | 296 | uresult = Ta27OS_strtoul(str, ptr, base); 297 | 298 | if (uresult <= (unsigned long)LONG_MAX) { 299 | result = (long)uresult; 300 | if (sign == '-') 301 | result = -result; 302 | } 303 | else if (sign == '-' && uresult == PY_ABS_LONG_MIN) { 304 | result = LONG_MIN; 305 | } 306 | else { 307 | errno = ERANGE; 308 | result = LONG_MAX; 309 | } 310 | return result; 311 | } 312 | -------------------------------------------------------------------------------- /ast3/Custom/typed_ast.c: -------------------------------------------------------------------------------- 1 | #include "Python.h" 2 | #include "../Include/Python-ast.h" 3 | #include "../Include/node.h" 4 | #include "../Include/grammar.h" 5 | #include "../Include/token.h" 6 | #include "../Include/ast.h" 7 | #include "../Include/parsetok.h" 8 | #include "../Include/errcode.h" 9 | #include "../Include/graminit.h" 10 | 11 | extern grammar _Ta3Parser_Grammar; /* from graminit.c */ 12 | 13 | // from Python/bltinmodule.c 14 | static const char * 15 | source_as_string(PyObject *cmd, const char *funcname, const char *what, PyCompilerFlags *cf, PyObject **cmd_copy) 16 | { 17 | const char *str; 18 | Py_ssize_t size; 19 | Py_buffer view; 20 | 21 | *cmd_copy = NULL; 22 | if (PyUnicode_Check(cmd)) { 23 | cf->cf_flags |= PyCF_IGNORE_COOKIE; 24 | str = PyUnicode_AsUTF8AndSize(cmd, &size); 25 | if (str == NULL) 26 | return NULL; 27 | } 28 | else if (PyBytes_Check(cmd)) { 29 | str = PyBytes_AS_STRING(cmd); 30 | size = PyBytes_GET_SIZE(cmd); 31 | } 32 | else if (PyByteArray_Check(cmd)) { 33 | str = PyByteArray_AS_STRING(cmd); 34 | size = PyByteArray_GET_SIZE(cmd); 35 | } 36 
    else if (PyObject_GetBuffer(cmd, &view, PyBUF_SIMPLE) == 0) {
        /* Generic buffer object: copy to a NUL-terminated bytes object,
           returned to the caller via *cmd_copy so it stays alive while
           `str` is in use. */
        *cmd_copy = PyBytes_FromStringAndSize(
            (const char *)view.buf, view.len);
        PyBuffer_Release(&view);
        if (*cmd_copy == NULL) {
            return NULL;
        }
        str = PyBytes_AS_STRING(*cmd_copy);
        size = PyBytes_GET_SIZE(*cmd_copy);
    }
    else {
        PyErr_Format(PyExc_TypeError,
          "%s() arg 1 must be a %s object",
          funcname, what);
        return NULL;
    }

    /* Reject embedded NUL bytes: the parser consumes a NUL-terminated
       C string, so a shorter strlen() means an interior NUL. */
    if (strlen(str) != (size_t)size) {
        PyErr_SetString(PyExc_ValueError,
                        "source code string cannot contain null bytes");
        Py_CLEAR(*cmd_copy);
        return NULL;
    }
    return str;
}

// from Python/pythonrun.c
/* compute parser flags based on compiler flags */
static int PARSER_FLAGS(PyCompilerFlags *flags)
{
    int parser_flags = 0;
    if (!flags)
        return 0;
    if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT)
        parser_flags |= PyPARSE_DONT_IMPLY_DEDENT;
    if (flags->cf_flags & PyCF_IGNORE_COOKIE)
        parser_flags |= PyPARSE_IGNORE_COOKIE;
    if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL)
        parser_flags |= PyPARSE_BARRY_AS_BDFL;
    return parser_flags;
}

// from Python/pythonrun.c
/* Set the error appropriate to the given input error code (see errcode.h).
   Translates the perrdetail filled in by the tokenizer/parser into a raised
   Python exception (SyntaxError or one of its subclasses), then frees
   err->text.  Does nothing for E_ERROR (an exception is already set). */
static void
err_input(perrdetail *err)
{
    PyObject *v, *w, *errtype, *errtext;
    PyObject *msg_obj = NULL;
    char *msg = NULL;
    int offset = err->offset;

    errtype = PyExc_SyntaxError;
    switch (err->error) {
    case E_ERROR:
        /* Exception already set by a lower layer; leave it alone. */
        return;
    case E_SYNTAX:
        errtype = PyExc_IndentationError;
        if (err->expected == INDENT)
            msg = "expected an indented block";
        else if (err->token == INDENT)
            msg = "unexpected indent";
        else if (err->token == DEDENT)
            msg = "unexpected unindent";
        else {
            /* Not indentation-related after all: plain syntax error. */
            errtype = PyExc_SyntaxError;
            if (err->token == TYPE_COMMENT)
                msg = "misplaced type annotation";
            else
                msg = "invalid syntax";
        }
        break;
    case E_TOKEN:
        msg = "invalid token";
        break;
    case E_EOFS:
        msg = "EOF while scanning triple-quoted string literal";
        break;
    case E_EOLS:
        msg = "EOL while scanning string literal";
        break;
    case E_INTR:
        if (!PyErr_Occurred())
            PyErr_SetNone(PyExc_KeyboardInterrupt);
        goto cleanup;
    case E_NOMEM:
        PyErr_NoMemory();
        goto cleanup;
    case E_EOF:
        msg = "unexpected EOF while parsing";
        break;
    case E_TABSPACE:
        errtype = PyExc_TabError;
        msg = "inconsistent use of tabs and spaces in indentation";
        break;
    case E_OVERFLOW:
        msg = "expression too long";
        break;
    case E_DEDENT:
        errtype = PyExc_IndentationError;
        msg = "unindent does not match any outer indentation level";
        break;
    case E_TOODEEP:
        errtype = PyExc_IndentationError;
        msg = "too many levels of indentation";
        break;
    case E_DECODE: {
        /* A decoding error raised its own exception; capture its str()
           as the message and clear the pending exception. */
        PyObject *type, *value, *tb;
        PyErr_Fetch(&type, &value, &tb);
        msg = "unknown decode error";
        if (value != NULL)
            msg_obj = PyObject_Str(value);
        Py_XDECREF(type);
        Py_XDECREF(value);
        Py_XDECREF(tb);
        break;
    }
    case E_LINECONT:
        msg = "unexpected character after line continuation character";
        break;

    case E_IDENTIFIER:
        msg = "invalid character in identifier";
        break;
    case E_BADSINGLE:
        msg = "multiple statements found while compiling a single statement";
        break;
    default:
        fprintf(stderr, "error=%d\n", err->error);
        msg = "unknown parsing error";
        break;
    }
    /* err->text may not be UTF-8 in case of decoding errors.
       Explicitly convert to an object. */
    if (!err->text) {
        errtext = Py_None;
        Py_INCREF(Py_None);
    } else {
        /* Decode up to the byte offset so the reported column becomes a
           character offset; fall back to decoding the whole line if the
           offset does not cover it. */
        errtext = PyUnicode_DecodeUTF8(err->text, err->offset,
                                       "replace");
        if (errtext != NULL) {
            Py_ssize_t len = strlen(err->text);
            offset = (int)PyUnicode_GET_LENGTH(errtext);
            if (len != err->offset) {
                Py_DECREF(errtext);
                errtext = PyUnicode_DecodeUTF8(err->text, len,
                                               "replace");
            }
        }
    }
    /* Build the (msg, (filename, lineno, offset, text)) tuple SyntaxError
       expects; "N" steals the errtext reference. */
    v = Py_BuildValue("(OiiN)", err->filename,
                      err->lineno, offset, errtext);
    if (v != NULL) {
        if (msg_obj)
            w = Py_BuildValue("(OO)", msg_obj, v);
        else
            w = Py_BuildValue("(sO)", msg, v);
    } else
        w = NULL;
    Py_XDECREF(v);
    PyErr_SetObject(errtype, w);
    Py_XDECREF(w);
cleanup:
    Py_XDECREF(msg_obj);
    if (err->text != NULL) {
        PyObject_FREE(err->text);
        err->text = NULL;
    }
}

// from Python/pythonrun.c
/* Release the references held by a perrdetail after parsing. */
static void
err_free(perrdetail *err)
{
    Py_CLEAR(err->filename);
}

// from Python/pythonrun.c
/* Parse `str` into a CST node, converting any failure into a raised
   Python exception via err_input(). */
node *
Ta3Parser_SimpleParseStringFlagsFilename(const char *str, const char *filename,
                                         int start, int flags)
{
    perrdetail err;
    node *n = Ta3Parser_ParseStringFlagsFilename(str, filename,
                                                 &_Ta3Parser_Grammar, start, &err, flags);
    if (n == NULL)
        err_input(&err);
    err_free(&err);
    return n;
}

/* update compiler and parser flags based on feature version */
void
_Ta3Parser_UpdateFlags(PyCompilerFlags *flags, int *iflags, int feature_version)
{
    *iflags = PARSER_FLAGS(flags);
    /* From 3.7 on, async/await are always keywords. */
    if (feature_version >= 7)
        *iflags |= PyPARSE_ASYNC_ALWAYS;
    flags->cf_flags |= *iflags & PyCF_MASK;
}

// copy of PyParser_ASTFromStringObject in Python/pythonrun.c
/* Preferred access to parser is through AST.
*/ 240 | static mod_ty 241 | string_object_to_c_ast(const char *s, PyObject *filename, int start, 242 | PyCompilerFlags *flags, int feature_version, 243 | PyArena *arena) 244 | { 245 | mod_ty mod; 246 | PyCompilerFlags localflags; 247 | perrdetail err; 248 | node *n; 249 | int iflags; 250 | 251 | if (flags == NULL) { 252 | localflags.cf_flags = 0; 253 | flags = &localflags; 254 | } 255 | _Ta3Parser_UpdateFlags(flags, &iflags, feature_version); 256 | n = Ta3Parser_ParseStringObject(s, filename, 257 | &_Ta3Parser_Grammar, start, &err, 258 | &iflags); 259 | if (n) { 260 | flags->cf_flags |= iflags & PyCF_MASK; 261 | mod = Ta3AST_FromNodeObject(n, flags, filename, feature_version, arena); 262 | Ta3Node_Free(n); 263 | } 264 | else { 265 | err_input(&err); 266 | mod = NULL; 267 | } 268 | err_free(&err); 269 | return mod; 270 | } 271 | 272 | // adapted from Py_CompileStringObject in Python/pythonrun.c 273 | static PyObject * 274 | string_object_to_py_ast(const char *str, PyObject *filename, int start, 275 | PyCompilerFlags *flags, int feature_version) 276 | { 277 | mod_ty mod; 278 | PyObject *result; 279 | PyArena *arena = PyArena_New(); 280 | if (arena == NULL) 281 | return NULL; 282 | 283 | mod = string_object_to_c_ast(str, filename, start, flags, feature_version, arena); 284 | if (mod == NULL) { 285 | PyArena_Free(arena); 286 | return NULL; 287 | } 288 | 289 | result = Ta3AST_mod2obj(mod); 290 | PyArena_Free(arena); 291 | return result; 292 | } 293 | 294 | // adapted from builtin_compile_impl in Python/bltinmodule.c 295 | static PyObject * 296 | ast3_parse_impl(PyObject *source, 297 | PyObject *filename, 298 | const char *mode, 299 | int feature_version) 300 | { 301 | PyObject *source_copy; 302 | const char *str; 303 | int compile_mode = -1; 304 | PyCompilerFlags cf; 305 | int start[] = {file_input, eval_input, single_input, func_type_input}; 306 | PyObject *result; 307 | 308 | cf.cf_flags = PyCF_ONLY_AST | PyCF_SOURCE_IS_UTF8; 309 | 310 | if (strcmp(mode, "exec") == 
0) 311 | compile_mode = 0; 312 | else if (strcmp(mode, "eval") == 0) 313 | compile_mode = 1; 314 | else if (strcmp(mode, "single") == 0) 315 | compile_mode = 2; 316 | else if (strcmp(mode, "func_type") == 0) 317 | compile_mode = 3; 318 | else { 319 | PyErr_SetString(PyExc_ValueError, 320 | "parse() mode must be 'exec', 'eval', 'single', for 'func_type'"); 321 | goto error; 322 | } 323 | 324 | str = source_as_string(source, "parse", "string or bytes", &cf, &source_copy); 325 | if (str == NULL) 326 | goto error; 327 | 328 | result = string_object_to_py_ast(str, filename, start[compile_mode], &cf, feature_version); 329 | Py_XDECREF(source_copy); 330 | goto finally; 331 | 332 | error: 333 | result = NULL; 334 | finally: 335 | Py_DECREF(filename); 336 | return result; 337 | } 338 | 339 | // adapted from builtin_compile in Python/clinic/bltinmodule.c.h 340 | PyObject * 341 | ast3_parse(PyObject *self, PyObject *args) 342 | { 343 | PyObject *return_value = NULL; 344 | PyObject *source; 345 | PyObject *filename; 346 | const char *mode; 347 | int feature_version; 348 | 349 | if (PyArg_ParseTuple(args, "OO&si:parse", &source, PyUnicode_FSDecoder, &filename, &mode, &feature_version)) 350 | return_value = ast3_parse_impl(source, filename, mode, feature_version); 351 | 352 | return return_value; 353 | } 354 | -------------------------------------------------------------------------------- /ast27/Parser/asdl.py: -------------------------------------------------------------------------------- 1 | """An implementation of the Zephyr Abstract Syntax Definition Language. 2 | 3 | See http://asdl.sourceforge.net/ and 4 | http://www.cs.princeton.edu/research/techreps/TR-554-97 5 | 6 | Only supports top level module decl, not view. I'm guessing that view 7 | is intended to support the browser and I'm not interested in the 8 | browser. 

Changes for Python: Add support for module versions
"""

import os
import traceback

import spark

# NOTE: this is legacy Python 2 code driven by the third-party `spark`
# parser generator; spark dispatches on token `type` attributes and on the
# grammar rules embedded in method docstrings, so those must not change.

class Token(object):
    # Base lexer token: carries a `type` (used for parser dispatch) and the
    # source line number for error reporting.
    # spark seems to dispatch in the parser based on a token's
    # type attribute
    def __init__(self, type, lineno):
        self.type = type
        self.lineno = lineno

    def __str__(self):
        return self.type

    def __repr__(self):
        return str(self)

class Id(Token):
    # Identifier token; str() yields the identifier text rather than the
    # token type.
    def __init__(self, value, lineno):
        self.type = 'Id'
        self.value = value
        self.lineno = lineno

    def __str__(self):
        return self.value

class String(Token):
    # Quoted string literal token (value keeps the surrounding quotes).
    def __init__(self, value, lineno):
        self.type = 'String'
        self.value = value
        self.lineno = lineno

class ASDLSyntaxError(Exception):
    # Raised by the parser on bad input; formats either around the
    # offending token or an explicit message.

    def __init__(self, lineno, token=None, msg=None):
        self.lineno = lineno
        self.token = token
        self.msg = msg

    def __str__(self):
        if self.msg is None:
            return "Error at '%s', line %d" % (self.token, self.lineno)
        else:
            return "%s, line %d" % (self.msg, self.lineno)

class ASDLScanner(spark.GenericScanner, object):
    # Tokenizer: each t_* method's docstring is the regex spark matches;
    # matched handlers append Token objects to self.rv.

    def tokenize(self, input):
        self.rv = []
        self.lineno = 1
        super(ASDLScanner, self).tokenize(input)
        return self.rv

    def t_id(self, s):
        r"[\w\.]+"
        # XXX doesn't distinguish upper vs. lower, which is
        # significant for ASDL.
71 | self.rv.append(Id(s, self.lineno)) 72 | 73 | def t_string(self, s): 74 | r'"[^"]*"' 75 | self.rv.append(String(s, self.lineno)) 76 | 77 | def t_xxx(self, s): # not sure what this production means 78 | r"<=" 79 | self.rv.append(Token(s, self.lineno)) 80 | 81 | def t_punctuation(self, s): 82 | r"[\{\}\*\=\|\(\)\,\?\:]" 83 | self.rv.append(Token(s, self.lineno)) 84 | 85 | def t_comment(self, s): 86 | r"\-\-[^\n]*" 87 | pass 88 | 89 | def t_newline(self, s): 90 | r"\n" 91 | self.lineno += 1 92 | 93 | def t_whitespace(self, s): 94 | r"[ \t]+" 95 | pass 96 | 97 | def t_default(self, s): 98 | r" . +" 99 | raise ValueError, "unmatched input: %s" % `s` 100 | 101 | class ASDLParser(spark.GenericParser, object): 102 | def __init__(self): 103 | super(ASDLParser, self).__init__("module") 104 | 105 | def typestring(self, tok): 106 | return tok.type 107 | 108 | def error(self, tok): 109 | raise ASDLSyntaxError(tok.lineno, tok) 110 | 111 | def p_module_0(self, (module, name, version, _0, _1)): 112 | " module ::= Id Id version { } " 113 | if module.value != "module": 114 | raise ASDLSyntaxError(module.lineno, 115 | msg="expected 'module', found %s" % module) 116 | return Module(name, None, version) 117 | 118 | def p_module(self, (module, name, version, _0, definitions, _1)): 119 | " module ::= Id Id version { definitions } " 120 | if module.value != "module": 121 | raise ASDLSyntaxError(module.lineno, 122 | msg="expected 'module', found %s" % module) 123 | return Module(name, definitions, version) 124 | 125 | def p_version(self, (version, V)): 126 | "version ::= Id String" 127 | if version.value != "version": 128 | raise ASDLSyntaxError(version.lineno, 129 | msg="expected 'version', found %" % version) 130 | return V 131 | 132 | def p_definition_0(self, (definition,)): 133 | " definitions ::= definition " 134 | return definition 135 | 136 | def p_definition_1(self, (definitions, definition)): 137 | " definitions ::= definition definitions " 138 | return definitions + 
definition 139 | 140 | def p_definition(self, (id, _, type)): 141 | " definition ::= Id = type " 142 | return [Type(id, type)] 143 | 144 | def p_type_0(self, (product,)): 145 | " type ::= product " 146 | return product 147 | 148 | def p_type_1(self, (sum,)): 149 | " type ::= sum " 150 | return Sum(sum) 151 | 152 | def p_type_2(self, (sum, id, _0, attributes, _1)): 153 | " type ::= sum Id ( fields ) " 154 | if id.value != "attributes": 155 | raise ASDLSyntaxError(id.lineno, 156 | msg="expected attributes, found %s" % id) 157 | if attributes: 158 | attributes.reverse() 159 | return Sum(sum, attributes) 160 | 161 | def p_product(self, (_0, fields, _1)): 162 | " product ::= ( fields ) " 163 | # XXX can't I just construct things in the right order? 164 | fields.reverse() 165 | return Product(fields) 166 | 167 | def p_sum_0(self, (constructor,)): 168 | " sum ::= constructor " 169 | return [constructor] 170 | 171 | def p_sum_1(self, (constructor, _, sum)): 172 | " sum ::= constructor | sum " 173 | return [constructor] + sum 174 | 175 | def p_sum_2(self, (constructor, _, sum)): 176 | " sum ::= constructor | sum " 177 | return [constructor] + sum 178 | 179 | def p_constructor_0(self, (id,)): 180 | " constructor ::= Id " 181 | return Constructor(id) 182 | 183 | def p_constructor_1(self, (id, _0, fields, _1)): 184 | " constructor ::= Id ( fields ) " 185 | # XXX can't I just construct things in the right order? 
186 | fields.reverse() 187 | return Constructor(id, fields) 188 | 189 | def p_fields_0(self, (field,)): 190 | " fields ::= field " 191 | return [field] 192 | 193 | def p_fields_1(self, (field, _, fields)): 194 | " fields ::= field , fields " 195 | return fields + [field] 196 | 197 | def p_field_0(self, (type,)): 198 | " field ::= Id " 199 | return Field(type) 200 | 201 | def p_field_1(self, (type, name)): 202 | " field ::= Id Id " 203 | return Field(type, name) 204 | 205 | def p_field_2(self, (type, _, name)): 206 | " field ::= Id * Id " 207 | return Field(type, name, seq=True) 208 | 209 | def p_field_3(self, (type, _, name)): 210 | " field ::= Id ? Id " 211 | return Field(type, name, opt=True) 212 | 213 | def p_field_4(self, (type, _)): 214 | " field ::= Id * " 215 | return Field(type, seq=True) 216 | 217 | def p_field_5(self, (type, _)): 218 | " field ::= Id ? " 219 | return Field(type, opt=True) 220 | 221 | builtin_types = ("identifier", "string", "int", "bool", "object") 222 | 223 | # below is a collection of classes to capture the AST of an AST :-) 224 | # not sure if any of the methods are useful yet, but I'm adding them 225 | # piecemeal as they seem helpful 226 | 227 | class AST(object): 228 | pass # a marker class 229 | 230 | class Module(AST): 231 | def __init__(self, name, dfns, version): 232 | self.name = name 233 | self.dfns = dfns 234 | self.version = version 235 | self.types = {} # maps type name to value (from dfns) 236 | for type in dfns: 237 | self.types[type.name.value] = type.value 238 | 239 | def __repr__(self): 240 | return "Module(%s, %s)" % (self.name, self.dfns) 241 | 242 | class Type(AST): 243 | def __init__(self, name, value): 244 | self.name = name 245 | self.value = value 246 | 247 | def __repr__(self): 248 | return "Type(%s, %s)" % (self.name, self.value) 249 | 250 | class Constructor(AST): 251 | def __init__(self, name, fields=None): 252 | self.name = name 253 | self.fields = fields or [] 254 | 255 | def __repr__(self): 256 | return 
"Constructor(%s, %s)" % (self.name, self.fields) 257 | 258 | class Field(AST): 259 | def __init__(self, type, name=None, seq=False, opt=False): 260 | self.type = type 261 | self.name = name 262 | self.seq = seq 263 | self.opt = opt 264 | 265 | def __repr__(self): 266 | if self.seq: 267 | extra = ", seq=True" 268 | elif self.opt: 269 | extra = ", opt=True" 270 | else: 271 | extra = "" 272 | if self.name is None: 273 | return "Field(%s%s)" % (self.type, extra) 274 | else: 275 | return "Field(%s, %s%s)" % (self.type, self.name, extra) 276 | 277 | class Sum(AST): 278 | def __init__(self, types, attributes=None): 279 | self.types = types 280 | self.attributes = attributes or [] 281 | 282 | def __repr__(self): 283 | if self.attributes is None: 284 | return "Sum(%s)" % self.types 285 | else: 286 | return "Sum(%s, %s)" % (self.types, self.attributes) 287 | 288 | class Product(AST): 289 | def __init__(self, fields): 290 | self.fields = fields 291 | 292 | def __repr__(self): 293 | return "Product(%s)" % self.fields 294 | 295 | class VisitorBase(object): 296 | 297 | def __init__(self, skip=False): 298 | self.cache = {} 299 | self.skip = skip 300 | 301 | def visit(self, object, *args): 302 | meth = self._dispatch(object) 303 | if meth is None: 304 | return 305 | try: 306 | meth(object, *args) 307 | except Exception, err: 308 | print "Error visiting", repr(object) 309 | print err 310 | traceback.print_exc() 311 | # XXX hack 312 | if hasattr(self, 'file'): 313 | self.file.flush() 314 | os._exit(1) 315 | 316 | def _dispatch(self, object): 317 | assert isinstance(object, AST), repr(object) 318 | klass = object.__class__ 319 | meth = self.cache.get(klass) 320 | if meth is None: 321 | methname = "visit" + klass.__name__ 322 | if self.skip: 323 | meth = getattr(self, methname, None) 324 | else: 325 | meth = getattr(self, methname) 326 | self.cache[klass] = meth 327 | return meth 328 | 329 | class Check(VisitorBase): 330 | 331 | def __init__(self): 332 | super(Check, 
self).__init__(skip=True) 333 | self.cons = {} 334 | self.errors = 0 335 | self.types = {} 336 | 337 | def visitModule(self, mod): 338 | for dfn in mod.dfns: 339 | self.visit(dfn) 340 | 341 | def visitType(self, type): 342 | self.visit(type.value, str(type.name)) 343 | 344 | def visitSum(self, sum, name): 345 | for t in sum.types: 346 | self.visit(t, name) 347 | 348 | def visitConstructor(self, cons, name): 349 | key = str(cons.name) 350 | conflict = self.cons.get(key) 351 | if conflict is None: 352 | self.cons[key] = name 353 | else: 354 | print "Redefinition of constructor %s" % key 355 | print "Defined in %s and %s" % (conflict, name) 356 | self.errors += 1 357 | for f in cons.fields: 358 | self.visit(f, key) 359 | 360 | def visitField(self, field, name): 361 | key = str(field.type) 362 | l = self.types.setdefault(key, []) 363 | l.append(name) 364 | 365 | def visitProduct(self, prod, name): 366 | for f in prod.fields: 367 | self.visit(f, name) 368 | 369 | def check(mod): 370 | v = Check() 371 | v.visit(mod) 372 | 373 | for t in v.types: 374 | if t not in mod.types and not t in builtin_types: 375 | v.errors += 1 376 | uses = ", ".join(v.types[t]) 377 | print "Undefined type %s, used in %s" % (t, uses) 378 | 379 | return not v.errors 380 | 381 | def parse(file): 382 | scanner = ASDLScanner() 383 | parser = ASDLParser() 384 | 385 | buf = open(file).read() 386 | tokens = scanner.tokenize(buf) 387 | try: 388 | return parser.parse(tokens) 389 | except ASDLSyntaxError, err: 390 | print err 391 | lines = buf.split("\n") 392 | print lines[err.lineno - 1] # lines starts at 0, files at 1 393 | 394 | if __name__ == "__main__": 395 | import glob 396 | import sys 397 | 398 | if len(sys.argv) > 1: 399 | files = sys.argv[1:] 400 | else: 401 | testdir = "tests" 402 | files = glob.glob(testdir + "/*.asdl") 403 | 404 | for file in files: 405 | print file 406 | mod = parse(file) 407 | print "module", mod.name 408 | print len(mod.dfns), "definitions" 409 | if not check(mod): 410 | 
print "Check failed" 411 | else: 412 | for dfn in mod.dfns: 413 | print dfn.type 414 | --------------------------------------------------------------------------------