├── CMakeLists.txt ├── ClangHighlight.cpp ├── Fuzzy ├── AnnotatedToken.h ├── CMakeLists.txt ├── FuzzyAST.h ├── FuzzyASTPrinter.cpp └── FuzzyParser.cpp ├── FuzzyType.h ├── Makefile ├── OutputWriter.cpp ├── OutputWriter.h ├── TokenClassifier.cpp ├── TokenClassifier.h ├── docs ├── LibFuzzy.rst └── clang-highlight.rst ├── latex ├── clanghighlight.sty ├── fuzzyparser.pdf ├── fuzzyparser.tex ├── sample.pdf └── sample.tex └── unittests ├── CMakeLists.txt └── FuzzyParseTest.cpp /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(LLVM_LINK_COMPONENTS support) 2 | 3 | set(LLVM_USED_LIBS clangTooling) 4 | 5 | add_clang_executable(clang-highlight 6 | ClangHighlight.cpp 7 | TokenClassifier.cpp 8 | OutputWriter.cpp 9 | ) 10 | 11 | target_link_libraries(clang-highlight 12 | clangAST 13 | clangFuzzy 14 | ) 15 | include_directories("Fuzzy") 16 | 17 | install(TARGETS clang-highlight RUNTIME DESTINATION bin) 18 | 19 | add_custom_target(ClangHighlightUnitTests) 20 | set_target_properties(ClangHighlightUnitTests PROPERTIES FOLDER "Clang Highlight Unit Tests") 21 | 22 | function(add_highlight_unittest test_dirname) 23 | add_unittest(ClangHighlightUnitTests ${test_dirname} ${ARGN}) 24 | endfunction() 25 | 26 | add_subdirectory(Fuzzy) 27 | add_subdirectory(unittests) 28 | -------------------------------------------------------------------------------- /ClangHighlight.cpp: -------------------------------------------------------------------------------- 1 | //===-- clang-highlight/ClangHighlight.cpp - Clang highlight tool ---------===// 2 | // 3 | // The LLVM Compiler Infrastructure 4 | // 5 | // This file is distributed under the University of Illinois Open Source 6 | // License. See LICENSE.TXT for details. 
7 | // 8 | //===----------------------------------------------------------------------===// 9 | /// 10 | /// \file ClangHighlight.cpp 11 | /// \brief This file implements a clang-highlight tool that automatically 12 | /// highlights (fragments of) C++ code. 13 | /// 14 | //===----------------------------------------------------------------------===// 15 | #include "llvm/Support/Signals.h" 16 | #include "llvm/Support/raw_ostream.h" 17 | #include "llvm/Support/FileSystem.h" 18 | #include "llvm/Support/CommandLine.h" 19 | #include "llvm/Support/MemoryBuffer.h" 20 | #include "clang/Basic/Version.h" 21 | #include "OutputWriter.h" 22 | #include "TokenClassifier.h" 23 | 24 | using namespace llvm; 25 | using namespace clang::highlight; 26 | 27 | // Mark all our options with this category, everything else (except for -version 28 | // and -help) will be hidden. 29 | static cl::OptionCategory ClangHighlightCategory("Clang-highlight options"); 30 | cl::OptionCategory &getClangHighlightCategory() { 31 | return ClangHighlightCategory; 32 | } 33 | 34 | static cl::opt IdentifiersOnly( 35 | "identifiers-only", 36 | cl::desc("Highlight identifiers only. E.g. 
don't highlight the '*' " 37 | "in \"type *i;\""), 38 | cl::cat(ClangHighlightCategory)); 39 | 40 | static cl::opt DumpAST("dump-ast", cl::desc("Print the fuzzy AST."), 41 | cl::cat(ClangHighlightCategory)); 42 | 43 | static cl::opt OutputFormatFlag( 44 | cl::desc("Output format for the highlighted code."), 45 | cl::values(clEnumValN(OutputFormat::StdoutColored, "stdout", 46 | "write colored stdout"), 47 | clEnumValN(OutputFormat::HTML, "html", "write html"), 48 | clEnumValN(OutputFormat::SemanticHTML, "shtml", 49 | "write semantic html"), 50 | clEnumValN(OutputFormat::LaTeX, "latex", "write latex"), 51 | clEnumValEnd), 52 | cl::cat(ClangHighlightCategory)); 53 | 54 | cl::opt OutputFilename("o", cl::desc("Write output to "), 55 | cl::value_desc("file"), 56 | cl::cat(ClangHighlightCategory)); 57 | 58 | static cl::opt FileName(cl::Positional, cl::desc(""), 59 | cl::Required, 60 | cl::cat(ClangHighlightCategory)); 61 | 62 | static void PrintVersion() { 63 | raw_ostream &OS = llvm::outs(); 64 | OS << clang::getClangToolFullVersion("clang-highlight") << '\n'; 65 | } 66 | 67 | static bool parserHighlight(StringRef File, OutputFormat Format, 68 | StringRef OutFile, bool IdentifiersOnly, 69 | bool DumpAST) { 70 | auto Source = llvm::MemoryBuffer::getFileOrSTDIN(File); 71 | if (std::error_code err = Source.getError()) { 72 | llvm::errs() << err.message() << '\n'; 73 | return true; 74 | } 75 | 76 | if (!OutFile.empty()) { 77 | std::error_code ErrMsg; 78 | raw_fd_ostream Out(std::string(OutFile).c_str(), ErrMsg, 79 | llvm::sys::fs::F_Text); 80 | if (ErrMsg) { 81 | llvm::errs() << ErrMsg.message() << '\n'; 82 | return true; 83 | } 84 | highlight(std::move(*Source), File, makeOutputWriter(Format, Out), 85 | IdentifiersOnly, DumpAST); 86 | } else { 87 | highlight(std::move(*Source), File, makeOutputWriter(Format, llvm::outs()), 88 | IdentifiersOnly, DumpAST); 89 | } 90 | return false; 91 | } 92 | 93 | int main(int argc, const char **argv) { 94 | 
llvm::sys::PrintStackTraceOnErrorSignal(); 95 | 96 | // Hide unrelated options. 97 | StringMap Options; 98 | cl::getRegisteredOptions(Options); 99 | for (auto &Option : Options) 100 | if (Option.second->Category != &ClangHighlightCategory && 101 | Option.first() != "help" && Option.first() != "version") 102 | Option.second->setHiddenFlag(cl::ReallyHidden); 103 | 104 | cl::SetVersionPrinter(PrintVersion); 105 | cl::ParseCommandLineOptions( 106 | argc, argv, "A tool to highlight C and C++ code.\n\n" 107 | "If no arguments are specified, it highlights the code from " 108 | "standard input\n" 109 | "and writes the result to the standard output.\n"); 110 | 111 | bool Error = false; 112 | 113 | Error |= parserHighlight(FileName, OutputFormatFlag, OutputFilename, 114 | IdentifiersOnly, DumpAST); 115 | 116 | return Error ? 1 : 0; 117 | } 118 | -------------------------------------------------------------------------------- /Fuzzy/AnnotatedToken.h: -------------------------------------------------------------------------------- 1 | //===--- AnnotatedToken.h - clang-highlight ---------------------*- C++ -*-===// 2 | // 3 | // The LLVM Compiler Infrastructure 4 | // 5 | // This file is distributed under the University of Illinois Open Source 6 | // License. See LICENSE.TXT for details. 
7 | // 8 | //===----------------------------------------------------------------------===// 9 | 10 | #ifndef LLVM_CLANG_TOOLS_CLANG_HIGHLIGHT_ANNOTATED_TOKEN_H 11 | #define LLVM_CLANG_TOOLS_CLANG_HIGHLIGHT_ANNOTATED_TOKEN_H 12 | 13 | #include "clang/Lex/Lexer.h" 14 | #include 15 | 16 | namespace clang { 17 | namespace fuzzy { 18 | 19 | class ASTElement; 20 | 21 | class AnnotatedToken { 22 | Token Tok_; 23 | ASTElement *Annot; 24 | 25 | public: 26 | AnnotatedToken(Token Tok) : Tok_(Tok), Annot(nullptr) {} 27 | 28 | StringRef getText(const SourceManager &SourceMgr) const { 29 | return StringRef(SourceMgr.getCharacterData(Tok().getLocation()), 30 | Tok().getLength()); 31 | } 32 | 33 | tok::TokenKind getTokenKind() const { return Tok().getKind(); } 34 | 35 | Token& Tok() { return Tok_; } 36 | const Token& Tok() const { return Tok_; } 37 | 38 | void setASTReference(ASTElement *ASTReference) { Annot = ASTReference; } 39 | const ASTElement *getASTReference() const { return Annot; } 40 | ASTElement *getASTReference() { return Annot; } 41 | bool hasASTReference() const { return Annot; } 42 | }; 43 | 44 | class AnnotatedTokenRef { 45 | AnnotatedToken *ATok; 46 | 47 | public: 48 | AnnotatedTokenRef(AnnotatedToken *ATok, ASTElement *AstRef) : ATok(ATok) { 49 | if (ATok) 50 | ATok->setASTReference(AstRef); 51 | } 52 | AnnotatedTokenRef(nullptr_t = nullptr) : ATok(nullptr) {} 53 | AnnotatedTokenRef(const AnnotatedTokenRef &) = default; 54 | AnnotatedTokenRef(AnnotatedTokenRef &&) = default; 55 | 56 | AnnotatedTokenRef(AnnotatedTokenRef const &o, ASTElement *AstRef) 57 | : ATok(o.ATok) { 58 | if (ATok) 59 | ATok->setASTReference(AstRef); 60 | } 61 | 62 | AnnotatedTokenRef &operator=(const AnnotatedTokenRef &) = default; 63 | AnnotatedTokenRef &operator=(AnnotatedTokenRef &&) = default; 64 | 65 | operator bool() const { return ATok; } 66 | AnnotatedToken *get() { return ATok; } 67 | AnnotatedToken *get() const { return ATok; } 68 | 69 | AnnotatedToken &getRef() { 70 | 
assert(*this); 71 | return *ATok; 72 | } 73 | const AnnotatedToken &getRef() const { 74 | assert(*this); 75 | return *ATok; 76 | } 77 | 78 | AnnotatedToken &operator*() { return getRef(); } 79 | const AnnotatedToken &operator*() const { return getRef(); } 80 | AnnotatedToken *operator->() { return &getRef(); } 81 | const AnnotatedToken *operator->() const { return &getRef(); } 82 | 83 | AnnotatedTokenRef &operator=(AnnotatedToken *ATok) { 84 | this->ATok = ATok; 85 | return *this; 86 | } 87 | }; 88 | 89 | } // end namespace fuzzy 90 | } // end namespace clang 91 | 92 | #endif // LLVM_CLANG_TOOLS_CLANG_HIGHLIGHT_ANNOTATED_TOKEN_H 93 | -------------------------------------------------------------------------------- /Fuzzy/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(LLVM_LINK_COMPONENTS support) 2 | 3 | clang_tablegen(FuzzyNodes.inc -gen-clang-stmt-nodes 4 | SOURCE FuzzyNodes.td 5 | TARGET FuzzyNodes) 6 | 7 | add_clang_library(clangFuzzy 8 | FuzzyParser.cpp 9 | FuzzyASTPrinter.cpp 10 | 11 | ADDITIONAL_HEADERS 12 | FuzzyNodes.td 13 | ) 14 | -------------------------------------------------------------------------------- /Fuzzy/FuzzyAST.h: -------------------------------------------------------------------------------- 1 | //===--- FuzzyAST.h - clang-highlight ---------------------------*- C++ -*-===// 2 | // 3 | // The LLVM Compiler Infrastructure 4 | // 5 | // This file is distributed under the University of Illinois Open Source 6 | // License. See LICENSE.TXT for details. 
7 | // 8 | //===----------------------------------------------------------------------===// 9 | 10 | #ifndef LLVM_CLANG_TOOLS_CLANG_HIGHLIGHT_FUZZY_AST_H 11 | #define LLVM_CLANG_TOOLS_CLANG_HIGHLIGHT_FUZZY_AST_H 12 | 13 | #include "llvm/ADT/StringRef.h" 14 | #include "llvm/ADT/STLExtras.h" 15 | #include "clang/Basic/SourceManager.h" 16 | #include "AnnotatedToken.h" 17 | #include 18 | 19 | namespace llvm { 20 | class raw_ostream; 21 | } 22 | 23 | namespace clang { 24 | namespace fuzzy { 25 | 26 | /// ASTElement: Anything inside the AST that may be referenced by an 27 | /// AnnotatedToken must be an ASTElement. This class is not strictly needed 28 | /// from an AST point of view. 29 | class ASTElement { 30 | public: 31 | virtual ~ASTElement() = default; // Not accessible 32 | // TODO: TableGen 33 | enum ASTElementClass { 34 | NoASTElementClass = 0, 35 | UnparsableBlockClass, 36 | TypeClass, 37 | TemplateDeclClass, 38 | TypeDecorationClass, 39 | VarInitializationClass, 40 | VarDeclClass, 41 | ExprLineStmtClass, 42 | ReturnStmtClass, 43 | CompoundStmtClass, 44 | DeclStmtClass, 45 | firstExpr, 46 | DeclRefExprClass, 47 | ParenExprClass, 48 | LiteralConstantClass, 49 | UnaryOperatorClass, 50 | BinaryOperatorClass, 51 | CallExprClass, 52 | lastExpr, 53 | LabelStmtClass, 54 | WhileStmtClass, 55 | DoWhileStmtClass, 56 | ForStmtClass, 57 | IfStmtClass, 58 | ClassDeclClass, 59 | NamespaceDeclClass, 60 | FunctionDeclClass, 61 | TemplateParameterTypeClass, 62 | PPStringClass, 63 | firstPPDirective, 64 | PPIncludeClass, 65 | PPIfClass, 66 | UnparsablePPClass, 67 | lastPPDirective, 68 | }; 69 | 70 | ASTElementClass getASTClass() const { return sClass; } 71 | 72 | protected: 73 | ASTElement(ASTElementClass SC) : sClass(SC) {} 74 | 75 | private: 76 | ASTElementClass sClass; 77 | }; 78 | 79 | /// An expression in it's classical sense. If an expression is used as a 80 | /// statement, it has to be embedded into a ExprStmt (yet to be implemented). 
81 | /// Rationale is that there is otherwise no way to store the semicolon. 82 | class Expr : public ASTElement { 83 | protected: 84 | Expr(ASTElementClass SC) : ASTElement(SC) {} 85 | 86 | public: 87 | virtual ~Expr() = 0; 88 | static bool classof(const ASTElement *T) { 89 | return firstExpr <= T->getASTClass() && T->getASTClass() <= lastExpr; 90 | } 91 | }; 92 | inline Expr::~Expr() {} 93 | 94 | class Type; 95 | 96 | class TypeOrExpression { 97 | std::unique_ptr Ptr; 98 | 99 | public: 100 | TypeOrExpression(std::unique_ptr T); 101 | TypeOrExpression(std::unique_ptr E) : Ptr(std::move(E)) {} 102 | TypeOrExpression(const TypeOrExpression &) = delete; 103 | TypeOrExpression &operator=(const TypeOrExpression &) = delete; 104 | TypeOrExpression(TypeOrExpression &&O) = default; 105 | TypeOrExpression &operator=(TypeOrExpression &&O) = default; 106 | 107 | bool isType() const { 108 | assert(Ptr); 109 | return isa(Ptr.get()); 110 | } 111 | Type &asType() { return *cast(Ptr.get()); } 112 | Expr &asExpr() { return *cast(Ptr.get()); } 113 | }; 114 | 115 | struct QualifiedID { 116 | struct TemplateArguments { 117 | llvm::SmallVector Args; 118 | llvm::SmallVector Separators; 119 | }; 120 | 121 | llvm::SmallVector NameSegments; 122 | llvm::Optional > TemplateArgs; 123 | 124 | void reown(ASTElement *Ref) { 125 | for (auto &N : NameSegments) 126 | N->setASTReference(Ref); 127 | if (TemplateArgs) { 128 | for (auto &ATok : (*TemplateArgs)->Separators) 129 | ATok->setASTReference(Ref); 130 | } 131 | } 132 | 133 | void addNameQualifier(AnnotatedToken *NameTok, ASTElement *Ref) { 134 | NameSegments.push_back(AnnotatedTokenRef(NameTok, Ref)); 135 | } 136 | 137 | void makeTemplateArgs() { 138 | TemplateArgs = std::make_shared(); 139 | } 140 | void addTemplateSeparator(AnnotatedToken *ATok, ASTElement *Ref) { 141 | (*TemplateArgs)->Separators.push_back(AnnotatedTokenRef(ATok, Ref)); 142 | } 143 | void addTemplateArgument(std::unique_ptr T) { 144 | 
(*TemplateArgs)->Args.push_back(TypeOrExpression(std::move(T))); 145 | } 146 | void addTemplateArgument(std::unique_ptr E) { 147 | (*TemplateArgs)->Args.push_back(TypeOrExpression(std::move(E))); 148 | } 149 | }; 150 | 151 | // Parentheses over an expression 152 | class ParenExpr : public Expr { 153 | enum { 154 | LEFT, 155 | RIGHT, 156 | END_EXPR 157 | }; 158 | AnnotatedTokenRef Parens[END_EXPR]; 159 | 160 | public: 161 | std::unique_ptr Value; 162 | 163 | ParenExpr(AnnotatedToken *Left, std::unique_ptr Value, 164 | AnnotatedToken *Right) 165 | : Expr(ParenExprClass), Value(std::move(Value)) { 166 | setLeftParen(Left); 167 | setRightParen(Right); 168 | } 169 | 170 | void setParen(int Index, AnnotatedToken *AT) { 171 | Parens[Index] = AnnotatedTokenRef(AT, this); 172 | } 173 | void setLeftParen(AnnotatedToken *AT) { setParen(LEFT, AT); } 174 | void setRightParen(AnnotatedToken *AT) { setParen(RIGHT, AT); } 175 | 176 | static bool classof(const ASTElement *T) { 177 | return T->getASTClass() == ParenExprClass; 178 | } 179 | }; 180 | 181 | // A variable name or function name inside an expression. 
182 | class DeclRefExpr : public Expr { 183 | public: 184 | QualifiedID Qualifier; 185 | 186 | DeclRefExpr() : Expr(DeclRefExprClass) {} 187 | 188 | void addNameQualifier(AnnotatedToken *NameTok) { 189 | Qualifier.addNameQualifier(NameTok, this); 190 | } 191 | void makeTemplateArgs() { Qualifier.makeTemplateArgs(); } 192 | void addTemplateSeparator(AnnotatedToken *ATok) { 193 | Qualifier.addTemplateSeparator(ATok, this); 194 | } 195 | void addTemplateArgument(std::unique_ptr T) { 196 | Qualifier.addTemplateArgument(std::move(T)); 197 | } 198 | void addTemplateArgument(std::unique_ptr E) { 199 | Qualifier.addTemplateArgument(std::move(E)); 200 | } 201 | 202 | static bool classof(const ASTElement *T) { 203 | return T->getASTClass() == DeclRefExprClass; 204 | } 205 | }; 206 | 207 | /// Int, char or string literals 208 | class LiteralConstant : public Expr { 209 | public: 210 | AnnotatedTokenRef Tok; 211 | LiteralConstant(AnnotatedToken *Tok) 212 | : Expr(LiteralConstantClass), Tok(Tok, this) { 213 | Tok->setASTReference(this); 214 | } 215 | 216 | static bool classof(const ASTElement *T) { 217 | return T->getASTClass() == LiteralConstantClass; 218 | } 219 | }; 220 | 221 | /// Any unary operator, even the overloaded ones. 222 | class UnaryOperator : public Expr { 223 | public: 224 | AnnotatedTokenRef OperatorTok; 225 | std::unique_ptr Value; 226 | 227 | UnaryOperator(AnnotatedToken *OperatorTok, std::unique_ptr Value) 228 | : Expr(UnaryOperatorClass), OperatorTok(OperatorTok, this), 229 | Value(std::move(Value)) { 230 | OperatorTok->setASTReference(this); 231 | } 232 | 233 | static bool classof(const ASTElement *T) { 234 | return T->getASTClass() == UnaryOperatorClass; 235 | } 236 | }; 237 | 238 | /// Used to store any kind of binary operators, even the overloaded ones. 
239 | class BinaryOperator : public Expr { 240 | enum { 241 | LHS, 242 | RHS, 243 | END_EXPR 244 | }; 245 | std::unique_ptr SubExprs[END_EXPR]; 246 | 247 | public: 248 | AnnotatedTokenRef OperatorTok; 249 | 250 | BinaryOperator(std::unique_ptr lhs, std::unique_ptr rhs, 251 | AnnotatedToken *OperatorTok) 252 | : Expr(BinaryOperatorClass), OperatorTok(OperatorTok, this) { 253 | SubExprs[LHS] = std::move(lhs); 254 | SubExprs[RHS] = std::move(rhs); 255 | } 256 | 257 | static bool classof(const ASTElement *T) { 258 | return T->getASTClass() == BinaryOperatorClass; 259 | } 260 | 261 | Expr *getLHS() { return cast(SubExprs[LHS].get()); } 262 | const Expr *getLHS() const { return cast(SubExprs[LHS].get()); } 263 | Expr *getRHS() { return cast(SubExprs[RHS].get()); } 264 | const Expr *getRHS() const { return cast(SubExprs[RHS].get()); } 265 | }; 266 | 267 | /// Function calls 268 | class CallExpr : public Expr { 269 | public: 270 | QualifiedID Qualifier; 271 | enum { 272 | LEFT, 273 | RIGHT, 274 | END_EXPR 275 | }; 276 | AnnotatedTokenRef Parens[END_EXPR]; 277 | llvm::SmallVector, 4> Args; 278 | llvm::SmallVector Commas; 279 | 280 | CallExpr(std::unique_ptr FunctionName) 281 | : Expr(CallExprClass), Qualifier(FunctionName->Qualifier) { 282 | Qualifier.reown(this); 283 | } 284 | 285 | void setParen(int Index, AnnotatedToken *AT) { 286 | Parens[Index] = AnnotatedTokenRef(AT, this); 287 | } 288 | void setLeftParen(AnnotatedToken *AT) { setParen(LEFT, AT); } 289 | void setRightParen(AnnotatedToken *AT) { setParen(RIGHT, AT); } 290 | 291 | void appendComma(AnnotatedToken *AT) { 292 | Commas.push_back(AnnotatedTokenRef(AT, this)); 293 | } 294 | 295 | static bool classof(const ASTElement *T) { 296 | return T->getASTClass() == CallExprClass; 297 | } 298 | }; 299 | 300 | /// In contrast to the clang AST, a Stmt is a real statement, that is either a 301 | /// CompoundStmt or a LineStmt. 
302 | class Stmt : public ASTElement { 303 | public: 304 | virtual ~Stmt() = 0; // Not optimized 305 | 306 | Stmt(ASTElementClass SC) : ASTElement(SC) {} 307 | }; 308 | inline Stmt::~Stmt() {} 309 | 310 | struct UnparsableBlock : Stmt { 311 | UnparsableBlock() : Stmt(UnparsableBlockClass) {} 312 | void push_back(AnnotatedToken *Tok) { 313 | Body.push_back(AnnotatedTokenRef(Tok, this)); 314 | } 315 | llvm::SmallVector Body; 316 | 317 | static bool classof(const ASTElement *T) { 318 | return T->getASTClass() == UnparsableBlockClass; 319 | } 320 | }; 321 | 322 | class Expr; 323 | 324 | /// By a semicolon terminated statement 325 | class LineStmt : public Stmt { 326 | AnnotatedTokenRef Semi; 327 | 328 | protected: 329 | LineStmt(ASTElementClass SC, AnnotatedToken *Semi) 330 | : Stmt(SC), Semi(Semi, this) {} 331 | LineStmt(ASTElementClass SC, nullptr_t) : Stmt(SC), Semi(nullptr) {} 332 | 333 | public: 334 | void setSemi(AnnotatedToken *Tok) { Semi = AnnotatedTokenRef(Tok, this); } 335 | }; 336 | 337 | /// An expression terminated by a semicolon 338 | struct ExprLineStmt : LineStmt { 339 | ExprLineStmt(std::unique_ptr Body, AnnotatedToken *Semi) 340 | : LineStmt(ExprLineStmtClass, Semi), Body(std::move(Body)) {} 341 | 342 | std::unique_ptr Body; 343 | 344 | static bool classof(const ASTElement *T) { 345 | return T->getASTClass() == ExprLineStmtClass; 346 | } 347 | }; 348 | 349 | struct ReturnStmt : LineStmt { 350 | ReturnStmt(AnnotatedToken *Return, std::unique_ptr Body, 351 | AnnotatedToken *Semi) 352 | : LineStmt(ReturnStmtClass, Semi), Body(std::move(Body)), 353 | Return(Return, this) {} 354 | 355 | std::unique_ptr Body; 356 | AnnotatedTokenRef Return; 357 | 358 | static bool classof(const ASTElement *T) { 359 | return T->getASTClass() == ReturnStmtClass; 360 | } 361 | }; 362 | 363 | struct LabelStmt : Stmt { 364 | AnnotatedTokenRef LabelName, Colon; 365 | 366 | LabelStmt(AnnotatedToken *LabelName, AnnotatedToken *Colon) 367 | : Stmt(LabelStmtClass), 
LabelName(LabelName, this), Colon(Colon, this) {} 368 | 369 | static bool classof(const ASTElement *T) { 370 | return T->getASTClass() == LabelStmtClass; 371 | } 372 | }; 373 | 374 | /// A Type with it's decorations. 375 | struct Type : ASTElement { 376 | Type() : ASTElement(TypeClass) {} 377 | 378 | struct Decoration : ASTElement { 379 | enum DecorationClass { 380 | Pointer, 381 | Reference, 382 | }; 383 | Decoration(DecorationClass Class, AnnotatedToken *Tok) 384 | : ASTElement(TypeDecorationClass), Class(Class), Tok(Tok) {} 385 | DecorationClass Class; 386 | AnnotatedToken *Tok; 387 | 388 | void fix() { Tok->setASTReference(this); } 389 | 390 | static bool classof(const ASTElement *T) { 391 | return T->getASTClass() == TypeDecorationClass; 392 | } 393 | }; 394 | 395 | llvm::SmallVector Decorations; 396 | QualifiedID Qualifier; 397 | 398 | void addDecoration(Decoration Dec) { 399 | auto *OldLoc = Decorations.empty() ? nullptr : &Decorations.front(); 400 | Decorations.push_back(Dec); 401 | if (OldLoc != &Decorations.front()) 402 | for (auto &D : Decorations) 403 | D.fix(); 404 | } 405 | 406 | void addNameQualifier(AnnotatedToken *NameTok) { 407 | Qualifier.addNameQualifier(NameTok, this); 408 | } 409 | void makeTemplateArgs() { Qualifier.makeTemplateArgs(); } 410 | void addTemplateSeparator(AnnotatedToken *ATok) { 411 | Qualifier.addTemplateSeparator(ATok, this); 412 | } 413 | void addTemplateArgument(std::unique_ptr T) { 414 | Qualifier.addTemplateArgument(std::move(T)); 415 | } 416 | void addTemplateArgument(std::unique_ptr E) { 417 | Qualifier.addTemplateArgument(std::move(E)); 418 | } 419 | static bool classof(const ASTElement *T) { 420 | return T->getASTClass() == TypeClass; 421 | } 422 | 423 | std::unique_ptr cloneWithoutDecorations() { 424 | auto Clone = llvm::make_unique(); 425 | Clone->Qualifier = Qualifier; 426 | Clone->Qualifier.reown(Clone.get()); 427 | return Clone; 428 | } 429 | }; 430 | 431 | inline TypeOrExpression::TypeOrExpression(std::unique_ptr 
T) 432 | : Ptr(std::move(T)) {} 433 | 434 | /// Initialization of a variable 435 | struct VarInitialization : ASTElement { 436 | enum InitializationType { 437 | NONE = 0, 438 | ASSIGNMENT, 439 | CONSTRUCTOR, 440 | BRACE, 441 | }; 442 | VarInitialization() : ASTElement(VarInitializationClass), InitType(NONE) {} 443 | 444 | void setAssignmentOps(InitializationType InitType, 445 | AnnotatedToken AssignmentOps[2]) { 446 | this->InitType = ASSIGNMENT; 447 | if (InitType == ASSIGNMENT) { 448 | this->AssignmentOps[0] = AnnotatedTokenRef(&AssignmentOps[0], this); 449 | this->AssignmentOps[1] = AnnotatedTokenRef(nullptr); 450 | } else { 451 | this->AssignmentOps[0] = AnnotatedTokenRef(&AssignmentOps[0], this); 452 | this->AssignmentOps[1] = AnnotatedTokenRef(&AssignmentOps[1], this); 453 | } 454 | } 455 | 456 | InitializationType InitType; 457 | AnnotatedTokenRef AssignmentOps[2]; // '=' or '('+')' or '{'+'}' 458 | std::unique_ptr Value; 459 | 460 | static bool classof(const ASTElement *T) { 461 | return T->getASTClass() == VarInitializationClass; 462 | } 463 | }; 464 | 465 | /// Declaration of a variable with optional initialization 466 | struct VarDecl : ASTElement { 467 | VarDecl() : ASTElement(VarDeclClass) {} 468 | 469 | void setName(AnnotatedToken *Tok) { 470 | this->NameTok = AnnotatedTokenRef(Tok, this); 471 | } 472 | 473 | std::unique_ptr VariableType; 474 | AnnotatedTokenRef NameTok; 475 | llvm::Optional Value; 476 | 477 | static bool classof(const ASTElement *T) { 478 | return T->getASTClass() == VarDeclClass; 479 | } 480 | }; 481 | 482 | /// Only for variable declarations (for now) 483 | struct DeclStmt : LineStmt { 484 | llvm::SmallVector, 2> Decls; 485 | llvm::SmallVector Commas; 486 | 487 | DeclStmt() : LineStmt(DeclStmtClass, nullptr) {} 488 | 489 | void appendComma(AnnotatedToken *Tok) { 490 | Commas.push_back(AnnotatedTokenRef(Tok, this)); 491 | } 492 | 493 | static bool classof(const ASTElement *T) { 494 | return T->getASTClass() == DeclStmtClass; 495 | } 
496 | }; 497 | 498 | class CompoundStmt; 499 | 500 | struct TemplateParameterType : ASTElement { 501 | TemplateParameterType() : ASTElement(TemplateParameterTypeClass) {} 502 | enum { 503 | KEYWORD, 504 | NAME, 505 | EQUAL, 506 | END_EXPR 507 | }; 508 | AnnotatedTokenRef Refs[END_EXPR]; 509 | std::unique_ptr DefaultType; 510 | 511 | void setRef(int Index, AnnotatedToken *Tok) { 512 | Refs[Index] = AnnotatedTokenRef(Tok, this); 513 | } 514 | void setKeyword(AnnotatedToken *Tok) { setRef(KEYWORD, Tok); } 515 | void setName(AnnotatedToken *Tok) { setRef(NAME, Tok); } 516 | void setEqual(AnnotatedToken *Tok) { setRef(EQUAL, Tok); } 517 | 518 | static bool classof(const ASTElement *T) { 519 | return T->getASTClass() == TemplateParameterTypeClass; 520 | } 521 | }; 522 | 523 | struct TemplateDecl : Stmt { 524 | TemplateDecl() : Stmt(TemplateDeclClass) {} 525 | 526 | std::unique_ptr Templated; 527 | enum { 528 | KEYWORD, 529 | LEFT, 530 | RIGHT, 531 | END_EXPR 532 | }; 533 | AnnotatedTokenRef Refs[END_EXPR]; 534 | 535 | llvm::SmallVector, 2> Params; 536 | llvm::SmallVector Commas; 537 | 538 | void addParam(std::unique_ptr P) { 539 | Params.push_back(std::move(P)); 540 | } 541 | void addComma(AnnotatedToken *Tok) { 542 | Commas.push_back(AnnotatedTokenRef(Tok, this)); 543 | } 544 | 545 | void setRef(int Index, AnnotatedToken *Tok) { 546 | Refs[Index] = AnnotatedTokenRef(Tok, this); 547 | } 548 | void setKeyword(AnnotatedToken *Tok) { setRef(KEYWORD, Tok); } 549 | void setLess(AnnotatedToken *Tok) { setRef(LEFT, Tok); } 550 | void setGreater(AnnotatedToken *Tok) { setRef(RIGHT, Tok); } 551 | 552 | static bool classof(const ASTElement *T) { 553 | return T->getASTClass() == TemplateDeclClass; 554 | } 555 | }; 556 | 557 | struct FunctionDecl : Stmt { 558 | FunctionDecl() : Stmt(FunctionDeclClass) {} 559 | enum { 560 | LEFT, 561 | RIGHT, 562 | SEMI, 563 | END_EXPR 564 | }; 565 | AnnotatedTokenRef Refs[END_EXPR]; 566 | llvm::SmallVector Decls; 567 | llvm::SmallVector, 4> Params; 
568 | llvm::SmallVector Commas; 569 | 570 | void appendComma(AnnotatedToken *AT) { 571 | Commas.push_back(AnnotatedTokenRef(AT, this)); 572 | } 573 | 574 | std::unique_ptr ReturnType; 575 | 576 | void setRef(int Index, AnnotatedToken *Tok) { 577 | Refs[Index] = AnnotatedTokenRef(Tok, this); 578 | } 579 | void setLeftBrace(AnnotatedToken *Tok) { setRef(LEFT, Tok); } 580 | void setRightBrace(AnnotatedToken *Tok) { setRef(RIGHT, Tok); } 581 | void setSemi(AnnotatedToken *Tok) { setRef(SEMI, Tok); } 582 | void addDeclSpecifier(AnnotatedToken *Tok) { 583 | Decls.push_back(AnnotatedTokenRef(Tok, this)); 584 | } 585 | 586 | QualifiedID Name; 587 | void addNameQualifier(AnnotatedToken *NameTok) { 588 | Name.addNameQualifier(NameTok, this); 589 | } 590 | void makeTemplateArgs(AnnotatedToken *Tok) { 591 | llvm_unreachable("don't add template arguments to function names"); 592 | } 593 | void addTemplateSeparator(AnnotatedToken *Tok) { 594 | llvm_unreachable("don't add template arguments to function names"); 595 | } 596 | 597 | std::unique_ptr Body; 598 | 599 | static bool classof(const ASTElement *T) { 600 | return T->getASTClass() == FunctionDeclClass; 601 | } 602 | }; 603 | 604 | template class IndirectRange { 605 | public: 606 | IndirectRange(Iter First, Iter Last) : First(First), Last(Last) {} 607 | struct IndirectIter { 608 | IndirectIter(Iter Pos) : Pos(Pos) {} 609 | Iter Pos; 610 | friend bool operator==(IndirectIter LHS, IndirectIter RHS) { 611 | return LHS.Pos == RHS.Pos; 612 | } 613 | friend bool operator!=(IndirectIter LHS, IndirectIter RHS) { 614 | return LHS.Pos != RHS.Pos; 615 | } 616 | IndirectIter operator++() { 617 | ++Pos; 618 | return *this; 619 | } 620 | IndirectIter operator++(int) { 621 | auto Self = *this; 622 | ++*this; 623 | return Self; 624 | } 625 | Value &operator*() { return **Pos; } 626 | }; 627 | 628 | IndirectIter begin() { return First; } 629 | IndirectIter end() { return Last; } 630 | 631 | std::size_t size() const { 632 | static_assert( 633 
| std::is_base_of< 634 | std::random_access_iterator_tag, 635 | typename std::iterator_traits::iterator_category>::value, 636 | "Size only allowed for Random Access Iterators."); 637 | return std::distance(First.Pos, Last.Pos); 638 | } 639 | 640 | private: 641 | IndirectIter First, Last; 642 | }; 643 | 644 | struct Scope { 645 | using child_range = IndirectRange< 646 | llvm::SmallVector, 8>::iterator, Stmt>; 647 | using const_child_range = IndirectRange< 648 | llvm::SmallVector, 8>::const_iterator, Stmt>; 649 | 650 | llvm::SmallVector, 8> Body; 651 | 652 | child_range children() { return child_range(Body.begin(), Body.end()); } 653 | const_child_range children() const { 654 | return const_child_range(Body.begin(), Body.end()); 655 | } 656 | 657 | void addStmt(std::unique_ptr Statement) { 658 | Body.push_back(std::move(Statement)); 659 | } 660 | }; 661 | 662 | template struct BlockScope : Scope { 663 | enum { 664 | LBR, 665 | RBR, 666 | END_EXPR 667 | }; 668 | AnnotatedTokenRef Braces[END_EXPR]; 669 | void setBrace(int BraceIdx, AnnotatedToken *Tok) { 670 | assert(0 <= BraceIdx && BraceIdx < END_EXPR); 671 | Braces[BraceIdx] = AnnotatedTokenRef(Tok, static_cast(this)); 672 | } 673 | void setLeftBrace(AnnotatedToken *Tok) { setBrace(LBR, Tok); } 674 | void setRightBrace(AnnotatedToken *Tok) { setBrace(RBR, Tok); } 675 | 676 | bool hasScope() const { return Braces[LBR]; } 677 | }; 678 | 679 | /// A {}-Block with Statements inside. 
680 | class CompoundStmt : public Stmt, public BlockScope { 681 | public: 682 | CompoundStmt(AnnotatedToken *lbr, AnnotatedToken *rbr) 683 | : Stmt(CompoundStmtClass) { 684 | setLeftBrace(lbr); 685 | setRightBrace(rbr); 686 | } 687 | 688 | CompoundStmt() : Stmt(CompoundStmtClass) {} 689 | 690 | static bool classof(const ASTElement *T) { 691 | return T->getASTClass() == CompoundStmtClass; 692 | } 693 | }; 694 | 695 | using CondExpr = std::unique_ptr; 696 | 697 | struct WhileStmt : Stmt { 698 | WhileStmt() : Stmt(WhileStmtClass) {} 699 | 700 | CondExpr Cond; 701 | std::unique_ptr Body; 702 | 703 | enum { 704 | KEYWORD, 705 | LEFT, 706 | RIGHT, 707 | END_EXPR, 708 | }; 709 | AnnotatedTokenRef Refs[END_EXPR]; 710 | 711 | void setRef(int Index, AnnotatedToken *Tok) { 712 | Refs[Index] = AnnotatedTokenRef(Tok, this); 713 | } 714 | void setKeyword(AnnotatedToken *Tok) { setRef(KEYWORD, Tok); } 715 | void setLeftParen(AnnotatedToken *Tok) { setRef(LEFT, Tok); } 716 | void setRightParen(AnnotatedToken *Tok) { setRef(RIGHT, Tok); } 717 | 718 | static bool classof(const ASTElement *T) { 719 | return T->getASTClass() == WhileStmtClass; 720 | } 721 | }; 722 | 723 | struct DoWhileStmt : LineStmt { 724 | DoWhileStmt() : LineStmt(DoWhileStmtClass, nullptr) {} 725 | 726 | CondExpr Cond; 727 | std::unique_ptr Body; 728 | 729 | enum { 730 | KEYWORD_DO, 731 | KEYWORD_WHILE, 732 | LEFT, 733 | RIGHT, 734 | END_EXPR, 735 | }; 736 | AnnotatedTokenRef Refs[END_EXPR]; 737 | 738 | void setRef(int Index, AnnotatedToken *Tok) { 739 | Refs[Index] = AnnotatedTokenRef(Tok, this); 740 | } 741 | void setDo(AnnotatedToken *Tok) { setRef(KEYWORD_DO, Tok); } 742 | void setWhile(AnnotatedToken *Tok) { setRef(KEYWORD_WHILE, Tok); } 743 | void setLeftParen(AnnotatedToken *Tok) { setRef(LEFT, Tok); } 744 | void setRightParen(AnnotatedToken *Tok) { setRef(RIGHT, Tok); } 745 | 746 | static bool classof(const ASTElement *T) { 747 | return T->getASTClass() == WhileStmtClass; 748 | } 749 | }; 750 | 751 | 
struct IfStmt : Stmt { 752 | IfStmt() : Stmt(IfStmtClass) {} 753 | 754 | struct IfBranch { 755 | CondExpr Cond; 756 | std::unique_ptr Body; 757 | 758 | enum { 759 | KEYWORD1, 760 | KEYWORD2, 761 | LEFT, 762 | RIGHT, 763 | END_EXPR, 764 | }; 765 | AnnotatedTokenRef Refs[END_EXPR]; 766 | 767 | IfBranch(ASTElement *ASTRef, AnnotatedToken *Keyword1, 768 | AnnotatedToken *Keyword2, AnnotatedToken *LeftParen, CondExpr Cond, 769 | AnnotatedToken *RightParen, std::unique_ptr Body) 770 | : Cond(std::move(Cond)), Body(std::move(Body)) { 771 | setRef(KEYWORD1, Keyword1, ASTRef); 772 | setRef(KEYWORD2, Keyword2, ASTRef); 773 | setRef(LEFT, LeftParen, ASTRef); 774 | setRef(RIGHT, RightParen, ASTRef); 775 | } 776 | void setRef(int Index, AnnotatedToken *Tok, ASTElement *ASTRef) { 777 | Refs[Index] = AnnotatedTokenRef(Tok, ASTRef); 778 | } 779 | }; 780 | 781 | llvm::SmallVector Branches; 782 | 783 | void addBranch(AnnotatedToken *Keyword1, AnnotatedToken *Keyword2, 784 | AnnotatedToken *LeftParen, CondExpr Cond, 785 | AnnotatedToken *RightParen, std::unique_ptr Body) { 786 | Branches.push_back(IfBranch(this, Keyword1, Keyword2, LeftParen, 787 | std::move(Cond), RightParen, std::move(Body))); 788 | } 789 | 790 | static bool classof(const ASTElement *T) { 791 | return T->getASTClass() == IfStmtClass; 792 | } 793 | }; 794 | 795 | struct ForStmt : Stmt { 796 | 797 | ForStmt() : Stmt(ForStmtClass) {} 798 | 799 | CondExpr Init, Cond; 800 | std::unique_ptr Inc; 801 | std::unique_ptr Body; 802 | 803 | enum { 804 | KEYWORD, 805 | LEFT, 806 | RIGHT, 807 | SEMI1, 808 | SEMI2, 809 | END_EXPR, 810 | }; 811 | AnnotatedTokenRef Refs[END_EXPR]; 812 | 813 | void setRef(int Index, AnnotatedToken *Tok) { 814 | Refs[Index] = AnnotatedTokenRef(Tok, this); 815 | } 816 | void setKeyword(AnnotatedToken *Tok) { setRef(KEYWORD, Tok); } 817 | void setLeftParen(AnnotatedToken *Tok) { setRef(LEFT, Tok); } 818 | void setRightParen(AnnotatedToken *Tok) { setRef(RIGHT, Tok); } 819 | 820 | void 
setSemi1(AnnotatedToken *Tok) { setRef(SEMI1, Tok); } 821 | void setSemi2(AnnotatedToken *Tok) { setRef(SEMI2, Tok); } 822 | 823 | static bool classof(const ASTElement *T) { 824 | return T->getASTClass() == ForStmtClass; 825 | } 826 | }; 827 | 828 | struct ClassDecl : LineStmt, BlockScope { 829 | enum { 830 | CLASS, 831 | COLON, 832 | END_EXPR 833 | }; 834 | AnnotatedTokenRef Refs[END_EXPR]; 835 | 836 | std::unique_ptr Name; 837 | 838 | struct BaseClass { 839 | AnnotatedTokenRef Accessibility, Comma; 840 | std::unique_ptr T; 841 | }; 842 | 843 | llvm::SmallVector BaseClasses; 844 | 845 | ClassDecl() : LineStmt(ClassDeclClass, nullptr) {} 846 | 847 | void setRef(int Index, AnnotatedToken *Tok) { 848 | Refs[Index] = AnnotatedTokenRef(Tok, this); 849 | } 850 | void setClass(AnnotatedToken *Tok) { setRef(CLASS, Tok); } 851 | void setColon(AnnotatedToken *Tok) { setRef(COLON, Tok); } 852 | 853 | void addBaseClass(AnnotatedToken *Accessibility, std::unique_ptr T, 854 | AnnotatedToken *Comma) { 855 | BaseClasses.push_back({ AnnotatedTokenRef(Accessibility, this), 856 | AnnotatedTokenRef(Comma, this), 857 | std::move(T), }); 858 | } 859 | 860 | static bool classof(const ASTElement *T) { 861 | return T->getASTClass() == ClassDeclClass; 862 | } 863 | }; 864 | 865 | struct NamespaceDecl : Stmt, BlockScope { 866 | enum { 867 | NAMESPACE, 868 | NAME, 869 | END_EXPR 870 | }; 871 | AnnotatedTokenRef Refs[END_EXPR]; 872 | 873 | NamespaceDecl() : Stmt(NamespaceDeclClass) {} 874 | 875 | void setRef(int Index, AnnotatedToken *Tok) { 876 | Refs[Index] = AnnotatedTokenRef(Tok, this); 877 | } 878 | void setNamespace(AnnotatedToken *Tok) { setRef(NAMESPACE, Tok); } 879 | void setName(AnnotatedToken *Tok) { setRef(NAME, Tok); } 880 | 881 | static bool classof(const ASTElement *T) { 882 | return T->getASTClass() == NamespaceDeclClass; 883 | } 884 | }; 885 | 886 | struct PPDirective : ASTElement { 887 | protected: 888 | PPDirective(ASTElementClass SC) : ASTElement(SC) {} 889 | 890 | public: 
891 | static bool classof(const ASTElement *T) { 892 | auto Class = T->getASTClass(); 893 | return firstPPDirective <= Class && Class <= lastPPDirective; 894 | } 895 | }; 896 | 897 | struct PPString : ASTElement { 898 | PPString() : ASTElement(PPStringClass) {} 899 | 900 | llvm::SmallVector Refs; 901 | 902 | void addToken(AnnotatedToken *Tok) { 903 | Refs.push_back(AnnotatedTokenRef(Tok, this)); 904 | } 905 | 906 | static bool classof(const ASTElement *T) { 907 | return T->getASTClass() == PPStringClass; 908 | } 909 | }; 910 | 911 | struct PPInclude : PPDirective { 912 | PPInclude() : PPDirective(PPIncludeClass) {} 913 | 914 | enum { 915 | HASH, 916 | INCLUDE, 917 | EOD, 918 | END_EXPR 919 | }; 920 | AnnotatedTokenRef Refs[END_EXPR]; 921 | std::unique_ptr Path; 922 | 923 | void setRef(int Index, AnnotatedToken *Tok) { 924 | Refs[Index] = AnnotatedTokenRef(Tok, this); 925 | } 926 | void setHash(AnnotatedToken *Tok) { setRef(HASH, Tok); } 927 | void setInclude(AnnotatedToken *Tok) { setRef(INCLUDE, Tok); } 928 | void setEOD(AnnotatedToken *Tok) { setRef(EOD, Tok); } 929 | 930 | static bool classof(const ASTElement *T) { 931 | return T->getASTClass() == PPIncludeClass; 932 | } 933 | }; 934 | 935 | struct PPIf : PPDirective { 936 | PPIf() : PPDirective(PPIfClass) {} 937 | 938 | enum { 939 | HASH, 940 | KEYWORD, 941 | EOD, 942 | END_EXPR 943 | }; 944 | AnnotatedTokenRef Refs[END_EXPR]; 945 | 946 | std::unique_ptr Cond; 947 | 948 | void setRef(int Index, AnnotatedToken *Tok) { 949 | Refs[Index] = AnnotatedTokenRef(Tok, this); 950 | } 951 | void setHash(AnnotatedToken *Tok) { setRef(HASH, Tok); } 952 | void setKeyword(AnnotatedToken *Tok) { setRef(KEYWORD, Tok); } 953 | void setEOD(AnnotatedToken *Tok) { setRef(EOD, Tok); } 954 | 955 | static bool classof(const ASTElement *T) { 956 | return T->getASTClass() == PPIfClass; 957 | } 958 | }; 959 | 960 | struct UnparsablePP : PPDirective { 961 | UnparsablePP() : PPDirective(UnparsablePPClass) {} 962 | 963 | llvm::SmallVector 
Refs; 964 | void push_back(AnnotatedToken *Tok) { 965 | Refs.push_back(AnnotatedTokenRef(Tok, this)); 966 | } 967 | 968 | static bool classof(const ASTElement *T) { 969 | return T->getASTClass() == UnparsablePPClass; 970 | } 971 | }; 972 | 973 | struct TranslationUnit : Scope { 974 | llvm::SmallVector, 8> PPDirectives; 975 | 976 | void addPPDirective(std::unique_ptr PP) { 977 | PPDirectives.push_back(std::move(PP)); 978 | } 979 | }; 980 | 981 | TranslationUnit fuzzyparse(AnnotatedToken *first, AnnotatedToken *last); 982 | 983 | void printAST(llvm::raw_ostream &OS, const Stmt &Root, 984 | const SourceManager &SourceMgr); 985 | 986 | void printAST(llvm::raw_ostream &OS, const TranslationUnit &TU, 987 | const SourceManager &SourceMgr); 988 | 989 | } // end namespace fuzzy 990 | } // end namespace clang 991 | 992 | #endif // LLVM_CLANG_TOOLS_CLANG_HIGHLIGHT_FUZZY_AST_H 993 | -------------------------------------------------------------------------------- /Fuzzy/FuzzyASTPrinter.cpp: -------------------------------------------------------------------------------- 1 | //===--- FuzzyASTPrinter.cpp - clang-highlight ------------------*- C++ -*-===// 2 | // 3 | // The LLVM Compiler Infrastructure 4 | // 5 | // This file is distributed under the University of Illinois Open Source 6 | // License. See LICENSE.TXT for details. 
7 | // 8 | //===----------------------------------------------------------------------===// 9 | #include "llvm/Support/raw_os_ostream.h" 10 | #include "llvm/Support/Debug.h" 11 | #include "llvm/ADT/STLExtras.h" 12 | #include "FuzzyAST.h" 13 | 14 | using namespace llvm; 15 | 16 | namespace clang { 17 | namespace fuzzy { 18 | 19 | namespace { 20 | struct Indented { 21 | const int Indent; 22 | explicit Indented(int Indent) : Indent(Indent) {} 23 | friend raw_ostream &operator<<(raw_ostream &OS, Indented ID) { 24 | const int Total = 4 * ID.Indent; 25 | for (int i = 0; i < Total; ++i) 26 | OS.write(' '); 27 | return OS; 28 | } 29 | Indented next() { return Indented(Indent + 1); } 30 | }; 31 | } // end anonymous namespace 32 | 33 | namespace { 34 | struct ASTPrinter { 35 | const SourceManager &SourceMgr; 36 | raw_ostream &OS; 37 | 38 | void print(Indented Indent, const Type &T); 39 | void print(Indented Indent, const VarDecl &DCL); 40 | void print(Indented Indent, const Expr &EXP); 41 | void print(Indented Indent, const Stmt &stmt); 42 | void print(Indented Indent, const QualifiedID &Qual); 43 | void print(Indented Indent, const PPDirective &Qual); 44 | void printScope(Indented Indent, const Scope &Sc); 45 | void printCondition(Indented Indent, const char *Name, ASTElement *E); 46 | }; 47 | } // end anonymous namespace 48 | 49 | void ASTPrinter::printScope(Indented Indent, const Scope &Sc) { 50 | OS << "{\n"; 51 | for (auto &S : Sc.children()) 52 | print(Indent.next(), S); 53 | OS << Indent << "}\n"; 54 | } 55 | 56 | void ASTPrinter::printCondition(Indented Indent, const char *Name, 57 | ASTElement *E) { 58 | OS << Indent.next() << Name << (E ? 
"\n" : ": \n"); 59 | if (E) { 60 | if (auto *D = dyn_cast(E)) 61 | print(Indent.next().next(), *D); 62 | else if (auto *V = dyn_cast(E)) 63 | print(Indent.next().next(), *V); 64 | else if (auto *U = dyn_cast(E)) 65 | print(Indent.next().next(), *U); 66 | else 67 | print(Indent.next().next(), *cast(E)); 68 | } 69 | } 70 | 71 | void ASTPrinter::print(Indented Indent, const QualifiedID &Qual) { 72 | for (auto &N : Qual.NameSegments) { 73 | OS << N->getText(SourceMgr); 74 | } 75 | if (Qual.TemplateArgs) { 76 | OS << "\n" << Indent << "<\n"; 77 | for (auto &A : (*Qual.TemplateArgs)->Args) { 78 | if (A.isType()) 79 | print(Indent.next(), A.asType()); 80 | else 81 | print(Indent.next(), A.asExpr()); 82 | } 83 | OS << Indent << '>'; 84 | } 85 | } 86 | 87 | void ASTPrinter::print(Indented Indent, const Type &T) { 88 | OS << Indent << "Type "; 89 | for (auto &D : T.Decorations) 90 | OS << '\'' << D.Tok->getText(SourceMgr) << "' "; 91 | OS << '\''; 92 | print(Indent.next(), T.Qualifier); 93 | OS << "'\n"; 94 | } 95 | 96 | void ASTPrinter::print(Indented Indent, const VarDecl &DCL) { 97 | OS << Indent << "VarDecl '" << DCL.NameTok->getText(SourceMgr) << "'\n"; 98 | print(Indent.next(), *DCL.VariableType); 99 | if (DCL.Value) { 100 | const char *InitName[] = { "?", "=", "()", "{}" }; 101 | assert(1 <= DCL.Value->InitType && DCL.Value->InitType < 4); 102 | OS << Indent.next() << "Assignment Type '" << InitName[DCL.Value->InitType] 103 | << "'\n"; 104 | assert(DCL.Value->Value); 105 | print(Indent.next(), *DCL.Value->Value); 106 | } 107 | } 108 | 109 | void ASTPrinter::print(Indented Indent, const Expr &EXP) { 110 | if (auto *BinOp = dyn_cast(&EXP)) { 111 | print(Indent.next(), *BinOp->getLHS()); 112 | OS << Indent << tok::getTokenName(BinOp->OperatorTok->getTokenKind()) 113 | << '\n'; 114 | print(Indent.next(), *BinOp->getRHS()); 115 | } else if (auto *Decl = dyn_cast(&EXP)) { 116 | OS << Indent << "DeclRefExpr '"; 117 | print(Indent.next(), Decl->Qualifier); 118 | OS << "'\n"; 
119 | } else if (auto *Lit = dyn_cast(&EXP)) { 120 | OS << Indent << Lit->Tok->getText(SourceMgr) << '\n'; 121 | } else if (auto *Call = dyn_cast(&EXP)) { 122 | OS << Indent << "call expr '"; 123 | print(Indent.next(), Call->Qualifier); 124 | OS << "'\n"; 125 | for (auto &Arg : Call->Args) 126 | print(Indent.next(), *Arg); 127 | } else if (auto *Unar = dyn_cast(&EXP)) { 128 | OS << Indent << Unar->OperatorTok->getText(SourceMgr) << "\n"; 129 | print(Indent.next(), *Unar->Value); 130 | } else if (auto *PE = dyn_cast(&EXP)) { 131 | OS << Indent << "ParenExpr:\n"; 132 | print(Indent.next(), *PE->Value); 133 | } else { 134 | llvm_unreachable("TODO: unhandled fuzzy ast node of type Expr"); 135 | } 136 | } 137 | 138 | void ASTPrinter::print(Indented Indent, const Stmt &stmt) { 139 | if (auto *DS = dyn_cast(&stmt)) { 140 | OS << Indent << "DeclStmt\n"; 141 | for (const auto &VD : DS->Decls) 142 | print(Indent.next(), *VD); 143 | } else if (auto *UB = dyn_cast(&stmt)) { 144 | (void)UB; 145 | OS << Indent << "Unparsable Block:\n"; 146 | for (auto T : UB->Body) 147 | OS << Indent.next() << T->getText(SourceMgr) << '\n'; 148 | } else if (auto *ELS = dyn_cast(&stmt)) { 149 | OS << Indent << "ExprLineStmt\n"; 150 | print(Indent.next(), *ELS->Body); 151 | } else if (auto *RS = dyn_cast(&stmt)) { 152 | OS << Indent << "ReturnStmt\n"; 153 | if (RS->Body) 154 | print(Indent.next(), *RS->Body); 155 | else 156 | OS << Indent.next() << "\n"; 157 | } else if (auto *FD = dyn_cast(&stmt)) { 158 | OS << Indent << "FunctionDecl '"; 159 | print(Indent.next().next(), FD->Name); 160 | OS << "'\n" << Indent.next() << "Body:\n"; 161 | if (FD->Body) 162 | print(Indent.next().next(), *FD->Body); 163 | } else if (auto *CD = dyn_cast(&stmt)) { 164 | OS << Indent << '\'' << CD->Refs[ClassDecl::CLASS]->getText(SourceMgr) 165 | << "' "; 166 | print(Indent.next(), *CD->Name); 167 | if (!CD->BaseClasses.empty()) { 168 | OS << " derived from\n"; 169 | for (auto &BC : CD->BaseClasses) { 170 | OS << 
Indent.next() 171 | << (BC.Accessibility ? BC.Accessibility->getText(SourceMgr) 172 | : "") << ' '; 173 | print(Indent.next().next(), *BC.T); 174 | } 175 | } 176 | if (!CD->hasScope()) 177 | OS << " (declaration only)\n"; 178 | else 179 | printScope(Indent, *CD); 180 | } else if (auto *LBL = dyn_cast(&stmt)) { 181 | OS << Indent << "Label '" << LBL->LabelName->getText(SourceMgr) << "'\n"; 182 | } else if (auto *NS = dyn_cast(&stmt)) { 183 | OS << Indent << "Namespace '" 184 | << (NS->Refs[NamespaceDecl::NAME] 185 | ? NS->Refs[NamespaceDecl::NAME]->getText(SourceMgr) 186 | : "") << '\''; 187 | printScope(Indent, *NS); 188 | } else if (auto TD = dyn_cast(&stmt)) { 189 | OS << Indent << "Template <'\n"; 190 | for (auto &A : TD->Params) { 191 | if (auto *E = dyn_cast(A.get())) 192 | print(Indent.next().next(), *E); 193 | else if (auto *VD = dyn_cast(A.get())) 194 | print(Indent.next().next(), *VD); 195 | else 196 | print(Indent.next().next(), *static_cast(A.get())); 197 | } 198 | OS << Indent.next() << "> with Body:\n"; 199 | print(Indent.next().next(), *TD->Templated); 200 | } else if (auto *If = dyn_cast(&stmt)) { 201 | OS << Indent << "If\n"; 202 | for (auto &B : If->Branches) { 203 | printCondition(Indent, "Condition", B.Cond.get()); 204 | OS << Indent.next() << "Body:\n"; 205 | print(Indent.next().next(), *B.Body); 206 | } 207 | } else if (auto *CS = dyn_cast(&stmt)) { 208 | OS << Indent << "CompoundStmt:\n"; 209 | for (auto &S : CS->Body) 210 | print(Indent.next(), *S); 211 | } else if (auto *While = dyn_cast(&stmt)) { 212 | OS << Indent << "WhileStmt:\n"; 213 | printCondition(Indent, "Condition", While->Cond.get()); 214 | OS << Indent.next() << "Body:\n"; 215 | print(Indent.next().next(), *While->Body); 216 | } else if (auto *For = dyn_cast(&stmt)) { 217 | OS << Indent << "ForStmt:\n"; 218 | printCondition(Indent, "Init", For->Init.get()); 219 | printCondition(Indent, "Condition", For->Cond.get()); 220 | printCondition(Indent, "Incr", For->Inc.get()); 221 | OS 
<< Indent.next() << "Body:\n"; 222 | print(Indent.next().next(), *For->Body); 223 | } else { 224 | llvm_unreachable("TODO: unhandled fuzzy ast node"); 225 | } 226 | } 227 | 228 | void ASTPrinter::print(Indented Indent, const PPDirective &PP) { 229 | if (auto *Inc = dyn_cast(&PP)) { 230 | OS << Indent << "Include Directive: '"; 231 | if (Inc->Path) 232 | for (auto &S : Inc->Path->Refs) 233 | OS << S->getText(SourceMgr); 234 | OS << "'\n"; 235 | } else if (auto *If = dyn_cast(&PP)) { 236 | OS << Indent << "Preprocessor '" 237 | << If->Refs[PPIf::KEYWORD]->getText(SourceMgr) << "':\n"; 238 | if (If->Cond) { 239 | if (auto *E = dyn_cast(If->Cond.get())) 240 | print(Indent.next(), *E); 241 | else 242 | print(Indent.next(), *cast(If->Cond.get())); 243 | } 244 | } else if (auto *UP = dyn_cast(&PP)) { 245 | OS << Indent << "Unparsable PP:\n"; 246 | for (auto R : UP->Refs) 247 | OS << Indent.next() << R->getText(SourceMgr) << '\n'; 248 | } else { 249 | llvm_unreachable("TODO: unhandled preprocessor directive"); 250 | } 251 | } 252 | 253 | void printAST(raw_ostream &OS, const Stmt &Root, 254 | const SourceManager &SourceMgr) { 255 | ASTPrinter AP{ SourceMgr, OS }; 256 | AP.print(Indented(0), Root); 257 | } 258 | 259 | void printAST(raw_ostream &OS, const TranslationUnit &TU, 260 | const SourceManager &SourceMgr) { 261 | ASTPrinter AP{ SourceMgr, OS }; 262 | for (auto &P : TU.PPDirectives) { 263 | assert(P); 264 | AP.print(Indented(0), *P); 265 | } 266 | for (auto &S : TU.Body) { 267 | assert(S); 268 | AP.print(Indented(0), *S); 269 | } 270 | } 271 | 272 | } // end namespace fuzzy 273 | } // end namespace clang 274 | -------------------------------------------------------------------------------- /Fuzzy/FuzzyParser.cpp: -------------------------------------------------------------------------------- 1 | //===--- FuzzyParser.cpp - clang-highlight ----------------------*- C++ -*-===// 2 | // 3 | // The LLVM Compiler Infrastructure 4 | // 5 | // This file is distributed under 
the University of Illinois Open Source 6 | // License. See LICENSE.TXT for details. 7 | // 8 | //===----------------------------------------------------------------------===// 9 | #include "llvm/Support/Debug.h" 10 | #include "llvm/ADT/STLExtras.h" 11 | #include "clang/Basic/IdentifierTable.h" 12 | #include "clang/Basic/OperatorPrecedence.h" 13 | #include "FuzzyAST.h" 14 | 15 | using namespace llvm; 16 | 17 | namespace clang { 18 | namespace fuzzy { 19 | 20 | namespace { 21 | template class BasicTokenFilter { 22 | AnnotatedToken *First, *Last; 23 | 24 | void skipWhitespaces() { 25 | for (;;) { 26 | while (First != Last && (First->getTokenKind() == tok::unknown || 27 | First->getTokenKind() == tok::comment)) 28 | ++First; 29 | 30 | if (SkipPreprocessor && First->getTokenKind() == tok::hash && 31 | First->Tok().isAtStartOfLine()) 32 | while (First != Last && First++->getTokenKind() != tok::eod) 33 | ; 34 | else 35 | break; 36 | } 37 | assert(First <= Last); 38 | } 39 | 40 | public: 41 | BasicTokenFilter(AnnotatedToken *First, AnnotatedToken *Last) 42 | : First(First), Last(Last) { 43 | skipWhitespaces(); 44 | } 45 | 46 | AnnotatedToken *next() { 47 | assert(!eof()); 48 | auto Ret = First++; 49 | skipWhitespaces(); 50 | assert(Ret->getTokenKind() != tok::raw_identifier); 51 | return Ret; 52 | } 53 | 54 | class TokenFilterState { 55 | friend class BasicTokenFilter; 56 | TokenFilterState(AnnotatedToken *First, AnnotatedToken *Last) 57 | : First(First), Last(Last) {} 58 | AnnotatedToken *First, *Last; 59 | }; 60 | 61 | TokenFilterState mark() const { return TokenFilterState(First, Last); } 62 | void rewind(TokenFilterState State) { 63 | First = State.First; 64 | Last = State.Last; 65 | } 66 | 67 | BasicTokenFilter rangeAsTokenFilter(TokenFilterState From, 68 | TokenFilterState To) const { 69 | assert(From.Last == To.Last); 70 | assert(From.First <= To.First); 71 | assert(To.First < To.Last); 72 | return BasicTokenFilter(From.First, To.First + 1); 73 | } 74 | 75 | class 
TokenFilterGuard { 76 | friend class BasicTokenFilter; 77 | TokenFilterGuard(BasicTokenFilter *TF, TokenFilterState State) 78 | : TF(TF), State(State) {} 79 | 80 | public: 81 | ~TokenFilterGuard() { 82 | if (TF) 83 | TF->rewind(State); 84 | } 85 | void dismiss() { TF = nullptr; } 86 | BasicTokenFilter *TF; 87 | TokenFilterState State; 88 | }; 89 | TokenFilterGuard guard() { return TokenFilterGuard(this, mark()); } 90 | 91 | AnnotatedToken *peek() { return First; } 92 | const AnnotatedToken *peek() const { return First; } 93 | tok::TokenKind peekKind() const { return First->getTokenKind(); } 94 | 95 | bool eof() const { return peekKind() == tok::eof; } 96 | }; 97 | using TokenFilter = BasicTokenFilter; 98 | using RawTokenFilter = BasicTokenFilter; 99 | } // end anonymous namespace 100 | 101 | template 102 | static bool checkKind(BasicTokenFilter &TF, tok::TokenKind Kind) { 103 | return TF.peekKind() == Kind; 104 | } 105 | 106 | static int PrecedenceUnaryOperator = prec::PointerToMember + 1; 107 | static int PrecedenceArrowAndPeriod = prec::PointerToMember + 2; 108 | 109 | static std::unique_ptr parseExpr(TokenFilter &TF, int Precedence = 1, 110 | bool StopAtGreater = false); 111 | 112 | static std::unique_ptr parseType(TokenFilter &TF, 113 | bool WithDecorations = true); 114 | 115 | static std::unique_ptr parseUnaryOperator(TokenFilter &TF) { 116 | if (checkKind(TF, tok::plus) || checkKind(TF, tok::minus) || 117 | checkKind(TF, tok::exclaim) || checkKind(TF, tok::tilde) || 118 | checkKind(TF, tok::star) || checkKind(TF, tok::amp) || 119 | checkKind(TF, tok::plusplus) || checkKind(TF, tok::minusminus)) { 120 | AnnotatedToken *Op = TF.next(); 121 | auto Operand = parseUnaryOperator(TF); 122 | if (!Operand) 123 | return {}; 124 | return make_unique(Op, std::move(Operand)); 125 | } 126 | 127 | return parseExpr(TF, PrecedenceArrowAndPeriod); 128 | } 129 | 130 | static std::unique_ptr 131 | parseCallExpr(TokenFilter &TF, std::unique_ptr FunctionName) { 132 | 
assert(checkKind(TF, tok::l_paren)); 133 | auto Func = make_unique(std::move(FunctionName)); 134 | Func->setLeftParen(TF.next()); 135 | while (!checkKind(TF, tok::r_paren)) { 136 | Func->Args.push_back(parseExpr(TF, prec::Comma + 1)); 137 | if (checkKind(TF, tok::comma)) 138 | Func->appendComma(TF.next()); 139 | else 140 | break; 141 | } 142 | if (checkKind(TF, tok::r_paren)) { 143 | Func->setRightParen(TF.next()); 144 | return std::move(Func); 145 | } 146 | return {}; 147 | } 148 | 149 | static bool isLiteralOrConstant(tok::TokenKind K) { 150 | if (isLiteral(K)) 151 | return true; 152 | 153 | switch (K) { 154 | case tok::kw_this: 155 | case tok::kw_true: 156 | case tok::kw_false: 157 | case tok::kw___objc_yes: 158 | case tok::kw___objc_no: 159 | case tok::kw_nullptr: 160 | return true; 161 | default: 162 | return false; 163 | } 164 | } 165 | 166 | template 167 | static bool parseNamespaceQualifiers(TokenFilter &TF, QualOwner &Qual) { 168 | auto Guard = TF.guard(); 169 | 170 | if (checkKind(TF, tok::kw_operator)) { 171 | Qual.addNameQualifier(TF.next()); 172 | if (!TF.peek()) 173 | return false; 174 | Qual.addNameQualifier(TF.next()); 175 | Guard.dismiss(); 176 | return true; 177 | } 178 | 179 | bool GlobalNamespaceColon = true; 180 | do { 181 | if (checkKind(TF, tok::coloncolon)) 182 | Qual.addNameQualifier(TF.next()); 183 | else if (!GlobalNamespaceColon) 184 | return false; 185 | GlobalNamespaceColon = false; 186 | if (!checkKind(TF, tok::identifier)) 187 | return false; 188 | Qual.addNameQualifier(TF.next()); 189 | } while (checkKind(TF, tok::coloncolon)); 190 | 191 | Guard.dismiss(); 192 | return true; 193 | } 194 | 195 | template 196 | static bool parseTemplateArgs(TokenFilter &TF, QualOwner &Qual, 197 | std::false_type) { 198 | return true; 199 | } 200 | template 201 | static bool parseTemplateArgs(TokenFilter &TF, QualOwner &Qual, 202 | std::true_type) { 203 | auto Guard = TF.guard(); 204 | 205 | if (checkKind(TF, tok::less)) { 206 | 
Qual.makeTemplateArgs(); 207 | bool isFirst = true; 208 | do { 209 | Qual.addTemplateSeparator(TF.next()); 210 | 211 | if (isFirst && checkKind(TF, tok::greater)) 212 | break; 213 | isFirst = false; 214 | 215 | if (auto Arg = parseType(TF)) 216 | Qual.addTemplateArgument(std::move(Arg)); 217 | else if (auto E = parseExpr(TF, prec::Comma + 1, /*StopAtGreater=*/true)) 218 | Qual.addTemplateArgument(std::move(E)); 219 | else 220 | return false; 221 | } while (checkKind(TF, tok::comma)); 222 | if (!checkKind(TF, tok::greater)) 223 | return false; 224 | Qual.addTemplateSeparator(TF.next()); 225 | } 226 | 227 | Guard.dismiss(); 228 | return true; 229 | } 230 | 231 | template 232 | static bool parseQualifiedID(TokenFilter &TF, QualOwner &Qual, 233 | WithTemplateArgs WTA = std::true_type{}) { 234 | auto Guard = TF.guard(); 235 | if (parseNamespaceQualifiers(TF, Qual) && parseTemplateArgs(TF, Qual, WTA)) { 236 | Guard.dismiss(); 237 | return true; 238 | } 239 | return false; 240 | } 241 | 242 | static std::unique_ptr parseExpr(TokenFilter &TF, int Precedence, 243 | bool StopAtGreater) { 244 | if (!TF.peek()) 245 | return {}; 246 | 247 | if (Precedence == PrecedenceUnaryOperator) 248 | return parseUnaryOperator(TF); 249 | 250 | if (Precedence > PrecedenceArrowAndPeriod) { 251 | if (isLiteralOrConstant(TF.peekKind())) 252 | return make_unique(TF.next()); 253 | 254 | if (checkKind(TF, tok::l_paren)) { 255 | auto Left = TF.next(); 256 | auto Val = parseExpr(TF, 1, false); 257 | if (!checkKind(TF, tok::r_paren)) 258 | return {}; 259 | auto Right = TF.next(); 260 | return make_unique(Left, std::move(Val), Right); 261 | } 262 | 263 | if (checkKind(TF, tok::identifier) || checkKind(TF, tok::coloncolon)) { 264 | auto DR = make_unique(); 265 | if (!parseQualifiedID(TF, *DR) && 266 | !parseQualifiedID(TF, *DR, std::false_type{})) 267 | return {}; 268 | if (checkKind(TF, tok::l_paren)) 269 | return parseCallExpr(TF, std::move(DR)); 270 | std::unique_ptr Ret = std::move(DR); 271 | while 
(checkKind(TF, tok::plusplus) || checkKind(TF, tok::minusminus)) 272 | Ret = make_unique(TF.next(), std::move(Ret)); 273 | return std::move(Ret); 274 | } 275 | 276 | return {}; 277 | } 278 | auto LeftExpr = parseExpr(TF, Precedence + 1, StopAtGreater); 279 | if (!LeftExpr) 280 | return {}; 281 | 282 | while (!TF.eof()) { 283 | if (StopAtGreater && checkKind(TF, tok::greater)) 284 | break; 285 | 286 | int CurrentPrecedence = getBinOpPrecedence(TF.peekKind(), true, true); 287 | if (checkKind(TF, tok::period) || checkKind(TF, tok::arrow)) 288 | CurrentPrecedence = PrecedenceArrowAndPeriod; 289 | if (CurrentPrecedence == 0) 290 | return LeftExpr; 291 | 292 | assert(CurrentPrecedence <= Precedence); 293 | if (CurrentPrecedence < Precedence) 294 | break; 295 | assert(CurrentPrecedence == Precedence); 296 | 297 | AnnotatedToken *OperatorTok = TF.next(); 298 | 299 | auto RightExpr = parseExpr(TF, Precedence + 1, StopAtGreater); 300 | if (!RightExpr) 301 | return {}; 302 | 303 | LeftExpr = make_unique(std::move(LeftExpr), 304 | std::move(RightExpr), OperatorTok); 305 | } 306 | 307 | return LeftExpr; 308 | } 309 | 310 | static std::unique_ptr parseReturnStmt(TokenFilter &TF) { 311 | auto Guard = TF.guard(); 312 | if (!checkKind(TF, tok::kw_return)) 313 | return {}; 314 | auto *Return = TF.next(); 315 | std::unique_ptr Body; 316 | if (!checkKind(TF, tok::semi)) { 317 | Body = parseExpr(TF); 318 | if (!Body || !checkKind(TF, tok::semi)) 319 | return {}; 320 | } 321 | assert(checkKind(TF, tok::semi)); 322 | auto *Semi = TF.next(); 323 | Guard.dismiss(); 324 | return make_unique(Return, std::move(Body), Semi); 325 | } 326 | 327 | static void parseTypeDecorations(TokenFilter &TF, Type &T) { 328 | // TODO: add const and volatile 329 | while (checkKind(TF, tok::star) || checkKind(TF, tok::amp) || 330 | checkKind(TF, tok::ampamp)) 331 | T.Decorations.push_back(Type::Decoration(checkKind(TF, tok::star) 332 | ? 
Type::Decoration::Pointer 333 | : Type::Decoration::Reference, 334 | TF.next())); 335 | for (auto &Dec : T.Decorations) 336 | Dec.fix(); 337 | } 338 | 339 | static bool isBuiltinType(tok::TokenKind K) { 340 | switch (K) { 341 | case tok::kw_short: 342 | case tok::kw_long: 343 | case tok::kw___int64: 344 | case tok::kw___int128: 345 | case tok::kw_signed: 346 | case tok::kw_unsigned: 347 | case tok::kw__Complex: 348 | case tok::kw__Imaginary: 349 | case tok::kw_void: 350 | case tok::kw_char: 351 | case tok::kw_wchar_t: 352 | case tok::kw_char16_t: 353 | case tok::kw_char32_t: 354 | case tok::kw_int: 355 | case tok::kw_half: 356 | case tok::kw_float: 357 | case tok::kw_double: 358 | case tok::kw_bool: 359 | case tok::kw__Bool: 360 | case tok::kw__Decimal32: 361 | case tok::kw__Decimal64: 362 | case tok::kw__Decimal128: 363 | case tok::kw___vector: 364 | return true; 365 | default: 366 | return false; 367 | } 368 | } 369 | 370 | static bool isCVQualifier(tok::TokenKind K) { 371 | switch (K) { 372 | case tok::kw_const: 373 | case tok::kw_constexpr: 374 | case tok::kw_volatile: 375 | case tok::kw_register: 376 | return true; 377 | default: 378 | return false; 379 | } 380 | } 381 | 382 | static std::unique_ptr parseType(TokenFilter &TF, bool WithDecorations) { 383 | auto Guard = TF.guard(); 384 | std::unique_ptr T = make_unique(); 385 | 386 | while (isCVQualifier(TF.peekKind()) || checkKind(TF, tok::kw_typename)) 387 | T->addNameQualifier(TF.next()); 388 | 389 | if (checkKind(TF, tok::kw_auto)) { 390 | T->addNameQualifier(TF.next()); 391 | } else if (isBuiltinType(TF.peekKind())) { 392 | while (isBuiltinType(TF.peekKind())) 393 | T->addNameQualifier(TF.next()); 394 | } else if (!parseQualifiedID(TF, *T)) { 395 | return {}; 396 | } 397 | while (isCVQualifier(TF.peekKind())) 398 | T->addNameQualifier(TF.next()); 399 | 400 | if (WithDecorations) 401 | parseTypeDecorations(TF, *T); 402 | 403 | Guard.dismiss(); 404 | return T; 405 | } 406 | 407 | static std::unique_ptr 
parseVarDecl(TokenFilter &TF, 408 | Type *TypeName = 0, 409 | bool NameOptional = false, 410 | bool StopAtGreater = false) { 411 | auto Guard = TF.guard(); 412 | auto VD = make_unique(); 413 | VarDecl &D = *VD; 414 | 415 | if (!TypeName) { 416 | D.VariableType = parseType(TF); 417 | if (!D.VariableType) 418 | return {}; 419 | } else { 420 | D.VariableType = TypeName->cloneWithoutDecorations(); 421 | } 422 | parseTypeDecorations(TF, *D.VariableType); 423 | 424 | if (checkKind(TF, tok::identifier)) { 425 | D.setName(TF.next()); 426 | } else if (!NameOptional) { 427 | return {}; 428 | } 429 | 430 | if (checkKind(TF, tok::equal)) { 431 | auto *EqualTok = TF.next(); 432 | if (auto Value = parseExpr(TF, prec::Comma + 1, StopAtGreater)) { 433 | D.Value = VarInitialization(); 434 | D.Value->setAssignmentOps(VarInitialization::ASSIGNMENT, EqualTok); 435 | D.Value->Value = std::move(Value); 436 | } else { 437 | return {}; 438 | } 439 | } else { 440 | // TODO: var(init) and var{init} not yet implemented 441 | } 442 | Guard.dismiss(); 443 | return VD; 444 | } 445 | 446 | static std::unique_ptr parseDeclStmt(TokenFilter &TF, 447 | bool WithSemi = true) { 448 | auto Guard = TF.guard(); 449 | 450 | auto TypeName = parseType(TF, /*WithDecorations=*/false); 451 | if (!TypeName) 452 | return {}; 453 | auto Declaration = make_unique(); 454 | 455 | while (!TF.eof()) { 456 | if (checkKind(TF, tok::semi)) { 457 | if (Declaration->Decls.empty()) 458 | return {}; 459 | if (WithSemi) 460 | Declaration->setSemi(TF.next()); 461 | Guard.dismiss(); 462 | return std::move(Declaration); 463 | } 464 | if (auto D = parseVarDecl(TF, TypeName.get())) 465 | Declaration->Decls.push_back(std::move(D)); 466 | else 467 | return {}; 468 | 469 | if (checkKind(TF, tok::comma)) { 470 | Declaration->appendComma(TF.next()); 471 | } else if (!checkKind(TF, tok::semi)) { 472 | return {}; 473 | } 474 | } 475 | 476 | return {}; 477 | } 478 | 479 | static bool parseDestructor(TokenFilter &TF, FunctionDecl &F) { 480 
| auto Pos = TF.mark(); 481 | 482 | int Tildes = 0; 483 | while (checkKind(TF, tok::tilde) || checkKind(TF, tok::identifier) || 484 | checkKind(TF, tok::coloncolon)) { 485 | Tildes += checkKind(TF, tok::tilde); 486 | TF.next(); 487 | } 488 | if (Tildes != 1) 489 | return false; 490 | 491 | if (!checkKind(TF, tok::l_paren)) 492 | return false; 493 | 494 | TF.rewind(Pos); 495 | 496 | F.ReturnType = make_unique(); 497 | 498 | while (checkKind(TF, tok::tilde) || checkKind(TF, tok::identifier) || 499 | checkKind(TF, tok::coloncolon)) { 500 | if (checkKind(TF, tok::tilde)) 501 | F.addNameQualifier(TF.next()); 502 | else 503 | F.ReturnType->addNameQualifier(TF.next()); 504 | } 505 | 506 | return true; 507 | } 508 | 509 | static bool isDeclSpecifier(tok::TokenKind K) { 510 | switch (K) { 511 | case tok::kw_friend: 512 | // case tok::kw_constexpr: 513 | // case tok::kw_const: 514 | // case tok::kw_mutable: 515 | case tok::kw_typedef: 516 | // case tok::kw_register: 517 | case tok::kw_static: 518 | // case tok::kw_thread_local: 519 | case tok::kw_extern: 520 | case tok::kw_inline: 521 | case tok::kw_virtual: 522 | case tok::kw_explicit: 523 | return true; 524 | default: 525 | return false; 526 | } 527 | } 528 | 529 | static std::unique_ptr 530 | parseFunctionDecl(TokenFilter &TF, bool NameOptional = false) { 531 | auto Guard = TF.guard(); 532 | auto F = make_unique(); 533 | 534 | while (isDeclSpecifier(TF.peekKind())) 535 | F->addDeclSpecifier(TF.next()); 536 | 537 | bool InDestructor = false; 538 | 539 | if (auto T = parseType(TF)) { 540 | F->ReturnType = std::move(T); 541 | } else if (NameOptional && parseDestructor(TF, *F)) { 542 | InDestructor = true; 543 | } else { 544 | return {}; 545 | } 546 | 547 | if (!InDestructor) { 548 | if (!checkKind(TF, tok::identifier) && !checkKind(TF, tok::kw_operator)) { 549 | if (!NameOptional) 550 | return {}; 551 | } else if (!parseQualifiedID(TF, *F, std::false_type{})) { 552 | return {}; 553 | } 554 | } 555 | 556 | if (!checkKind(TF, 
tok::l_paren)) 557 | return {}; 558 | 559 | F->setLeftBrace(TF.next()); 560 | while (!checkKind(TF, tok::r_paren)) { 561 | F->Params.push_back(parseVarDecl(TF, 0, true)); 562 | if (!F->Params.back()) 563 | return {}; 564 | if (checkKind(TF, tok::comma)) 565 | F->appendComma(TF.next()); 566 | else 567 | break; 568 | } 569 | if (!checkKind(TF, tok::r_paren)) 570 | return {}; 571 | 572 | F->setRightBrace(TF.next()); 573 | 574 | // if (InConstructor && checkKind(TF, tok::colon)) { 575 | // TODO: Don't skip initializer list and [[x]] and const 576 | while (!TF.eof() && !checkKind(TF, tok::l_brace) && !checkKind(TF, tok::semi)) 577 | TF.next(); 578 | //} 579 | 580 | if (checkKind(TF, tok::semi)) 581 | F->setSemi(TF.next()); 582 | Guard.dismiss(); 583 | return std::move(F); 584 | } 585 | 586 | static std::unique_ptr skipUnparsable(TokenFilter &TF) { 587 | assert(!TF.eof()); 588 | auto UB = make_unique(); 589 | while (!TF.eof()) { 590 | auto Kind = TF.peekKind(); 591 | UB->push_back(TF.next()); 592 | if (Kind == tok::semi || Kind == tok::r_brace || Kind == tok::l_brace) 593 | break; 594 | } 595 | return std::move(UB); 596 | } 597 | 598 | static std::unique_ptr parseLabelStmt(TokenFilter &TF) { 599 | auto Guard = TF.guard(); 600 | if (!(checkKind(TF, tok::identifier) || checkKind(TF, tok::kw_private) || 601 | checkKind(TF, tok::kw_protected) || checkKind(TF, tok::kw_public))) 602 | return {}; 603 | auto *LabelName = TF.next(); 604 | if (!checkKind(TF, tok::colon)) 605 | return {}; 606 | Guard.dismiss(); 607 | return make_unique(LabelName, TF.next()); 608 | } 609 | 610 | static std::unique_ptr parseIncludeDirective(RawTokenFilter &TF) { 611 | if (!checkKind(TF, tok::hash)) 612 | return {}; 613 | auto Guard = TF.guard(); 614 | 615 | auto *HashTok = TF.next(); 616 | if (TF.peek()->Tok().getIdentifierInfo()->getPPKeywordID() != tok::pp_include) 617 | return {}; 618 | 619 | auto Inc = make_unique(); 620 | Inc->setHash(HashTok); 621 | Inc->setInclude(TF.next()); 622 | Inc->Path = 
make_unique(); 623 | 624 | while (!checkKind(TF, tok::eod)) { 625 | Inc->Path->addToken(TF.next()); 626 | } 627 | Inc->setEOD(TF.next()); 628 | return Inc; 629 | } 630 | 631 | static std::unique_ptr parsePPIf(RawTokenFilter &TF) { 632 | if (!checkKind(TF, tok::hash)) 633 | return {}; 634 | auto Guard = TF.guard(); 635 | 636 | auto *HashTok = TF.next(); 637 | 638 | if (TF.peek()->Tok().getIdentifierInfo()->getPPKeywordID() != tok::pp_else && 639 | TF.peek()->Tok().getIdentifierInfo()->getPPKeywordID() != tok::pp_if && 640 | TF.peek()->Tok().getIdentifierInfo()->getPPKeywordID() != tok::pp_elif && 641 | TF.peek()->Tok().getIdentifierInfo()->getPPKeywordID() != tok::pp_endif) 642 | return {}; 643 | 644 | auto If = make_unique(); 645 | If->setHash(HashTok); 646 | If->setKeyword(TF.next()); 647 | 648 | auto Start = TF.mark(); 649 | 650 | if (!checkKind(TF, tok::eod)) { 651 | while (!checkKind(TF, tok::eod)) 652 | TF.next(); 653 | assert(checkKind(TF, tok::eod)); 654 | 655 | TokenFilter SubTF = TF.rangeAsTokenFilter(Start, TF.mark()); 656 | 657 | auto SubStart = SubTF.mark(); 658 | std::unique_ptr Cond; 659 | if ((Cond = parseExpr(SubTF)) && checkKind(TF, tok::eod)) 660 | If->Cond = std::move(Cond); 661 | else { 662 | SubTF.rewind(SubStart); 663 | auto UB = make_unique(); 664 | while (!checkKind(SubTF, tok::eod)) 665 | UB->push_back(SubTF.next()); 666 | If->Cond = std::move(UB); 667 | } 668 | } 669 | 670 | assert(checkKind(TF, tok::eod)); 671 | If->setEOD(TF.next()); 672 | return If; 673 | } 674 | 675 | static std::unique_ptr parsePPDirective(RawTokenFilter &TF) { 676 | assert(checkKind(TF, tok::hash)); 677 | if (auto I = parseIncludeDirective(TF)) 678 | return std::move(I); 679 | if (auto D = parsePPIf(TF)) 680 | return std::move(D); 681 | auto UP = make_unique(); 682 | while (!checkKind(TF, tok::eod)) 683 | UP->push_back(TF.next()); 684 | return std::move(UP); 685 | } 686 | 687 | static std::unique_ptr parseAny(TokenFilter &TF, 688 | bool SkipUnparsable = true, 689 | 
bool NameOptional = false); 690 | 691 | static bool parseScope(TokenFilter &TF, Scope &Sc, bool NameOptional = false) { 692 | if (checkKind(TF, tok::r_brace)) 693 | return true; 694 | while (auto St = parseAny(TF, true, NameOptional)) { 695 | Sc.addStmt(std::move(St)); 696 | if (TF.eof()) 697 | return false; 698 | if (checkKind(TF, tok::r_brace)) 699 | return true; 700 | } 701 | return checkKind(TF, tok::r_brace); 702 | } 703 | 704 | static std::unique_ptr parseCompoundStmt(TokenFilter &TF) { 705 | if (!checkKind(TF, tok::l_brace)) 706 | return {}; 707 | auto C = make_unique(); 708 | C->setLeftBrace(TF.next()); 709 | parseScope(TF, *C); 710 | if (checkKind(TF, tok::r_brace)) 711 | C->setRightBrace(TF.next()); 712 | // else: just pass 713 | return C; 714 | } 715 | 716 | static std::unique_ptr parseControlFlowBody(TokenFilter &TF) { 717 | return checkKind(TF, tok::l_brace) ? parseCompoundStmt(TF) : parseAny(TF); 718 | } 719 | 720 | static std::unique_ptr parseCond(TokenFilter &TF, 721 | bool ForLoopInit = false) { 722 | if (ForLoopInit) 723 | if (auto D = parseDeclStmt(TF, /*WithSemi=*/false)) 724 | return std::move(D); 725 | { 726 | auto Guard = TF.guard(); 727 | if (auto D = parseVarDecl(TF)) { 728 | if (checkKind(TF, tok::r_paren)) { 729 | Guard.dismiss(); 730 | return std::move(D); 731 | } 732 | } 733 | } 734 | if (auto E = parseExpr(TF)) 735 | return std::move(E); 736 | 737 | auto UB = make_unique(); 738 | int ParenOpen = 1; 739 | while (!TF.eof()) { 740 | if (checkKind(TF, tok::l_paren)) { 741 | ++ParenOpen; 742 | } else if (checkKind(TF, tok::r_paren)) { 743 | if (--ParenOpen == 0) { 744 | return std::move(UB); 745 | } 746 | } 747 | 748 | if (checkKind(TF, tok::l_brace) || checkKind(TF, tok::r_brace) || 749 | checkKind(TF, tok::semi)) 750 | return std::move(UB); 751 | 752 | UB->push_back(TF.next()); 753 | } 754 | return std::move(UB); 755 | } 756 | 757 | static std::unique_ptr parseControlFlowStmt(TokenFilter &TF) { 758 | auto Guard = TF.guard(); 759 | 760 | 
if (checkKind(TF, tok::kw_while)) { 761 | auto S = make_unique(); 762 | 763 | S->setKeyword(TF.next()); 764 | if (!checkKind(TF, tok::l_paren)) 765 | return {}; 766 | S->setLeftParen(TF.next()); 767 | 768 | if (!(S->Cond = parseCond(TF))) 769 | return {}; 770 | 771 | if (checkKind(TF, tok::r_paren)) 772 | S->setRightParen(TF.next()); 773 | 774 | S->Body = parseControlFlowBody(TF); 775 | 776 | Guard.dismiss(); 777 | return std::move(S); 778 | } 779 | 780 | if (checkKind(TF, tok::kw_if)) { 781 | auto If = make_unique(); 782 | for (bool ElseBranch = false, First = true; !ElseBranch; First = false) { 783 | AnnotatedToken *KW1, *KW2 = nullptr; 784 | if (First && checkKind(TF, tok::kw_if)) { 785 | KW1 = TF.next(); 786 | } else if (checkKind(TF, tok::kw_else)) { 787 | KW1 = TF.next(); 788 | if (checkKind(TF, tok::kw_if)) 789 | KW2 = TF.next(); 790 | else 791 | ElseBranch = true; 792 | } else { 793 | break; 794 | } 795 | 796 | std::unique_ptr Cond; 797 | AnnotatedToken *LPar = nullptr, *RPar = nullptr; 798 | 799 | if (!ElseBranch) { 800 | if (!checkKind(TF, tok::l_paren)) 801 | return {}; 802 | LPar = TF.next(); 803 | 804 | if (!(Cond = parseCond(TF))) 805 | return {}; 806 | 807 | if (checkKind(TF, tok::r_paren)) 808 | RPar = TF.next(); 809 | } 810 | 811 | auto Body = parseControlFlowBody(TF); 812 | 813 | If->addBranch(KW1, KW2, LPar, std::move(Cond), RPar, std::move(Body)); 814 | } 815 | Guard.dismiss(); 816 | return std::move(If); 817 | } 818 | 819 | if (checkKind(TF, tok::kw_for)) { 820 | auto S = make_unique(); 821 | 822 | S->setKeyword(TF.next()); 823 | if (!checkKind(TF, tok::l_paren)) 824 | return {}; 825 | S->setLeftParen(TF.next()); 826 | 827 | if (!checkKind(TF, tok::semi) && 828 | !(S->Init = parseCond(TF, /*ForLoopInit=*/true))) 829 | return {}; 830 | if (!checkKind(TF, tok::semi)) 831 | return {}; 832 | S->setSemi1(TF.next()); 833 | if (!checkKind(TF, tok::semi) && !(S->Cond = parseCond(TF))) 834 | return {}; 835 | if (!checkKind(TF, tok::semi)) 836 | return 
{}; 837 | S->setSemi2(TF.next()); 838 | if (!checkKind(TF, tok::r_paren) && !(S->Inc = parseExpr(TF))) 839 | return {}; 840 | 841 | if (checkKind(TF, tok::r_paren)) 842 | S->setRightParen(TF.next()); 843 | 844 | S->Body = parseControlFlowBody(TF); 845 | 846 | Guard.dismiss(); 847 | return std::move(S); 848 | } 849 | 850 | return {}; 851 | } 852 | 853 | static bool parseClassScope(TokenFilter &TF, ClassDecl &C) { 854 | if (!checkKind(TF, tok::l_brace)) 855 | return false; 856 | 857 | C.setLeftBrace(TF.next()); 858 | if (!parseScope(TF, C, true)) 859 | return false; 860 | 861 | if (checkKind(TF, tok::r_brace)) 862 | C.setRightBrace(TF.next()); 863 | 864 | if (checkKind(TF, tok::semi)) 865 | C.setSemi(TF.next()); 866 | // else: just pass 867 | 868 | return true; 869 | } 870 | 871 | static std::unique_ptr parseNamespaceDecl(TokenFilter &TF) { 872 | if (!checkKind(TF, tok::kw_namespace)) 873 | return {}; 874 | auto Guard = TF.guard(); 875 | 876 | AnnotatedToken *NSTok = TF.next(), *NameTok = nullptr; 877 | if (checkKind(TF, tok::identifier)) 878 | NameTok = TF.next(); 879 | 880 | if (!checkKind(TF, tok::l_brace)) 881 | return {}; 882 | 883 | auto NS = make_unique(); 884 | NS->setNamespace(NSTok); 885 | NS->setName(NameTok); 886 | NS->setLeftBrace(TF.next()); 887 | 888 | parseScope(TF, *NS); 889 | 890 | if (checkKind(TF, tok::r_brace)) 891 | NS->setRightBrace(TF.next()); 892 | 893 | Guard.dismiss(); 894 | return std::move(NS); 895 | } 896 | 897 | static std::unique_ptr parseClassDecl(TokenFilter &TF) { 898 | if (!(checkKind(TF, tok::kw_class) || checkKind(TF, tok::kw_struct) || 899 | checkKind(TF, tok::kw_union) || checkKind(TF, tok::kw_enum))) 900 | return {}; 901 | 902 | auto Guard = TF.guard(); 903 | 904 | auto C = make_unique(); 905 | C->setClass(TF.next()); 906 | 907 | if (!(C->Name = parseType(TF))) 908 | return {}; 909 | 910 | if (checkKind(TF, tok::colon)) { 911 | C->setColon(TF.next()); 912 | bool Skip = true; 913 | for (;;) { 914 | AnnotatedToken *Accessibility 
= nullptr; 915 | if (checkKind(TF, tok::kw_private) || checkKind(TF, tok::kw_protected) || 916 | checkKind(TF, tok::kw_public)) 917 | Accessibility = TF.next(); 918 | auto T = parseType(TF, false); 919 | if (!T) 920 | break; 921 | if (checkKind(TF, tok::l_brace)) { 922 | C->addBaseClass(Accessibility, std::move(T), nullptr); 923 | Skip = false; 924 | break; 925 | } 926 | if (!checkKind(TF, tok::comma)) 927 | break; 928 | C->addBaseClass(Accessibility, std::move(T), TF.next()); 929 | } 930 | if (Skip) { 931 | while (!checkKind(TF, tok::l_brace)) 932 | TF.next(); 933 | } 934 | } 935 | 936 | if (checkKind(TF, tok::semi)) 937 | C->setSemi(TF.next()); 938 | else 939 | parseClassScope(TF, *C); 940 | 941 | Guard.dismiss(); 942 | return C; 943 | } 944 | 945 | static std::unique_ptr 946 | parseTemplateParameterType(TokenFilter &TF) { 947 | if (!(checkKind(TF, tok::kw_typename) || checkKind(TF, tok::kw_class))) 948 | return {}; 949 | auto Guard = TF.guard(); 950 | 951 | auto TPT = make_unique(); 952 | TPT->setKeyword(TF.next()); 953 | if (!checkKind(TF, tok::identifier)) 954 | return {}; 955 | TPT->setName(TF.next()); 956 | 957 | if (checkKind(TF, tok::equal)) { 958 | TPT->setEqual(TF.next()); 959 | if (!(TPT->DefaultType = parseType(TF))) 960 | return {}; 961 | } 962 | 963 | Guard.dismiss(); 964 | return TPT; 965 | } 966 | static std::unique_ptr parseTemplateDecl(TokenFilter &TF) { 967 | if (!checkKind(TF, tok::kw_template)) 968 | return {}; 969 | 970 | auto Guard = TF.guard(); 971 | auto T = make_unique(); 972 | T->setKeyword(TF.next()); 973 | 974 | if (!checkKind(TF, tok::less)) 975 | return {}; 976 | T->setLess(TF.next()); 977 | 978 | while (!checkKind(TF, tok::greater)) { 979 | if (auto D = parseVarDecl(TF, /*TypeName=*/0, /*NameOptional*/ false, 980 | /*StopAtGreater=*/true)) 981 | T->addParam(std::move(D)); 982 | else if (auto TPT = parseTemplateParameterType(TF)) 983 | T->addParam(std::move(TPT)); 984 | else 985 | return {}; 986 | 987 | if (checkKind(TF, tok::comma)) 
988 | T->addComma(TF.next()); 989 | else if (!checkKind(TF, tok::greater)) 990 | return {}; 991 | } 992 | 993 | assert(checkKind(TF, tok::greater)); 994 | T->setGreater(TF.next()); 995 | 996 | if (auto F = parseFunctionDecl(TF)) 997 | T->Templated = std::move(F); 998 | else if (auto C = parseClassDecl(TF)) 999 | T->Templated = std::move(C); 1000 | else 1001 | return {}; 1002 | 1003 | Guard.dismiss(); 1004 | return T; 1005 | } 1006 | 1007 | static std::unique_ptr parseAny(TokenFilter &TF, bool SkipUnparsable, 1008 | bool NameOptional) { 1009 | if (auto S = parseDeclStmt(TF)) 1010 | return S; 1011 | if (auto S = parseReturnStmt(TF)) 1012 | return S; 1013 | if (auto S = parseLabelStmt(TF)) 1014 | return S; 1015 | if (auto S = parseControlFlowStmt(TF)) 1016 | return S; 1017 | if (auto S = parseTemplateDecl(TF)) 1018 | return std::move(S); 1019 | if (auto S = parseFunctionDecl(TF, NameOptional)) { 1020 | if (checkKind(TF, tok::semi)) 1021 | S->setSemi(TF.next()); 1022 | else if (checkKind(TF, tok::l_brace)) { 1023 | S->Body = parseCompoundStmt(TF); 1024 | } 1025 | return std::move(S); 1026 | } 1027 | if (auto S = parseNamespaceDecl(TF)) 1028 | return S; 1029 | 1030 | if (auto S = parseClassDecl(TF)) { 1031 | if (checkKind(TF, tok::semi)) 1032 | S->setSemi(TF.next()); 1033 | else if (checkKind(TF, tok::l_brace)) { 1034 | parseClassScope(TF, *S); 1035 | } 1036 | return std::move(S); 1037 | } 1038 | { 1039 | auto Guard = TF.guard(); 1040 | if (auto E = parseExpr(TF)) { 1041 | if (checkKind(TF, tok::semi)) { 1042 | Guard.dismiss(); 1043 | return make_unique(std::move(E), TF.next()); 1044 | } 1045 | } 1046 | } 1047 | return SkipUnparsable ? 
skipUnparsable(TF) : std::unique_ptr(); 1048 | } 1049 | 1050 | TranslationUnit fuzzyparse(AnnotatedToken *first, AnnotatedToken *last) { 1051 | TranslationUnit TU; 1052 | { 1053 | BasicTokenFilter TF(first, last); 1054 | while (!TF.eof()) { 1055 | if (TF.peekKind() == tok::hash && TF.peek()->Tok().isAtStartOfLine()) 1056 | TU.addPPDirective(parsePPDirective(TF)); 1057 | TF.next(); 1058 | } 1059 | } 1060 | { 1061 | TokenFilter TF(first, last); 1062 | while (!TF.eof()) 1063 | TU.addStmt(parseAny(TF)); 1064 | } 1065 | return TU; 1066 | } 1067 | 1068 | } // end namespace fuzzy 1069 | } // end namespace clang 1070 | -------------------------------------------------------------------------------- /FuzzyType.h: -------------------------------------------------------------------------------- 1 | #error NOT YET NEEDED 2 | //===--- FuzzyType.h - clang-highlight --------------------------*- C++ -*-===// 3 | // 4 | // The LLVM Compiler Infrastructure 5 | // 6 | // This file is distributed under the University of Illinois Open Source 7 | // License. See LICENSE.TXT for details. 
8 | // 9 | //===----------------------------------------------------------------------===// 10 | 11 | #ifndef LLVM_CLANG_TOOLS_CLANG_HIGHLIGHT_FUZZY_TYPE_H 12 | #define LLVM_CLANG_TOOLS_CLANG_HIGHLIGHT_FUZZY_TYPE_H 13 | 14 | #include "clang/Basic/SourceManager.h" 15 | #include "AnnotatedToken.h" 16 | #include 17 | 18 | using namespace clang; 19 | 20 | namespace clang { 21 | namespace fuzzy { 22 | /// \brief Holds a name token and a list of Pointer/Reference annotations, each tied to the token it came from. 23 | struct Type { 24 | struct TypeAnnotation { 25 | enum AnnotationClass { 26 | Pointer, 27 | Reference, 28 | }; 29 | AnnotationClass Class; 30 | AnnotatedToken *Tok; 31 | }; 32 | llvm::SmallVector Annotations; 33 | AnnotatedToken *NameToken; 34 | }; 35 | 36 | } // end namespace fuzzy 37 | } // end namespace clang 38 | 39 | #endif // LLVM_CLANG_TOOLS_CLANG_HIGHLIGHT_FUZZY_TYPE_H 40 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ##===- clang-highlight/Makefile ----------------------------*- Makefile -*-===## 2 | # 3 | # The LLVM Compiler Infrastructure 4 | # 5 | # This file is distributed under the University of Illinois Open Source 6 | # License. See LICENSE.TXT for details. 7 | # 8 | ##===----------------------------------------------------------------------===## 9 | 10 | CLANG_LEVEL := ../.. 11 | 12 | TOOLNAME = clang-highlight 13 | 14 | # No plugins, optimize startup time. 
15 | TOOL_NO_EXPORTS = 1 16 | 17 | include $(CLANG_LEVEL)/../../Makefile.config 18 | LINK_COMPONENTS := $(TARGETS_TO_BUILD) asmparser bitreader support mc option 19 | USEDLIBS = clangFormat.a clangTooling.a clangFrontend.a clangSerialization.a \ 20 | clangDriver.a clangParse.a clangSema.a clangAnalysis.a \ 21 | clangRewriteFrontend.a clangRewriteCore.a clangEdit.a clangAST.a \ 22 | clangLex.a clangBasic.a 23 | 24 | include $(CLANG_LEVEL)/Makefile 25 | -------------------------------------------------------------------------------- /OutputWriter.cpp: -------------------------------------------------------------------------------- 1 | //===--- OutputWriter.cpp - clang-highlight ---------------------*- C++ -*-===// 2 | // 3 | // The LLVM Compiler Infrastructure 4 | // 5 | // This file is distributed under the University of Illinois Open Source 6 | // License. See LICENSE.TXT for details. 7 | // 8 | //===----------------------------------------------------------------------===// 9 | /// 10 | /// \file OutputWriter.cpp 11 | /// \brief Converts the metadata into a given output format. 
12 | /// 13 | //===----------------------------------------------------------------------===// 14 | 15 | #include "llvm/Support/ErrorHandling.h" 16 | #include "llvm/Support/raw_ostream.h" 17 | #include "OutputWriter.h" 18 | 19 | using namespace llvm; 20 | 21 | namespace clang { 22 | namespace highlight { 23 | // Out-of-line definition of the virtual destructor declared in OutputWriter.h. 24 | OutputWriter::~OutputWriter() {} 25 | // StdoutFormatInfo: a raw_ostream color plus a bold flag (Bold defaults to false). 26 | namespace { 27 | struct StdoutFormatInfo { 28 | StdoutFormatInfo(raw_ostream::Colors Color, bool Bold = false) 29 | : Color(Color), Bold(Bold) {} 30 | raw_ostream::Colors Color; 31 | bool Bold; 32 | }; 33 | } // end anonymous namespace 34 | /// \brief Returns the raw_ostream color and bold flag for \p Class; classes without a dedicated case get non-bold BLACK. 35 | static StdoutFormatInfo getFormatInfo(TokenClass Class) { 36 | switch (Class) { 37 | case TokenClass::Type: 38 | return { raw_ostream::GREEN }; 39 | case TokenClass::Keyword: 40 | return { raw_ostream::BLUE }; 41 | case TokenClass::Comment: 42 | return { raw_ostream::RED }; 43 | case TokenClass::Namespace: 44 | return { raw_ostream::GREEN }; 45 | case TokenClass::Preprocessor: 46 | return { raw_ostream::CYAN }; 47 | case TokenClass::String: 48 | case TokenClass::Char: 49 | return { raw_ostream::MAGENTA }; 50 | case TokenClass::Numeric: 51 | return { raw_ostream::BLUE, true }; 52 | case TokenClass::Function: 53 | return { raw_ostream::BLACK, true }; 54 | default: 55 | return { raw_ostream::BLACK }; 56 | } 57 | } 58 | /// \brief Returns the inline CSS declarations for \p Class; classes without a dedicated case get "color:black". 59 | static const char *getSpanStyle(TokenClass Class) { 60 | switch (Class) { 61 | case TokenClass::Namespace: 62 | case TokenClass::Type: 63 | return "color:green"; 64 | case TokenClass::Keyword: 65 | return "color:blue"; 66 | case TokenClass::Comment: 67 | return "color:darkred"; 68 | case TokenClass::Preprocessor: 69 | return "color:purple"; 70 | case TokenClass::String: 71 | return "color:red"; 72 | case TokenClass::Char: 73 | return "color:magenta"; 74 | case TokenClass::Numeric: 75 | return "color:DarkSlateGray"; 76 | case TokenClass::Function: 77 | return "color:black;font-style:italic"; 78 | default: 79 | return "color:black"; 80 | } 81 | } 82 | 83 | 
static const char *getClassName(TokenClass Class) { 84 | switch (Class) { 85 | case TokenClass::Namespace: 86 | return "namespace"; 87 | case TokenClass::Type: 88 | return "type"; 89 | case TokenClass::Keyword: 90 | return "keyword"; 91 | case TokenClass::Comment: 92 | return "comment"; 93 | case TokenClass::Preprocessor: 94 | return "preprocessor"; 95 | case TokenClass::String: 96 | return "string"; 97 | case TokenClass::Char: 98 | return "char"; 99 | case TokenClass::Function: 100 | return "function"; 101 | case TokenClass::Numeric: 102 | return "numeric"; 103 | case TokenClass::Variable: 104 | return "variable"; 105 | default: 106 | return "default"; 107 | } 108 | } 109 | 110 | namespace { 111 | class XmlEscaper { 112 | StringRef S; 113 | 114 | public: 115 | XmlEscaper(StringRef S) : S(S) {}; 116 | 117 | friend raw_ostream &operator<<(raw_ostream &OS, const XmlEscaper &HE) { 118 | for (char C : HE.S) 119 | switch (C) { 120 | case '&': 121 | OS << "&"; 122 | break; 123 | case '\'': 124 | OS << "'"; 125 | break; 126 | case '"': 127 | OS << """; 128 | break; 129 | case '<': 130 | OS << "<"; 131 | break; 132 | case '>': 133 | OS << ">"; 134 | break; 135 | default: 136 | OS << C; 137 | break; 138 | } 139 | return OS; 140 | } 141 | }; 142 | } // end anonymous namespace 143 | 144 | XmlEscaper xmlEscaped(StringRef S) { return XmlEscaper(S); } 145 | 146 | namespace { 147 | class ColorStreamWriter : public OutputWriter { 148 | raw_ostream &OS; 149 | 150 | public: 151 | ColorStreamWriter(raw_ostream &OS) : OS(OS) { 152 | OS.changeColor(raw_ostream::BLACK); 153 | } 154 | ~ColorStreamWriter() { OS.changeColor(raw_ostream::BLACK); } 155 | 156 | void writeToken(StringRef Text, TokenClass Class) override { 157 | StdoutFormatInfo Style = getFormatInfo(Class); 158 | OS.changeColor(Style.Color, Style.Bold); 159 | OS << Text; 160 | } 161 | }; 162 | } // end anonymous namespace 163 | 164 | namespace { 165 | class HtmlWriter : public OutputWriter { 166 | raw_ostream &OS; 167 | 168 | 
public: 169 | HtmlWriter(raw_ostream &OS) : OS(OS) { 170 | OS << "

"; 171 | } 172 | ~HtmlWriter() { OS << "

"; } 173 | 174 | void writeToken(StringRef Text, TokenClass Class) override { 175 | OS << R"()" 176 | << xmlEscaped(Text) << ""; 177 | } 178 | }; 179 | } // end anonymous namespace 180 | 181 | namespace { 182 | class SemanticHtmlWriter : public OutputWriter { 183 | raw_ostream &OS; 184 | 185 | public: 186 | SemanticHtmlWriter(raw_ostream &OS) : OS(OS) { 187 | OS << R"( 200 |

)"; 201 | } 202 | ~SemanticHtmlWriter() { OS << "

"; } 203 | 204 | void writeToken(StringRef Text, TokenClass Class) override { 205 | OS << R"()" 206 | << xmlEscaped(Text) << ""; 207 | } 208 | }; 209 | } // end anonymous namespace 210 | 211 | namespace { 212 | class LaTeXEscaper { 213 | StringRef S; 214 | 215 | public: 216 | LaTeXEscaper(StringRef S) : S(S) {}; 217 | 218 | friend raw_ostream &operator<<(raw_ostream &OS, const LaTeXEscaper &HE) { 219 | for (char C : HE.S) 220 | switch (C) { 221 | case '{': 222 | case '}': 223 | case '_': 224 | case '&': 225 | case '#': 226 | case '%': 227 | case '$': 228 | OS << "{\\" << C << "}"; 229 | break; 230 | case '^': 231 | OS << "{\\^{}}"; 232 | break; 233 | case '\\': 234 | OS << "{\\textbackslash}"; 235 | break; 236 | case '<': 237 | OS << "{\\textless}"; 238 | break; 239 | case '>': 240 | OS << "{\\textgreater}"; 241 | break; 242 | case '~': 243 | OS << "{\\textasciitilde}"; 244 | break; 245 | default: 246 | OS << C; 247 | } 248 | return OS; 249 | } 250 | }; 251 | } // end anonymous namespace 252 | 253 | LaTeXEscaper latexEscaped(StringRef S) { return LaTeXEscaper(S); } 254 | 255 | namespace { 256 | class LaTeXWriter : public OutputWriter { 257 | raw_ostream &OS; 258 | 259 | public: 260 | LaTeXWriter(raw_ostream &OS) : OS(OS) {} 261 | ~LaTeXWriter() {} 262 | 263 | void writeToken(StringRef Text, TokenClass Class) override { 264 | if (Class == TokenClass::Whitespace) 265 | OS << latexEscaped(Text); 266 | else 267 | OS << "\\clangHighlightToken{" << getClassName(Class) << "}{" 268 | << latexEscaped(Text) << "}"; 269 | } 270 | }; 271 | } // end anonymous namespace 272 | 273 | std::unique_ptr makeOutputWriter(OutputFormat Format, 274 | raw_ostream &OS) { 275 | switch (Format) { 276 | case OutputFormat::StdoutColored: 277 | return std::unique_ptr(new ColorStreamWriter(OS)); 278 | case OutputFormat::HTML: 279 | return std::unique_ptr(new HtmlWriter(OS)); 280 | case OutputFormat::SemanticHTML: 281 | return std::unique_ptr(new SemanticHtmlWriter(OS)); 282 | case 
OutputFormat::LaTeX: 283 | return std::unique_ptr(new LaTeXWriter(OS)); 284 | default: 285 | llvm_unreachable("invalid flag"); 286 | } 287 | } 288 | 289 | } // end namespace highlight 290 | } // end namespace clang 291 | -------------------------------------------------------------------------------- /OutputWriter.h: -------------------------------------------------------------------------------- 1 | //===--- OutputWriter.h - clang-highlight -----------------------*- C++ -*-===// 2 | // 3 | // The LLVM Compiler Infrastructure 4 | // 5 | // This file is distributed under the University of Illinois Open Source 6 | // License. See LICENSE.TXT for details. 7 | // 8 | //===----------------------------------------------------------------------===// 9 | 10 | #ifndef LLVM_CLANG_TOOLS_CLANG_HIGHLIGHT_OUTPUT_WRITER_H 11 | #define LLVM_CLANG_TOOLS_CLANG_HIGHLIGHT_OUTPUT_WRITER_H 12 | 13 | #include "llvm/ADT/StringRef.h" 14 | #include 15 | 16 | namespace clang { 17 | namespace highlight { 18 | 19 | enum class OutputFormat { 20 | StdoutColored, 21 | HTML, 22 | SemanticHTML, 23 | LaTeX, 24 | // TODO: XML, LaTeX, SemanticLaTeX, ... 
25 | }; 26 | 27 | enum class TokenClass { 28 | NONE, 29 | Type, 30 | Variable, 31 | Function, 32 | Namespace, 33 | Keyword, 34 | Comment, 35 | Preprocessor, 36 | String, 37 | Char, 38 | Numeric, 39 | Other, 40 | Whitespace, 41 | }; 42 | 43 | class OutputWriter { 44 | public: 45 | virtual void writeToken(llvm::StringRef Text, TokenClass Class) = 0; 46 | virtual ~OutputWriter(); 47 | }; 48 | 49 | // \brief Creates a output writer that writes in the specified Format to stdout 50 | std::unique_ptr makeOutputWriter(OutputFormat Format, 51 | llvm::raw_ostream &OS); 52 | 53 | } // end namespace highlight 54 | } // end namespace clang 55 | 56 | #endif // LLVM_CLANG_TOOLS_CLANG_HIGHLIGHT_OUTPUT_WRITER_H 57 | -------------------------------------------------------------------------------- /TokenClassifier.cpp: -------------------------------------------------------------------------------- 1 | //===--- TokenClassifier.cpp - clang-highlight ------------------*- C++ -*-===// 2 | // 3 | // The LLVM Compiler Infrastructure 4 | // 5 | // This file is distributed under the University of Illinois Open Source 6 | // License. See LICENSE.TXT for details. 7 | // 8 | //===----------------------------------------------------------------------===// 9 | #include "llvm/Support/Debug.h" 10 | #include "clang/Lex/Lexer.h" 11 | #include "clang/Basic/IdentifierTable.h" 12 | #include "clang/Basic/Diagnostic.h" 13 | #include "clang/Basic/FileManager.h" 14 | #include "clang/Basic/LangOptions.h" 15 | #include "clang/Basic/SourceManager.h" 16 | #include "llvm/Config/config.h" 17 | #include "OutputWriter.h" 18 | #include 19 | #include "TokenClassifier.h" 20 | #include "Fuzzy/FuzzyAST.h" 21 | 22 | using namespace clang; 23 | 24 | namespace clang { 25 | namespace highlight { 26 | 27 | LangOptions getFormattingLangOpts(bool Cpp03 = false) { 28 | LangOptions LangOpts; 29 | LangOpts.CPlusPlus = 1; 30 | LangOpts.CPlusPlus11 = Cpp03 ? 0 : 1; 31 | LangOpts.CPlusPlus14 = Cpp03 ? 
0 : 1; 32 | LangOpts.LineComment = 1; 33 | LangOpts.Bool = 1; 34 | LangOpts.ObjC1 = 1; 35 | LangOpts.ObjC2 = 1; 36 | return LangOpts; 37 | } 38 | 39 | bool isCharLiteral(tok::TokenKind TK) { 40 | switch (TK) { 41 | case tok::char_constant: 42 | case tok::wide_char_constant: 43 | case tok::utf16_char_constant: 44 | case tok::utf32_char_constant: 45 | return true; 46 | default: 47 | return false; 48 | } 49 | } 50 | 51 | bool isKeyword(tok::TokenKind TK) { 52 | switch (TK) { 53 | #define KEYWORD(X, Y) case tok::kw_##X: 54 | #include "clang/Basic/TokenKinds.def" 55 | return true; 56 | default: 57 | return false; 58 | } 59 | } 60 | 61 | TokenClass convertTokenKindToTokenClass(tok::TokenKind TK) { 62 | if (isCharLiteral(TK)) 63 | return TokenClass::Char; 64 | if (isStringLiteral(TK)) 65 | return TokenClass::String; 66 | if (TK == tok::numeric_constant) 67 | return TokenClass::Numeric; 68 | if (isKeyword(TK)) 69 | return TokenClass::Keyword; 70 | if (TK == tok::annot_typename) 71 | return TokenClass::Type; 72 | if (TK == tok::comment) 73 | return TokenClass::Comment; 74 | if (TK == tok::unknown || TK == tok::eod) 75 | return TokenClass::Whitespace; 76 | return TokenClass::Other; 77 | } 78 | 79 | void highlight(std::unique_ptr Source, StringRef FileName, 80 | std::unique_ptr OW, bool IdentifiersOnly, 81 | bool DumpAST) { 82 | using namespace llvm; 83 | using namespace clang; 84 | 85 | FileManager Files((FileSystemOptions())); 86 | DiagnosticsEngine Diagnostics( 87 | IntrusiveRefCntPtr(new DiagnosticIDs), 88 | new DiagnosticOptions); 89 | SourceManager SourceMgr(Diagnostics, Files); 90 | llvm::MemoryBuffer *Buf = Source.release(); // SourceMgr owns Buf for us 91 | const clang::FileEntry *Entry = 92 | Files.getVirtualFile(FileName, Buf->getBufferSize(), 0); 93 | SourceMgr.overrideFileContents(Entry, Buf, false); 94 | FileID ID = 95 | SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User); 96 | 97 | auto Langs = getFormattingLangOpts(); 98 | Lexer Lex(ID, 
SourceMgr.getBuffer(ID), SourceMgr, Langs); 99 | Lex.SetKeepWhitespaceMode(true); 100 | 101 | IdentifierTable IdentTable(getFormattingLangOpts()); 102 | 103 | std::vector AllTokens; 104 | 105 | for (;;) { 106 | Token TmpTok; 107 | Lex.LexFromRawLexer(TmpTok); 108 | 109 | if (TmpTok.getKind() == tok::hash && TmpTok.isAtStartOfLine()) 110 | Lex.setParsingPreprocessorDirective(true); 111 | if (TmpTok.getKind() == tok::eod) 112 | Lex.setParsingPreprocessorDirective(false); 113 | 114 | AllTokens.push_back(fuzzy::AnnotatedToken(TmpTok)); 115 | Token &ThisTok = AllTokens.back().Tok(); 116 | 117 | StringRef TokenText(SourceMgr.getCharacterData(ThisTok.getLocation()), 118 | ThisTok.getLength()); 119 | 120 | if (ThisTok.is(tok::raw_identifier)) { 121 | IdentifierInfo &Info = IdentTable.get(TokenText); 122 | ThisTok.setIdentifierInfo(&Info); 123 | ThisTok.setKind(Info.getTokenID()); 124 | } 125 | 126 | if (ThisTok.is(tok::eof)) 127 | break; 128 | } 129 | 130 | auto TU = fuzzy::fuzzyparse(&*AllTokens.begin(), &*AllTokens.end()); 131 | 132 | if (DumpAST) { 133 | fuzzy::printAST(llvm::dbgs(), TU, SourceMgr); 134 | return; 135 | } 136 | 137 | const char *LastTokenStart = nullptr, *ThisTokenStart = nullptr; 138 | Token LastTok; 139 | TokenClass Class = TokenClass::NONE; 140 | for (auto &ATok : AllTokens) { 141 | Token &ThisTok = ATok.Tok(); 142 | 143 | ThisTokenStart = SourceMgr.getCharacterData(ThisTok.getLocation()); 144 | if (LastTokenStart) { 145 | if (Class == TokenClass::NONE || LastTok.getKind() == tok::eod) 146 | Class = convertTokenKindToTokenClass(LastTok.getKind()); 147 | OW->writeToken(StringRef(LastTokenStart, ThisTokenStart - LastTokenStart), 148 | Class); 149 | } 150 | 151 | Class = TokenClass::NONE; 152 | 153 | StringRef TokenText(SourceMgr.getCharacterData(ThisTok.getLocation()), 154 | ThisTok.getLength()); 155 | 156 | if (ATok.hasASTReference()) { 157 | auto *R = ATok.getASTReference(); 158 | if (llvm::isa(R) && 159 | ATok.getTokenKind() == tok::identifier) { 160 
| Class = TokenClass::Namespace; 161 | } 162 | auto isType = [&] { 163 | return llvm::isa(R) || 164 | llvm::isa(R); 165 | }; 166 | auto isTypeDecl = [&] { 167 | return ATok.getTokenKind() == tok::identifier && 168 | (llvm::isa(R) || 169 | llvm::isa(R)); 170 | }; 171 | if ((!IdentifiersOnly || ATok.getTokenKind() == tok::identifier) && 172 | (isType() || isTypeDecl())) { 173 | Class = TokenClass::Type; 174 | ThisTok.setKind(tok::annot_typename); 175 | } 176 | if (isa(R)) { 177 | Class = TokenClass::String; 178 | } 179 | if (isa(R)) { 180 | Class = TokenClass::Preprocessor; 181 | } 182 | if (isa(R)) { 183 | Class = TokenClass::Variable; 184 | } 185 | if (ATok.getTokenKind() == tok::identifier && 186 | (isa(R) || isa(R))) { 187 | Class = TokenClass::Function; 188 | } 189 | } 190 | LastTok = ThisTok; 191 | LastTokenStart = ThisTokenStart; 192 | } 193 | } 194 | 195 | } // end namespace highlight 196 | } // end namespace clang 197 | -------------------------------------------------------------------------------- /TokenClassifier.h: -------------------------------------------------------------------------------- 1 | //===--- TokenClassifier.h - clang-highlight --------------------*- C++ -*-===// 2 | // 3 | // The LLVM Compiler Infrastructure 4 | // 5 | // This file is distributed under the University of Illinois Open Source 6 | // License. See LICENSE.TXT for details. 
7 | // 8 | //===----------------------------------------------------------------------===// 9 | 10 | #ifndef LLVM_CLANG_TOOLS_CLANG_HIGHLIGHT_TOKEN_CLASSIFIER_H 11 | #define LLVM_CLANG_TOOLS_CLANG_HIGHLIGHT_TOKEN_CLASSIFIER_H 12 | 13 | #include "llvm/ADT/StringRef.h" 14 | #include 15 | 16 | namespace llvm { 17 | class MemoryBuffer; 18 | } 19 | 20 | namespace clang { 21 | namespace highlight { 22 | 23 | class OutputWriter; 24 | 25 | void highlight(std::unique_ptr Source, 26 | llvm::StringRef FileName, std::unique_ptr OW, 27 | bool IdentifiersOnly = false, bool DumpAST = false); 28 | 29 | } // end namespace highlight 30 | } // end namespace clang 31 | 32 | #endif // LLVM_CLANG_TOOLS_CLANG_HIGHLIGHT_TOKEN_CLASSIFIER_H 33 | -------------------------------------------------------------------------------- /docs/LibFuzzy.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | LibFuzzy 3 | ======== 4 | 5 | LibFuzzy is a library for heuristically parsing C++ based on Clang's Lexer. 6 | The fuzzy parser is fault-tolerant, works without knowledge of the build system 7 | and on incomplete source files. As the parser necessarily makes guesses, the 8 | resulting syntax tree may be partially wrong. 9 | 10 | This document describes the LibFuzzy design and interface. 11 | 12 | When to use LibFuzzy 13 | -------------------- 14 | 15 | Use LibFuzzy when you ...: 16 | 17 | * need fault-tolerant AST information 18 | * need classification of tokens, but not more 19 | * don't want setup overhead for your tool 20 | * want fast results from a small input 21 | 22 | Do not use LibFuzzy when you ...: 23 | 24 | * need 100% accuracy 25 | * need the context information that a full Clang AST provides 26 | 27 | Look at the different options for 28 | `Tooling <http://clang.llvm.org/docs/Tooling.html>`_ if you are interested in 29 | non-fuzzy approaches. 30 | 31 | The Fuzzy AST 32 | ------------- 33 | 34 | The fuzzy AST is defined in ``Fuzzy/FuzzyAST.h``.
It is designed to be as 35 | similar as possible to the 36 | `Clang AST <http://clang.llvm.org/docs/IntroductionToTheClangAST.html>`_, but 37 | differs because of some design decisions: 38 | 39 | * Each AST node contains references to all tokens that belong to it. This 40 | implies that by visiting all nodes of the AST of a particular source code, you 41 | find all the tokens lexed from that code. 42 | 43 | This has led to some hierarchy changes. E.g. ``Expr`` isn't derived from 44 | ``Stmt`` because as a statement ``Expr`` needs a trailing semicolon, but 45 | otherwise it doesn't. Therefore ``ExprLineStmt`` exists to make an ``Expr`` 46 | into a ``Stmt`` and keep track of the semicolon. 47 | 48 | * After parsing, each token of the input stream has a reference to the AST node 49 | that contains it. 50 | 51 | That's why a common base class for all AST nodes exists: ``ASTElement``. The 52 | Clang AST doesn't have that. 53 | 54 | * The fuzzy parser doesn't go much deeper than classification of tokens. 55 | 56 | There's no canonicalization of qualified identifiers. Types don't contain a 57 | reference to the type definition and can't be compared. 58 | 59 | How to use the Fuzzy AST 60 | ------------------------ 61 | 62 | The main entry point of the fuzzy parser is ``fuzzyparse``, which takes a range of 63 | ``AnnotatedToken`` objects as input. 64 | 65 | .. code-block:: c++ 66 | 67 | TranslationUnit fuzzyparse(AnnotatedToken *first, AnnotatedToken *last); 68 | 69 | ``AnnotatedToken`` is a Clang Lexer token combined with a reference to where 70 | in the fuzzy AST it is located. 71 | 72 | .. code-block:: c++ 73 | 74 | class AnnotatedToken { 75 | clang::Token Tok_; 76 | ASTElement *Annot; 77 | ... 78 | }; 79 | 80 | The Clang Tokens can be obtained from the Clang Lexer in raw mode. The source 81 | code of :program:`clang-highlight` contains sample usage. 82 | 83 | Current state 84 | ------------- 85 | 86 | The fuzzy parser can be tested with :program:`clang-highlight` and the 87 | ``-dump-ast`` option.
88 | 89 | .. code-block:: bash 90 | 91 | $ cat sample01.cpp 92 | if (<unparsable>) { 93 | f(1+1); 94 | } 95 | $ clang-highlight -dump-ast sample01.cpp 96 | If 97 | Condition 98 | Unparsable Block: 99 | < 100 | unparsable 101 | > 102 | Body: 103 | CompoundStmt: 104 | ExprLineStmt 105 | call expr 'f' 106 | 1 107 | plus 108 | 1 109 | 110 | The parser recognizes the if statement but is unable to parse the condition. 111 | Every unparsable range of source code is put into a ``UnparsableBlock`` which 112 | itself is a subclass of ``ASTElement``. The fuzzy parser is successfully able 113 | to recover from this error. 114 | 115 | C++ does not have a context-free grammar. If in doubt, a fuzzy parser has to 116 | make guesses which may or may not be right. 117 | 118 | .. code-block:: bash 119 | 120 | $ cat sample02.cpp 121 | auto ps = std::make_unique<std::string>(); 122 | std::array<int, 5> a; 123 | const int SIZE=5; 124 | std::array<int, SIZE> b; 125 | $ clang-highlight -dump-ast sample02.cpp 126 | DeclStmt 127 | VarDecl 'ps' 128 | Type 'auto' 129 | Assignment Type '=' 130 | call expr 'std::make_unique 131 | < 132 | Type 'std::string' 133 | >' 134 | DeclStmt 135 | VarDecl 'a' 136 | Type 'std::array 137 | < 138 | Type 'int' 139 | 5 140 | >' 141 | DeclStmt 142 | VarDecl 'SIZE' 143 | Type 'constint' 144 | Assignment Type '=' 145 | 5 146 | DeclStmt 147 | VarDecl 'b' 148 | Type 'std::array 149 | < 150 | Type 'int' 151 | Type 'SIZE' 152 | >' 153 | 154 | There are a number of guesses that need to be made in this code. Most 155 | importantly: 156 | 157 | * Is ``std::make_unique`` a function or a type? 158 | * Is ``std::string`` a constant or a type? 159 | * Is ``SIZE`` a constant or a type? 160 | 161 | The first two questions cannot be decided without further context. The current 162 | strategy is simple: If something looks like a function call, then it's a 163 | function and not a constructor. If a template argument is either a type or a 164 | constant, then it's a type. 165 | 166 | This strategy may be wrong.
Given that ``SIZE`` is declared inside this code 167 | snippet, it's fairly safe to assume that ``SIZE`` is a constant. However, the 168 | fuzzy parser currently does not use context information from the part it 169 | has already parsed. 170 | 171 | .. code-block:: bash 172 | 173 | $ cat sample03.cpp 174 | #if __cplusplus <= 199711L // C++03 or older 175 | std::tr1::auto_ptr<int> p; 176 | #else // C++11 177 | std::unique_ptr<int> p; 178 | #endif 179 | $ clang-highlight -dump-ast sample03.cpp 180 | Preprocessor 'if': 181 | DeclRefExpr '__cplusplus' 182 | lessequal 183 | 199711L 184 | Preprocessor 'else': 185 | Preprocessor 'endif': 186 | DeclStmt 187 | VarDecl 'p' 188 | Type 'std::tr1::auto_ptr 189 | < 190 | Type 'int' 191 | >' 192 | DeclStmt 193 | VarDecl 'p' 194 | Type 'std::unique_ptr 195 | < 196 | Type 'int' 197 | >' 198 | 199 | This illustrates why the Clang Parser isn't easily usable for highlighting even 200 | if the code is perfectly fine. There is no good solution to parse all 201 | preprocessor branches. If a program depends, say, on 10 macros (``__linux__``, 202 | ``__cplusplus``, ``sizeof int``, etc.) then there are 2^10=1024 compilation 203 | passes needed to get all possible results -- which may even lead to different 204 | ASTs in the same places. If a compiler ignores the conditions the code may 205 | contain syntax errors. The easiest solution would be to make only one pass and 206 | gray the unused code paths out. 207 | 208 | The fuzzy parser parses all preprocessor statements in one pass and the code 209 | without them in another. Because of its fuzziness, this should go reasonably well. 210 | 211 | What next 212 | --------- 213 | 214 | * Add all syntax elements of C++: Currently, only the most used subset of C++ 215 | is implemented. 216 | 217 | * Improve the fuzziness. Add more sophisticated algorithms to handle unbalanced 218 | parentheses for example. 219 | 220 | * Use context information.
The parser could make use of a symbol table based on 221 | the code it has seen already. 222 | 223 | * Language support for C and Objective-C: Even though these languages share a 224 | lot of their syntax with C++, they have subtle differences. It shouldn't be 225 | hard to add those to the parser. 226 | 227 | * Optimize for speed: Add a memory manager for the AST and improve the parser. 228 | There hasn't been much focus on speed yet. 229 | 230 | * Conversion between Clang's AST and the fuzzy AST. If there is a way to 231 | produce a Clang AST, why not make use of it for tools that use the Fuzzy AST? 232 | -------------------------------------------------------------------------------- /docs/clang-highlight.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | Clang-Highlight 3 | =============== 4 | 5 | :program:`clang-highlight` is a syntax highlighting tool for C++ based on the 6 | :doc:`LibFuzzy` framework. 7 | 8 | Using clang-highlight 9 | ===================== 10 | 11 | :program:`clang-highlight` accepts exactly one argument, the file you want to 12 | highlight. 13 | 14 | .. code-block:: bash 15 | 16 | $ clang-highlight test.cpp 17 | 18 | Or if you want to see it through a pager: 19 | 20 | .. code-block:: bash 21 | 22 | $ clang-highlight test.cpp | less -R 23 | 24 | Pass ``-`` as file name if you want to highlight the standard input. 25 | 26 | Output formats 27 | -------------- 28 | 29 | .. option:: -stdout 30 | 31 | The default output format. Uses console colors. 32 | 33 | .. option:: -html 34 | 35 | Writes HTML as output with hardcoded colors. The individual tokens have the 36 | form ``int``. 37 | 38 | .. option:: -shtml 39 | 40 | Writes semantic HTML with CSS selectors. The individual tokens have the 41 | form ``int``. The class can be specified in 42 | a separate style sheet by the user. 43 | 44 | ..
option:: -latex 45 | 46 | Writes semantic LaTeX for use with the package that is bundled with 47 | clang-highlight. See below. 48 | 49 | Further options 50 | --------------- 51 | 52 | .. option:: -identifiers-only 53 | 54 | By default, the star ``*`` in ``type *i;`` is classified as part of the type 55 | name as is ``<`` and ``>`` in ``unique_ptr<int>``. To disable this feature, 56 | use the ``-identifiers-only`` option. 57 | 58 | .. option:: -dump-ast 59 | 60 | Only included for testing the fuzzy parser, will be removed later. 61 | 62 | .. option:: -o 63 | 64 | Output to a file instead of standard output. 65 | 66 | The LaTeX Package ``clanghighlight`` 67 | ------------------------------------ 68 | 69 | :program:`clang-highlight` can be used to highlight C++ code in LaTeX documents. The 70 | file ``clanghighlight.sty`` that is included in this repository provides a 71 | package for easy usage. Just put it in the same directory as the ``.tex`` file 72 | you are writing. 73 | 74 | .. code-block:: latex 75 | 76 | \usepackage{clanghighlight} % put this into the preamble 77 | 78 | % You might need to specify the full path to clang-highlight 79 | % \clanghighlightCmd{/path/to/clang-highlight} 80 | 81 | % in the document: 82 | \begin{cxx} 83 | // your code goes here 84 | \end{cxx} 85 | 86 | \begin{cxx}[numbers=left] % the options are directly passed to fancyvrb 87 | // your code goes here 88 | \end{cxx} 89 | 90 | \inputcxx{file.cpp} % use code from a file 91 | 92 | This package is only in beta status and some more functionality might be added 93 | soon. 94 | 95 | Comparison to other highlighters 96 | -------------------------------- 97 | 98 | Other highlighters exist, but mostly use regular expressions and are therefore 99 | limited by design. See :doc:`LibFuzzy` for how :program:`clang-highlight` 100 | parses C++. 101 | 102 | * `Pygments <http://pygments.org/>`_: "Generic syntax highlighter for general use" 103 | written in Python. Lexers are Python classes.
The current C++ lexer uses 104 | regular expressions and only highlights preprocessor directives and keywords. 105 | 106 | * `GNU Source-Highlight <http://www.gnu.org/software/src-highlite/>`_: Generic 107 | highlighter available for many languages. Types, keywords, functions etc. can 108 | be defined by a regular expression in a configuration file. In C++, only 109 | keywords, symbols and functions (without templates) are highlighted. In 110 | particular, there is no code to highlight types other than the builtin ones. 111 | 112 | * Highlighters built into editors (:program:`emacs`, :program:`vim`, etc.): Mostly 113 | regex-based. Tightly coupled to the editor, not intended for use on the 114 | command line. 115 | -------------------------------------------------------------------------------- /latex/clanghighlight.sty: -------------------------------------------------------------------------------- 1 | \NeedsTeXFormat{LaTeX2e} 2 | \ProvidesPackage{clanghighlight}[2014/07/30 v0.1 clang-highlight package for LaTeX.]
3 | \usepackage{fancyvrb} 4 | \usepackage{xcolor} 5 | \usepackage{ifplatform} 6 | 7 | \ifwindows 8 | \providecommand\DeleteFile[1]{\immediate\write18{del #1}} 9 | \else 10 | \providecommand\DeleteFile[1]{\immediate\write18{rm #1}} 11 | \fi 12 | 13 | \newcommand\ch@style@namespace{\color{teal}} 14 | \newcommand\ch@style@type{\color[HTML]{228B22}} 15 | \newcommand\ch@style@keyword{\color{violet}} 16 | \newcommand\ch@style@comment{\color[HTML]{800000}\itshape} 17 | \newcommand\ch@style@preprocessor{\color[HTML]{483D8B}} 18 | \newcommand\ch@style@string{\color[HTML]{DE2E2E}} 19 | \newcommand\ch@style@char{\color{purple}} 20 | \newcommand\ch@style@function{\color[HTML]{000080}} 21 | \newcommand\ch@style@numeric{\color[HTML]{707070}} 22 | \newcommand\ch@style@variable{\color{black}} 23 | \newcommand\ch@style@default{\color{black}} 24 | 25 | \newcommand\clangHighlightToken[2]{{\expandafter\csname ch@style@#1\endcsname{}#2}} 26 | 27 | \providecommand\ch@clanghighlight[1]{clang-highlight #1} 28 | \newcommand\clanghighlightCmd[1]{\renewcommand\ch@clanghighlight[1]{#1 ##1}} 29 | \clanghighlightCmd{clang-highlight} 30 | 31 | \def\ch@fvopts{} 32 | \newcommand\cxxset[1]{\def\ch@fvopts{#1}} 33 | 34 | \begingroup 35 | \catcode`\^^M\active% 36 | \global\def\activeeol{^^M}% 37 | \endgroup 38 | 39 | \def\cxx@[#1]{\def\ch@fvoptsarg{#1} 40 | \VerbatimEnvironment\begin{VerbatimOut}[codes={\catcode`\^^I=12}]{\jobname.cc}} 41 | \def\cxx@noargs#1{\edef\temp{[]\activeeol\string#1}\expandafter\cxx@\temp} 42 | 43 | \newenvironment{cxx}% 44 | {\@ifnextchar[\cxx@\cxx@noargs}% 45 | {\end{VerbatimOut}% 46 | \inputcxx[\ch@fvoptsarg]{\jobname.cc}% 47 | \DeleteFile{\jobname.cc}% 48 | } 49 | 50 | \newcommand\inputcxx[2][]{% 51 | \protected@xdef\ch@cmd{\ch@clanghighlight{-latex #2 -o \jobname.ch}} 52 | \IfFileExists{\jobname.ch}{\DeleteFile{\jobname.ch}}{} 53 | \immediate\write18{\ch@cmd} 54 | \IfFileExists{\jobname.ch}{% 55 | \edef\ch@fvoptsall{\ch@fvopts,#1}% 56 | 
\expandafter\VerbatimInput\expandafter[\ch@fvoptsall,commandchars=\\\{\}]{\jobname.ch}% 57 | \DeleteFile{\jobname.ch}}% 58 | {\PackageError{clanghighlight}{Error executing `\ch@cmd'.}{Make sure% 59 | clang-highlight is properly installed or doesn't crash with the given input.}% 60 | }% 61 | } 62 | 63 | \AtEndOfPackage{ 64 | \ifnum\pdf@shellescape=1\relax\else 65 | \PackageError{clanghighlight} 66 | {You must invoke LaTeX with the -shell-escape flag} 67 | {Pass the -shell-escape flag to LaTeX.}\fi 68 | } 69 | -------------------------------------------------------------------------------- /latex/fuzzyparser.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kapf/clang-highlight/39d17ca5c774646a2dbcdc0a7dfe719a260bc2c7/latex/fuzzyparser.pdf -------------------------------------------------------------------------------- /latex/fuzzyparser.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \usepackage[T1]{fontenc} 3 | \usepackage{upquote} 4 | \usepackage{minted} 5 | \usepackage{listings} 6 | \usepackage{clanghighlight} 7 | 8 | % Note: You might need change the path to clang-highlight 9 | % \clanghighlightCmd{/path/to/clang-highlight} 10 | 11 | \newcommand\pkg[1]{\textsf{#1}} 12 | 13 | \begin{document} 14 | 15 | \inputcxx[numbers=left]{../Fuzzy/FuzzyParser.cpp} 16 | 17 | \end{document} 18 | -------------------------------------------------------------------------------- /latex/sample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kapf/clang-highlight/39d17ca5c774646a2dbcdc0a7dfe719a260bc2c7/latex/sample.pdf -------------------------------------------------------------------------------- /latex/sample.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \usepackage[T1]{fontenc} 3 | 
\usepackage{upquote} 4 | \usepackage{minted} 5 | \usepackage{listings} 6 | \usepackage{clanghighlight} 7 | 8 | % Note: You might need change the path to clang-highlight 9 | % \clanghighlightCmd{/path/to/clang-highlight} 10 | 11 | \newcommand\pkg[1]{\textsf{#1}} 12 | 13 | \begin{document} 14 | 15 | Sample code highlighted by the command line tool \verb|clang-highlight| and the 16 | \LaTeX\ package \pkg{clanghighlight}: 17 | \begin{cxx}[numbers=left] 18 | #include 19 | template T make(); 20 | int main() /* block */ { // comment 21 | const T& x = make("string", 'c'); 22 | } 23 | \end{cxx} 24 | % instead of inline, one can use \inputcxx[]{file.cpp} 25 | % Also, the options are optional. \begin{cxx} works as does \inputcxx{file} 26 | 27 | Same code using \textsf{Pygments} and the package \pkg{minted}: 28 | \begin{minted}[linenos=true]{c++} 29 | #include 30 | template T make(); 31 | int main() /* block */ { // comment 32 | const T& x = make("string", 'c'); 33 | } 34 | \end{minted} 35 | Note that \pkg{minted} has some limitations, because \textsf{Pygments} doesn't 36 | output semantic \LaTeX. The single quote isn't straight (can be fixed through 37 | a hack though) and the colors schemes can't be modified from within \LaTeX. 
38 | 39 | And with \pkg{listings}, the pure \LaTeX\ solution: 40 | 41 | \begin{lstlisting}[language=c++,numbers=left] 42 | #include 43 | template T make(); 44 | int main() /* block */ { // comment 45 | const T& x = make("string", 'c'); 46 | } 47 | \end{lstlisting} 48 | 49 | \end{document} 50 | -------------------------------------------------------------------------------- /unittests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(LLVM_LINK_COMPONENTS 2 | Support 3 | ) 4 | 5 | include_directories("..") 6 | 7 | add_highlight_unittest(FuzzyParseTests 8 | FuzzyParseTest.cpp 9 | ) 10 | 11 | target_link_libraries(FuzzyParseTests 12 | clangAST 13 | clangTooling 14 | clangFuzzy 15 | ) 16 | -------------------------------------------------------------------------------- /unittests/FuzzyParseTest.cpp: -------------------------------------------------------------------------------- 1 | //===- unittests/FuzzyParseTest.cpp - fuzzy parsing unit tests ------------===// 2 | // 3 | // The LLVM Compiler Infrastructure 4 | // 5 | // This file is distributed under the University of Illinois Open Source 6 | // License. See LICENSE.TXT for details. 
7 | // 8 | //===----------------------------------------------------------------------===// 9 | 10 | #include "llvm/Support/Debug.h" 11 | #include "clang/Lex/Lexer.h" 12 | #include "clang/Basic/IdentifierTable.h" 13 | #include "clang/Basic/Diagnostic.h" 14 | #include "clang/Basic/FileManager.h" 15 | #include "clang/Basic/LangOptions.h" 16 | #include "clang/Basic/SourceManager.h" 17 | #include "llvm/Config/config.h" 18 | #include "gtest/gtest.h" 19 | #include "Fuzzy/FuzzyAST.h" 20 | 21 | #define DEBUG_TYPE "highlight-test" 22 | 23 | using namespace llvm; 24 | 25 | namespace clang { 26 | namespace fuzzy { 27 | 28 | class ClassOfTester { 29 | bool (*FunPtr)(const ASTElement *); 30 | 31 | public: 32 | ClassOfTester(bool (*FunPtr)(const ASTElement *)) : FunPtr(FunPtr) {} 33 | bool verify(const ASTElement *AE) { return FunPtr(AE); } 34 | }; 35 | template ClassOfTester makeClassOfTester() { 36 | return ClassOfTester(&T::classof); 37 | } 38 | 39 | template SmallVector checkTypeSeq() { 40 | ClassOfTester Seq[] = { makeClassOfTester()... }; 41 | SmallVector Ret(Seq, Seq + sizeof...(T)); 42 | return Ret; 43 | } 44 | 45 | LangOptions getFormattingLangOpts(bool Cpp03 = false) { 46 | LangOptions LangOpts; 47 | LangOpts.CPlusPlus = 1; 48 | LangOpts.CPlusPlus11 = Cpp03 ? 0 : 1; 49 | LangOpts.CPlusPlus1y = Cpp03 ? 
0 : 1; 50 | LangOpts.LineComment = 1; 51 | LangOpts.Bool = 1; 52 | LangOpts.ObjC1 = 1; 53 | LangOpts.ObjC2 = 1; 54 | return LangOpts; 55 | } 56 | 57 | class FuzzyParseTest : public ::testing::Test { 58 | protected: 59 | /* Lexes Code in raw mode, keeps the annotated tokens and the fuzzy AST built from them. */ struct ParseResult { 60 | TranslationUnit TU; 61 | std::vector Tokens; 62 | static constexpr const char *FileName = ""; 63 | FileManager Files; 64 | DiagnosticsEngine Diagnostics; 65 | SourceManager SourceMgr; 66 | FileID ID; 67 | Lexer Lex; 68 | IdentifierTable IdentTable; 69 | 70 | ParseResult(StringRef Code) 71 | : Files((FileSystemOptions())), 72 | Diagnostics(IntrusiveRefCntPtr(new DiagnosticIDs), 73 | new DiagnosticOptions), 74 | SourceMgr(Diagnostics, Files), 75 | ID(SourceMgr.createFileID( 76 | MemoryBuffer::getMemBuffer(Code, FileName))), 77 | Lex(ID, SourceMgr.getBuffer(ID), SourceMgr, getFormattingLangOpts()), 78 | IdentTable(getFormattingLangOpts()) { 79 | Lex.SetKeepWhitespaceMode(true); 80 | 81 | for (;;) { 82 | Token TmpTok; 83 | Lex.LexFromRawLexer(TmpTok); 84 | 85 | if (TmpTok.getKind() == tok::hash && TmpTok.isAtStartOfLine()) 86 | Lex.setParsingPreprocessorDirective(true); 87 | if (TmpTok.getKind() == tok::eod) 88 | Lex.setParsingPreprocessorDirective(false); 89 | 90 | Tokens.push_back(fuzzy::AnnotatedToken(TmpTok)); 91 | Token &ThisTok = Tokens.back().Tok(); 92 | 93 | StringRef TokenText(SourceMgr.getCharacterData(ThisTok.getLocation()), 94 | ThisTok.getLength()); 95 | 96 | if (ThisTok.is(tok::raw_identifier)) { 97 | IdentifierInfo &Info = IdentTable.get(TokenText); 98 | ThisTok.setIdentifierInfo(&Info); 99 | ThisTok.setKind(Info.getTokenID()); 100 | } 101 | 102 | if (ThisTok.is(tok::eof)) 103 | break; 104 | } 105 | 106 | /* Pass [data(), data() + size()): forming &*Tokens.end() would dereference the past-the-end iterator. */ TU = fuzzy::fuzzyparse(Tokens.data(), Tokens.data() + Tokens.size()); 107 | } 108 | }; 109 | 110 | /* Parses Code and checks that the I-th non-comment, non-whitespace token is annotated with an AST node accepted by TokenTypes[I]. */ void checkParse(StringRef Code, 111 | SmallVector TokenTypes) { 112 | ParseResult Parsed(Code); 113 | auto &AllTokens = Parsed.Tokens; 114 | 115 | size_t NonWhitespaceTokens = 0; 116 | for (auto &Tok : AllTokens) 117 | if
(Tok.getTokenKind() != tok::comment && 118 | Tok.getTokenKind() != tok::unknown && Tok.getTokenKind() != tok::eof) 119 | ++NonWhitespaceTokens; 120 | 121 | /* Fatal on mismatch: with too few tokens the skip loop below would walk past the trailing eof and index outside AllTokens. */ ASSERT_EQ(NonWhitespaceTokens, TokenTypes.size()); 122 | for (size_t I = 0, J = 0; I < TokenTypes.size(); ++I, ++J) { 123 | while (AllTokens[J].getTokenKind() == tok::comment || 124 | AllTokens[J].getTokenKind() == tok::unknown || 125 | AllTokens[J].getTokenKind() == tok::eof) 126 | ++J; 127 | if (!TokenTypes[I].verify(AllTokens[J].getASTReference())) { 128 | dbgs() << "Parsed " << Code << " into:\n"; 129 | for (auto &S : Parsed.TU.children()) 130 | printAST(dbgs(), S, Parsed.SourceMgr); 131 | dbgs() << "I=" << I << ", J=" << J << '\n'; 132 | EXPECT_TRUE(TokenTypes[I].verify(AllTokens[J].getASTReference())); 133 | } 134 | } 135 | } 136 | 137 | void checkUnparsable(StringRef Code) { 138 | ParseResult Parsed(Code); 139 | for (auto &Tok : Parsed.Tokens) 140 | if (Tok.getTokenKind() != tok::comment && 141 | Tok.getTokenKind() != tok::unknown && Tok.getTokenKind() != tok::eof) 142 | EXPECT_TRUE(isa(Tok.getASTReference())); 143 | } 144 | void checkUnparsable(std::initializer_list Codes) { 145 | for (const char *C : Codes) 146 | checkUnparsable(C); 147 | } 148 | 149 | void dump(ParseResult &Parsed, StringRef Code) { 150 | dbgs() << Code << '\n'; 151 | 152 | dbgs() << "Parsed " << Code << " into:\n"; 153 | for (auto &S : Parsed.TU.children()) 154 | printAST(dbgs(), S, Parsed.SourceMgr); 155 | } 156 | 157 | template void checkToplevel(StringRef Code) { 158 | ParseResult Parsed(Code); 159 | if (Parsed.TU.children().size() != 1 || 160 | !isa(Parsed.TU.Body[0].get())) { 161 | dump(Parsed, Code); 162 | } 163 | /* Fatal on mismatch so Body[0] is never read from an empty translation unit. */ ASSERT_EQ(Parsed.TU.children().size(), size_t(1)); 164 | EXPECT_TRUE(isa(Parsed.TU.Body[0].get())); 165 | } 166 | 167 | template 168 | void checkToplevel(std::initializer_list Codes) { 169 | for (const char *C : Codes) 170 | checkToplevel(C); 171 | } 172 | 173 | template void checkFirstPPOn(StringRef Code, F &&f) { 174
| ParseResult Parsed(Code); 175 | if (Parsed.TU.PPDirectives.size() == 0) { 176 | dump(Parsed, Code); 177 | EXPECT_TRUE(Parsed.TU.PPDirectives.size() > 0); 178 | return; 179 | } 180 | if (!f(*Parsed.TU.PPDirectives[0], false)) { 181 | dump(Parsed, Code); 182 | EXPECT_TRUE(f(*Parsed.TU.PPDirectives[0], true)); 183 | } 184 | } 185 | 186 | template void checkFirstOn(StringRef Code, F &&f) { 187 | ParseResult Parsed(Code); 188 | if (Parsed.TU.children().size() == 0) { 189 | dump(Parsed, Code); 190 | EXPECT_TRUE(Parsed.TU.children().size() > 0); 191 | return; 192 | } 193 | if (!f(*Parsed.TU.Body[0], false)) { 194 | dump(Parsed, Code); 195 | EXPECT_TRUE(f(*Parsed.TU.Body[0], true)); 196 | } 197 | } 198 | 199 | template void checkFirst(StringRef Code) { 200 | checkFirstOn(Code, [&](const Stmt &S, bool Abort) { 201 | if (Abort) 202 | EXPECT_TRUE(isa(S)); 203 | else 204 | return isa(S); 205 | return true; 206 | }); 207 | } 208 | template 209 | void checkFirst(std::initializer_list Codes) { 210 | for (const char *C : Codes) 211 | checkFirst(C); 212 | } 213 | 214 | template void checkFirstPP(StringRef Code) { 215 | checkFirstPPOn(Code, [&](const PPDirective &P, bool Abort) { 216 | if (Abort) 217 | EXPECT_TRUE(isa(P)); 218 | else 219 | return isa(P); 220 | return true; 221 | }); 222 | } 223 | template 224 | void checkFirstPP(std::initializer_list Codes) { 225 | for (const char *C : Codes) 226 | checkFirstPP(C); 227 | } 228 | }; 229 | 230 | TEST_F(FuzzyParseTest, DeclStmtTest) { 231 | checkParse("int i;", checkTypeSeq()); 232 | checkParse("int i=5;", checkTypeSeq()); 234 | checkParse("int i=5,j;", 235 | checkTypeSeq()); 237 | checkParse( 238 | "int i=5,j=i;", 239 | checkTypeSeq()); 241 | checkParse( 242 | "int i,j,k,l,m,n,o,p;", 243 | checkTypeSeq()); 246 | 247 | checkParse("int *p;", 248 | checkTypeSeq()); 249 | checkParse("type &p;", 250 | checkTypeSeq()); 251 | 252 | checkParse( 253 | "int* p,* /*comment*/ ** * * q;", 254 | checkTypeSeq()); 257 | 258 | checkParse( 259 | "a 
b=c,*d=e,********f=****g**h;", 260 | checkTypeSeq()); 268 | 269 | checkToplevel({ "a b;", // 270 | "a b=c(d,e);", // 271 | "a b=c(d,e,*g),*h=*i;", 272 | // 273 | "int a;", // 274 | "unsigned long long int a;", // 275 | "signed char a;", // 276 | "double a;" }); 277 | 278 | checkParse("register const volatile constexpr int i;", 279 | checkTypeSeq()); 280 | 281 | checkUnparsable({ "int 1=2;", // 282 | "1 + !(unparsable!!!);" }); 283 | } 284 | 285 | TEST_F(FuzzyParseTest, ExprLineStmtTest) { 286 | checkToplevel({ "a*b*c;", // 287 | "a*b*c=d;", // 288 | "a*b*c==d;", // 289 | "f();", // 290 | "f(a,b,c);", // 291 | "f(1,2,3);", // 292 | "f(1)*g;", // 293 | "n::f(1)*g;", // 294 | "a+b;", // 295 | "a-b;", // 296 | "a*b*c;", // 297 | "a/b;", // 298 | "a&b&c;", // 299 | "a^b;", // 300 | "a|b;", // 301 | "a<>b;", // 303 | "ab;", // 305 | "~a;", // 306 | "!a;", // 307 | "-a;", // 308 | "--a;", // 309 | "++a;", // 310 | "++++~~~+~!~++++++!--++++++a;", // 311 | "\"string literal\";", // 312 | "nullptr;", // 313 | "this;", // 314 | "true;", // 315 | "false;", // 316 | "-1;", // 317 | "(1+-1)*(3+5);" }); 318 | checkUnparsable({ "1(a,b);", // 319 | "f(", // 320 | "f(," }); 321 | } 322 | 323 | TEST_F(FuzzyParseTest, QualifiedIDs) { 324 | checkToplevel( 325 | { "std::vector v;", // 326 | "::std::vector v1;", // 327 | "std::vector v2;", // 328 | "std::vector v3;", // 329 | "std::vector<> v4;", // 330 | "std::vector<1> v5;", // 331 | "std::tr1::stl::vector<> v6;", // 332 | "::vector<> v7;", // 333 | "::std::tr1::stl::vector, ::std::pair > v8;", 334 | "n::n::n::n::n::a,g > > > g;", 335 | "a::b ***e=f::g<1>*h::i<2,j>(::k::l);", 336 | "auto x = std::make_unique(0);" }); 337 | 338 | checkParse("auto x = std::make_unique(0);", 339 | checkTypeSeq()); 342 | checkToplevel({ "n::f(a::b());", // 343 | "n::f(a::b<2*3>());", // 344 | "t<1+b>();", // 345 | "t< 1<<2 >();", // 346 | "t< (1>2) >();" }); 347 | checkUnparsable("t<1> 2>();"); 348 | } 349 | 350 | TEST_F(FuzzyParseTest, FunctionDeclStmt) { 
351 | const char *Tests[] = { 352 | "void f(int,int);", // 353 | "void g(int i=0);", // 354 | "static std::unique_ptr parseVarDecl(TokenFilter &TF," 355 | " Type *TypeName = 0," 356 | " bool NameOptional = false);", 357 | "void dismiss() { TF = nullptr; }", // 358 | "type func1();", "type func2() { 1+1; }", // 359 | "type func3(type a) { 1+1; }", "type func4(type a, type b) { 1+1; }", 360 | "static type func5();", 361 | "static std::unique_ptr parseExpression(TokenFilter &TF," 362 | " int Precedence," 363 | " bool StopAtGreater);", 364 | "static bool checkKind(TokenFilter &TF, tok::TokenKind Kind){}", 365 | }; 366 | for (const char *Code : Tests) 367 | checkFirst(Code); 368 | } 369 | 370 | TEST_F(FuzzyParseTest, ReturnStmt) { 371 | checkToplevel({ "return 1;", // 372 | "return a*b;", // 373 | "return;" }); 374 | checkUnparsable("return return;"); 375 | } 376 | 377 | TEST_F(FuzzyParseTest, StructDecl) { 378 | checkFirst({ "struct C;", // 379 | "union C;", // 380 | "class C{};", // 381 | "class C{ >< };" }); 382 | 383 | auto checkFirstIsFunctionDecl = [&](StringRef Code) { 384 | checkFirstOn(Code, [](const Stmt &S, bool Abort) { 385 | if (Abort) 386 | EXPECT_TRUE(isa(S)); 387 | else if (!isa(S)) 388 | return false; 389 | const auto &CD = cast(S); 390 | if (Abort) 391 | EXPECT_EQ(CD.Body.size(), (size_t)1); 392 | else if (CD.Body.size() != 1) 393 | return false; 394 | 395 | if (Abort) 396 | EXPECT_TRUE(isa(*CD.Body.front())); 397 | else if (!isa(*CD.Body.front())) 398 | return false; 399 | 400 | return true; 401 | }); 402 | }; 403 | 404 | checkFirstIsFunctionDecl("struct C { C(){} };"); 405 | checkFirstIsFunctionDecl("struct C { ~C(){} };"); 406 | checkFirstIsFunctionDecl("struct C { virtual void f() override =0; };"); 407 | checkFirstIsFunctionDecl( 408 | "struct C { static constexpr bool g() { return true; } };"); 409 | checkFirstIsFunctionDecl("struct C { C()=default; };"); 410 | checkFirstIsFunctionDecl("struct C { bool operator<(int o); };"); 411 | 
checkFirstIsFunctionDecl( 412 | "struct C { friend C operator==(C lhs, C rhs)=default; };"); 413 | } 414 | 415 | TEST_F(FuzzyParseTest, IfStmt) { 416 | const char *Tests[] = { 417 | "if (true) {}", // 418 | "if (0) do_sth();", // 419 | "if (int i=0) {}", // 420 | "if (int i=0) {} else do_sth_else();", 421 | "if (int*i=0) {} else if (false) {} else do_sth_else();", 422 | "if (int*i=0) {} else if (ns::t<4> x=4) {} else do_sth_else();", 423 | "if (int*i=0) {} else if (ns::t<4> x=4) {} else do_sth_else();", 424 | "if (1){}else if(1){}else if(1){}else if(1){}else if(1){}else " 425 | "if(1){}else{}", 426 | }; 427 | for (const char *Code : Tests) 428 | checkFirst(Code); 429 | 430 | checkUnparsable("else if (1);"); 431 | } 432 | 433 | TEST_F(FuzzyParseTest, IfStmtFuzzy) { 434 | checkFirst({ "if () {}", // 435 | "if (true {}", // 436 | "if (false)) {}", // 437 | "if ();", }); 438 | } 439 | 440 | TEST_F(FuzzyParseTest, WhileStmt) { 441 | checkFirst({ "while (true) {}", // 442 | "while (0) do_sth();", // 443 | "while (int i=0) {}", // 444 | }); 445 | } 446 | 447 | TEST_F(FuzzyParseTest, ForStmt) { 448 | checkFirst({ "for (;;) {}", // 449 | "for (;;);", // 450 | "for (int i=0;;) {}", // 451 | "for (T x=0,y=3;;) {}", // 452 | "for (T x,y,z;;) {}", // 453 | "for (int i=0;int j=0;) {}", // 454 | "for (int i=0;i<10;i=i+1) {}", // 455 | "for (;int j;);", // 456 | "for (;;i=i+1) {}", // 457 | }); 458 | } 459 | 460 | TEST_F(FuzzyParseTest, TemplateDecl) { 461 | const char *Tests[] = { 462 | "template void f();", 463 | "template void f();", 464 | "template void f();", 465 | "template void f();", 466 | //"template void f();", 467 | "template void f() {}", 468 | "template struct C;", 469 | "template struct C;", 470 | "template void f();", 471 | "template void f();", 472 | "template void f();", 473 | }; 474 | for (const char *Code : Tests) 475 | checkFirst(Code); 476 | } 477 | 478 | TEST_F(FuzzyParseTest, PPIf) { 479 | checkFirstPP({ "#if 1", // 480 | "#else", // 481 | "#elif 1", // 482 
| "# if unparsable!", // 483 | "#else EXPR", // 484 | "#elif 1&1+1*3+f(3)", }); 485 | } 486 | 487 | TEST_F(FuzzyParseTest, PPInclude) { 488 | checkFirstPP({ "#include ", // 489 | "#include \"header.h\"", // 490 | "#include \"\"", // 491 | "#include <>", // 492 | "# /*comment*/ include ", 493 | // " /*comment*/ # /*comment*/ include ", 494 | "# include \"fancy/path!/???.h_\"", }); 495 | } 496 | 497 | } // end namespace fuzzy 498 | } // end namespace clang 499 | --------------------------------------------------------------------------------