├── .gitignore ├── CMakeLists.txt ├── README.md ├── analyze_conditionals.cpp ├── conditional_to_policy.cpp ├── qi_token.hpp └── style_detect.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | 19 | # Compiled Static libraries 20 | *.lai 21 | *.la 22 | *.a 23 | *.lib 24 | 25 | # Executables 26 | *.exe 27 | *.out 28 | *.app 29 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Build control file for example PP-aware refactoring code 2 | # 3 | # Copyright (C) 2016 Jeff Trull 4 | # 5 | # Distributed under the Boost Software License, Version 1.0. (See accompanying 6 | # file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 7 | # 8 | # 9 | # 10 | 11 | cmake_minimum_required( VERSION 3.28 ) 12 | 13 | project( Octothorpe ) 14 | 15 | set( CMAKE_CXX_STANDARD 17 ) 16 | set( CMAKE_EXPORT_COMPILE_COMMANDS ON ) 17 | 18 | # We will pick up LLVM via the somewhat more modern "Config" route 19 | # so set CMAKE_PREFIX_PATH accordingly 20 | 21 | find_package( Clang REQUIRED CONFIG ) 22 | find_package( LLVM REQUIRED CONFIG ) 23 | 24 | set( Boost_USE_STATIC_LIBS ON ) 25 | find_package( Boost 1.60 REQUIRED COMPONENTS system wave thread filesystem ) 26 | 27 | if (CMAKE_COMPILER_IS_GNUCC OR (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")) 28 | add_definitions(-Wall -Wextra -Werror) # be conservative about checks 29 | if( CMAKE_COMPILER_IS_GNUCXX ) 30 | set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fuse-ld=gold" ) 31 | endif() 32 | endif() 33 | 34 | # without this we still get symbols but many things are optimized out 35 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} 
-O0") 36 | 37 | # Clang-using example 38 | 39 | add_executable( c2p conditional_to_policy.cpp ) 40 | # Note Clang is normally compiled without exceptions or RTTI... tread carefully 41 | set_target_properties( c2p PROPERTIES 42 | COMPILE_FLAGS "${LLVM_CXXFLAGS} -fexceptions -fno-rtti" # LLVM doesn't like RTTI 43 | ) 44 | target_link_libraries( c2p clangAST clangASTMatchers clangTooling ) 45 | target_include_directories( c2p SYSTEM PRIVATE ${CLANG_INCLUDE_DIRS} ) 46 | 47 | # Conditional compilation analysis of whole text example, using Boost.Wave and SMT 48 | # Requires CVC5. User must ensure CVC5Config.cmake is in the CMAKE_PREFIX_PATH. 49 | 50 | find_package( cvc5 ) 51 | # cvc5_FOUND holds a false value (not an empty string) when the package is missing, so test its truthiness 52 | if( NOT cvc5_FOUND ) 53 | message( WARNING "cvc5 was not found (is its CMake config on CMAKE_PREFIX_PATH?); the deadc0de executable will not be built" ) 54 | else() 55 | 56 | add_executable( deadc0de analyze_conditionals.cpp ) 57 | # set_target_properties( deadc0de PROPERTIES 58 | # COMPILE_DEFINITIONS BOOST_SPIRIT_DEBUG # Uncomment in case of emergency 59 | # ) 60 | target_link_libraries( deadc0de Boost::system Boost::wave Boost::boost cvc5::cvc5 ) 61 | 62 | endif() 63 | 64 | # Style detector example 65 | add_executable( sd style_detect.cpp ) 66 | target_link_libraries( sd Boost::wave Boost::boost ) 67 | set_target_properties( sd PROPERTIES 68 | # COMPILE_DEFINITIONS BOOST_SPIRIT_DEBUG # Uncomment in case of emergency 69 | CXX_STANDARD 11 # for Wave 70 | ) 71 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # octothorpe 2 | Source code for a presentation on refactoring C++ while accounting for preprocessor interactions 3 | -------------------------------------------------------------------------------- /analyze_conditionals.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Example of analyzing equations controlling sections of text 
in a source file 3 | * 4 | * Copyright (C) 2016 Jeff Trull 5 | * 6 | * Distributed under the Boost Software License, Version 1.0. (See accompanying 7 | * file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 8 | * 9 | * 10 | */ 11 | 12 | #include "qi_token.hpp" 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | 27 | // CVC5 SMT engine includes 28 | #include 29 | 30 | // Parsing will produce text "sections": a set of lines and an associated condition 31 | BOOST_FUSION_DEFINE_STRUCT( 32 | (), 33 | text_section, 34 | (cvc5::Term, condition) 35 | (std::vector, body) 36 | (boost::wave::util::file_position_type, start) 37 | (boost::wave::util::file_position_type, end) 38 | ) 39 | 40 | // Proper use of cvc5 requires caching variables so we don't create two with the same name 41 | struct var_cache { 42 | var_cache(cvc5::Solver& slv) : slv_(slv) {} 43 | 44 | cvc5::Term get_defined_expr(std::string varname) { 45 | // see if we have cached this variable representing defined(varname) 46 | auto it = defined_vars_.find(varname); 47 | if (it != defined_vars_.end()) { 48 | return it->second; 49 | } 50 | // give it a different name than the integer variable representing its value 51 | cvc5::Term var = slv_.mkConst(slv_.getBooleanSort(), varname + "_defined"); 52 | defined_vars_.emplace(varname, var); 53 | return var; 54 | } 55 | 56 | // for building integer expressions 57 | cvc5::Term get_integer_var(std::string varname) { 58 | // check in cache first 59 | auto it = int_vars_.find(varname); 60 | if (it != int_vars_.end()) { 61 | return it->second; 62 | } 63 | cvc5::Term var = slv_.mkConst(slv_.getIntegerSort(), varname); 64 | int_vars_.emplace(varname, var); 65 | return var; 66 | } 67 | private: 68 | cvc5::Solver& slv_; 69 | std::map int_vars_; 70 | std::map defined_vars_; 71 | }; 72 | 73 | // Define a simple grammar using our adapted token iterator 74 | 
template 75 | struct skipper : boost::spirit::qi::grammar 76 | { 77 | skipper() : skipper::base_type(spaces) { 78 | spaces = +boost::spirit::qi::token(boost::wave::T_SPACE); 79 | } 80 | private: 81 | boost::spirit::qi::rule spaces; 82 | }; 83 | 84 | template 85 | struct cond_expr : boost::spirit::qi::grammar> 86 | { 87 | cond_expr(cvc5::Solver& slv, var_cache& vars) 88 | : cond_expr::base_type(bool_expr), slv_(slv), vars_(vars) { 89 | using boost::spirit::_1; 90 | using boost::spirit::_3; 91 | using boost::spirit::_a; 92 | using boost::spirit::_b; 93 | using boost::spirit::_r1; 94 | using boost::spirit::_val; 95 | using boost::spirit::_pass; 96 | using boost::spirit::qi::token; 97 | namespace phx = boost::phoenix; 98 | using namespace boost::wave; 99 | 100 | cond_inv = ( token(T_NOT) >> bool_term [ 101 | _val = phx::bind(&cond_expr::create_inverted_expr, 102 | this, _1)]) ; 103 | ident = token(T_IDENTIFIER); 104 | defined = ident [ 105 | _pass = ( _1 == std::string("defined") ) 106 | ] 107 | >> token(T_LEFTPAREN) 108 | >> ident [ 109 | _val = phx::bind(&var_cache::get_defined_expr, 110 | &vars_, _1) 111 | ] 112 | >> token(T_RIGHTPAREN) ; 113 | paren_term = token(T_LEFTPAREN) 114 | >> bool_expr [_val = _1] 115 | >> token(T_RIGHTPAREN) ; 116 | bool_term = cond_inv | defined | paren_term ; 117 | 118 | conj_term = bool_term [ _val = _1 ] 119 | >> *(token(T_ANDAND) >> bool_term [ 120 | _val = phx::bind(&cond_expr::create_binary_expr, 121 | this, cvc5::Kind::AND, _val, _1)]) ; 122 | disj_term = conj_term [ _val = _1 ] 123 | >> *(token(T_OROR) >> conj_term [ 124 | _val = phx::bind(&cond_expr::create_binary_expr, 125 | this, cvc5::Kind::OR, _val, _1)]) ; 126 | 127 | // parsing a subset of real expressions here, for now 128 | // we only compare ints, never compute with them 129 | int_ = token(T_INTLIT) ; 130 | int_term = ident[_val = phx::bind(&var_cache::get_integer_var, 131 | &vars_, _1)] | 132 | int_[_val = phx::bind(&cond_expr::create_integer_const, 133 | this, _1)] ; 
134 | 135 | int_comp = 136 | (int_term >> token(T_EQUAL) >> int_term) [ 137 | _val = phx::bind(&cond_expr::create_binary_expr, 138 | this, cvc5::Kind::EQUAL, _1, _3) ] 139 | | (int_term >> token(T_LESS) >> int_term) [ 140 | _val = phx::bind(&cond_expr::create_binary_expr, 141 | this, cvc5::Kind::LT, _1, _3) ] 142 | | (int_term >> token(T_GREATER) >> int_term) [ 143 | _val = phx::bind(&cond_expr::create_binary_expr, 144 | this, cvc5::Kind::GT, _1, _3) ] 145 | | (int_term >> token(T_LESSEQUAL) >> int_term) [ 146 | _val = phx::bind(&cond_expr::create_binary_expr, 147 | this, cvc5::Kind::LEQ, _1, _3) ] 148 | | (int_term >> token(T_GREATEREQUAL) >> int_term) [ 149 | _val = phx::bind(&cond_expr::create_binary_expr, 150 | this, cvc5::Kind::GEQ, _1, _3) ] ; 151 | 152 | bool_expr = int_comp | disj_term ; 153 | 154 | BOOST_SPIRIT_DEBUG_NODE(ident); 155 | BOOST_SPIRIT_DEBUG_NODE(defined); 156 | BOOST_SPIRIT_DEBUG_NODE(bool_term); 157 | BOOST_SPIRIT_DEBUG_NODE(paren_term); 158 | BOOST_SPIRIT_DEBUG_NODE(cond_inv); 159 | BOOST_SPIRIT_DEBUG_NODE(int_term); 160 | BOOST_SPIRIT_DEBUG_NODE(int_comp); 161 | BOOST_SPIRIT_DEBUG_NODE(conj_term); 162 | BOOST_SPIRIT_DEBUG_NODE(disj_term); 163 | BOOST_SPIRIT_DEBUG_NODE(bool_expr); 164 | } 165 | 166 | private: 167 | boost::spirit::qi::rule ident, int_; 168 | using expr_rule_t = boost::spirit::qi::rule>; 169 | expr_rule_t defined, cond_inv, bool_term, conj_term, disj_term, int_term, int_comp, paren_term, bool_expr; 170 | 171 | // for building logical expressions 172 | cvc5::Solver& slv_; 173 | var_cache& vars_; 174 | 175 | cvc5::Term create_inverted_expr(cvc5::Term e) { 176 | return e.notTerm(); 177 | } 178 | cvc5::Term create_binary_expr(cvc5::Kind op, cvc5::Term e1, cvc5::Term e2) { 179 | return slv_.mkTerm(op, {std::move(e1), std::move(e2)}); 180 | } 181 | cvc5::Term create_integer_const(std::string int_literal) { 182 | return slv_.mkInteger(int_literal); 183 | } 184 | }; 185 | 186 | 187 | template 188 | struct cond_grammar : 
boost::spirit::qi::grammar(), 190 | skipper> 191 | { 192 | cond_grammar(cvc5::Solver& slv, var_cache& vars) 193 | : cond_grammar::base_type(tunit), slv_(slv), vars_(vars), expr_parser_(slv, vars_) { 194 | using boost::spirit::_1; 195 | using boost::spirit::_a; 196 | using boost::spirit::_b; 197 | using boost::spirit::_r1; 198 | using boost::spirit::_val; 199 | using boost::spirit::qi::token; 200 | using boost::spirit::qi::omit; 201 | using boost::spirit::qi::attr; 202 | using boost::spirit::qi::eps; 203 | namespace phx = boost::phoenix; 204 | using namespace boost::wave; 205 | 206 | line_end = token(T_NEWLINE) | token(T_CPPCOMMENT) ; // Wave absorbs trailing \n 207 | 208 | // semantic action to append string attributes 209 | auto append = [](auto & dst, auto const & src) 210 | { 211 | dst.insert(std::end(dst), std::begin(src), std::end(src)); 212 | }; 213 | 214 | pp_cond = token(T_PP_IF) | 215 | token(T_PP_IFDEF) | 216 | token(T_PP_IFNDEF) | 217 | token(T_PP_ELSE) | 218 | token(T_PP_ELIF) | 219 | token(T_PP_ENDIF); 220 | 221 | non_eol = (token - line_end) ; 222 | 223 | textline = !pp_cond >> 224 | (line_end[_val = _1] // empty or comment 225 | | (non_eol[_val = _1] 226 | // append additional tokens without changing start position 227 | >> *non_eol[phx::bind(append, phx::at_c<0>(_val), phx::at_c<0>(_1))] 228 | >> line_end[phx::bind(append, phx::at_c<0>(_val), phx::at_c<0>(_1))])) ; 229 | 230 | auto next_line = [](util::file_position_type loc) 231 | { 232 | return util::file_position_type(loc.get_file(), loc.get_line() + 1, 1); 233 | }; 234 | 235 | textblock = 236 | // conditional for a textblock is just whatever it inherited 237 | eps[phx::at_c<0>(_val) = phx::construct(_r1)] 238 | >> textline[phx::push_back(phx::at_c<1>(_val), phx::at_c<0>(_1)), 239 | // set the start position 240 | phx::at_c<2>(_val) = phx::at_c<1>(_1), 241 | // "one past the end" as is traditional: 242 | phx::at_c<3>(_val) = phx::bind(next_line, phx::at_c<1>(_1))] 243 | >> 
*textline[phx::push_back(phx::at_c<1>(_val), phx::at_c<0>(_1)), 244 | // update end position for new line 245 | phx::at_c<3>(_val) = phx::bind(next_line, phx::at_c<1>(_1))] ; 246 | 247 | cond_if = token(T_PP_IF) 248 | >> expr_parser_[_a = _r1, _b = _1] >> line_end 249 | // both the inherited condition and the new one must be true: 250 | >> *basic(phx::bind(&cond_grammar::create_binary_expr, 251 | this, cvc5::Kind::AND, _a, _b))[ 252 | phx::bind(append, _val, _1) 253 | ] 254 | // update "condition so far" 255 | >> eps[ 256 | _a = phx::bind(&cond_grammar::create_inv_qual_expr, 257 | this, _a, _b) 258 | ] 259 | >> *(token(T_PP_ELIF) 260 | >> expr_parser_[_b = _1] >> line_end 261 | >> *basic(phx::bind(&cond_grammar::create_binary_expr, 262 | this, cvc5::Kind::AND, _a, _b))[ 263 | phx::bind(append, _val, _1) 264 | ] 265 | >> eps[ 266 | // accumulate condition 267 | _a = phx::bind(&cond_grammar::create_inv_qual_expr, this, _a, _b) 268 | ]) 269 | >> -(token(T_PP_ELSE) >> line_end 270 | >> *basic(_a)[ 271 | phx::bind(append, _val, _1) 272 | ]) 273 | >> token(T_PP_ENDIF) >> line_end ; 274 | 275 | ident = token(T_IDENTIFIER); 276 | cond_ifdef = token(T_PP_IFDEF) 277 | >> ident[ 278 | _a = phx::bind(&var_cache::get_defined_expr, &vars_, _1) 279 | ] 280 | >> line_end 281 | >> *basic(phx::bind(&cond_grammar::create_binary_expr, 282 | this, cvc5::Kind::AND, _r1, _a))[ 283 | phx::bind(append, _val, _1) 284 | ] 285 | >> -(token(T_PP_ELSE) >> line_end 286 | >> *basic(phx::bind(&cond_grammar::create_inv_qual_expr, 287 | this, _r1, _a))[ 288 | phx::bind(append, _val, _1) 289 | ]) 290 | >> token(T_PP_ENDIF) >> line_end ; 291 | 292 | cond_ifndef = token(T_PP_IFNDEF) 293 | >> ident[ 294 | _a = phx::bind(&var_cache::get_defined_expr, &vars_, _1) 295 | ] 296 | >> line_end 297 | >> *basic(phx::bind(&cond_grammar::create_inv_qual_expr, 298 | this, _r1, _a))[ 299 | phx::bind(append, _val, _1) 300 | ] 301 | >> -(token(T_PP_ELSE) >> line_end 302 | >> 
*basic(phx::bind(&cond_grammar::create_binary_expr, 303 | this, cvc5::Kind::AND, _r1, _a))[ 304 | phx::bind(append, _val, _1) 305 | ]) 306 | >> token(T_PP_ENDIF) >> line_end ; 307 | 308 | basic = textblock(_r1) | cond_if(_r1) | cond_ifdef(_r1) | cond_ifndef(_r1); 309 | 310 | toplvl = basic(phx::bind(&cond_grammar::create_boolean_const, 311 | this, true)) 312 | >> -toplvl ; 313 | tunit = toplvl >> omit[token(T_EOF)] ; 314 | 315 | BOOST_SPIRIT_DEBUG_NODE(tunit); 316 | BOOST_SPIRIT_DEBUG_NODE(toplvl); 317 | BOOST_SPIRIT_DEBUG_NODE(basic); 318 | BOOST_SPIRIT_DEBUG_NODE(ident); 319 | BOOST_SPIRIT_DEBUG_NODE(pp_cond); 320 | BOOST_SPIRIT_DEBUG_NODE(non_eol); 321 | BOOST_SPIRIT_DEBUG_NODE(textline); 322 | BOOST_SPIRIT_DEBUG_NODE(line_end); 323 | BOOST_SPIRIT_DEBUG_NODE(textblock); 324 | 325 | BOOST_SPIRIT_DEBUG_NODE(cond_if); 326 | BOOST_SPIRIT_DEBUG_NODE(cond_ifdef); 327 | BOOST_SPIRIT_DEBUG_NODE(cond_ifndef); 328 | 329 | } 330 | 331 | private: 332 | boost::spirit::qi::rule ident; 333 | // text rules don't use skippers; we want to keep everything: 334 | boost::spirit::qi::rule()> line_end; 335 | boost::spirit::qi::rule pp_cond; 336 | boost::spirit::qi::rule()> non_eol; 337 | boost::spirit::qi::rule()> textline; 338 | // a textblock is a single section of non-conditional lines 339 | boost::spirit::qi::rule textblock; 340 | 341 | boost::spirit::qi::rule(), skipper> tunit, toplvl; 342 | boost::spirit::qi::rule(cvc5::Term), skipper> 343 | basic; 344 | 345 | // cond_ifdef/cond_ifndef need an attribute for remembering the macro name 346 | boost::spirit::qi::rule(cvc5::Term), skipper, 347 | boost::spirit::locals> cond_ifdef, cond_ifndef; 348 | 349 | // cond_if needs a local attribute for remembering conditions, and one for 350 | // accumulating conditions from elif's 351 | boost::spirit::qi::rule(cvc5::Term), skipper, 352 | boost::spirit::locals> cond_if; 353 | 354 | // for building logical expressions 355 | cvc5::Solver& slv_; 356 | var_cache& vars_; 357 | cond_expr 
expr_parser_; 358 | 359 | cvc5::Term create_binary_expr(cvc5::Kind op, cvc5::Term e1, cvc5::Term e2) { 360 | return slv_.mkTerm(op, {std::move(e1), std::move(e2)}); 361 | } 362 | // e1 && !e2 363 | // useful for "else" clauses 364 | cvc5::Term create_inv_qual_expr(cvc5::Term e1, cvc5::Term e2) { 365 | return e1.andTerm(e2.notTerm()); 366 | } 367 | cvc5::Term create_boolean_const(bool b) { 368 | return slv_.mkBoolean(b); 369 | } 370 | }; 371 | 372 | int main(int argc, char **argv) { 373 | using namespace std; 374 | using namespace boost::wave; 375 | 376 | if ((argc == 1) || (argc > 3)) { 377 | cerr << "usage: " << argv[0] << " [condition] path\n"; 378 | return 4; 379 | } 380 | 381 | char const* fn = ((argc == 2) ? argv[1] : argv[2]); 382 | ifstream cppfile(fn); 383 | if (!cppfile.is_open()) { 384 | cerr << "unable to open requested file " << fn << "\n"; 385 | return 5; 386 | } 387 | cppfile.unsetf(ios::skipws); 388 | boost::spirit::istream_iterator fbeg(cppfile); 389 | 390 | // Give it a try 391 | using token_t = qi_token<>; 392 | using position_t = token_t::position_type; 393 | position_t pos(fn); 394 | 395 | // create Spirit V2-compatible lexer token iterators from character iterators 396 | using cpplexer_iterator_t = qi_lex_iterator; 397 | cpplexer_iterator_t beg(fbeg, boost::spirit::istream_iterator(), pos, 398 | language_support(support_cpp|support_cpp0x)); 399 | cpplexer_iterator_t end; 400 | 401 | cvc5::Solver slv; 402 | var_cache vars(slv); // global so we can share with user expression parser 403 | cond_grammar fileparser(slv, vars); 404 | vector result; 405 | auto start = beg; 406 | bool pass = boost::spirit::qi::phrase_parse(beg, end, fileparser, 407 | skipper(), result); 408 | if (pass) { 409 | if (beg == start) { 410 | cout << "no input consumed!\n"; 411 | return 2; 412 | } else if (beg != end) { 413 | cout << "only some input consumed. 
Remaining:\n"; 414 | copy(beg, end, ostream_iterator>(cout, "")); 415 | return 2; 416 | } 417 | // make an assertion for the user input, if present 418 | if (argc == 3) { 419 | // an expression was supplied 420 | string expr(argv[1]); 421 | position_t epos("command-line input"); 422 | cpplexer_iterator_t ebeg(expr.begin(), expr.end(), pos, 423 | language_support(support_cpp|support_cpp0x)); 424 | cpplexer_iterator_t eend; 425 | 426 | cond_expr exprparser(slv, vars); 427 | cvc5::Term user_expr; 428 | pass = boost::spirit::qi::phrase_parse(ebeg, eend, exprparser, 429 | skipper(), user_expr); 430 | if (!pass) 431 | { 432 | std::cerr << "error parsing assume-expression \"" << argv[1] << "\"\n"; 433 | return 3; 434 | } 435 | slv.assertFormula(user_expr); 436 | } 437 | 438 | for (auto const & s : result) { 439 | if (slv.checkSatAssuming(s.condition).isUnsat()) { 440 | cout << "detected a dead code section in " << s.start.get_file(); 441 | if (s.start.get_line() == (s.end.get_line() - 1)) 442 | { 443 | cout << " on line " << s.start.get_line() << "\n"; 444 | } else { 445 | cout << " from line " << s.start.get_line(); 446 | cout << " to line " << (s.end.get_line() - 1) << "\n"; 447 | } 448 | cout << "with condition "; 449 | cout << slv.simplify(s.condition) << ":\n"; 450 | copy(s.body.begin(), s.body.end(), 451 | ostream_iterator(cout, "")); 452 | } 453 | } 454 | } else { 455 | cout << "parse failed\n"; 456 | return 1; 457 | } 458 | return 0; 459 | } 460 | 461 | #include 462 | -------------------------------------------------------------------------------- /conditional_to_policy.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Example of turning conditional compilation into policy classes 3 | * 4 | * Copyright (C) 2016 Jeff Trull 5 | * 6 | * Distributed under the Boost Software License, Version 1.0. 
(See accompanying 7 | * file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 8 | * 9 | * 10 | */ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "clang/AST/AST.h" 18 | #include "clang/ASTMatchers/ASTMatchers.h" 19 | #include "clang/ASTMatchers/ASTMatchFinder.h" 20 | #include "clang/Basic/SourceManager.h" 21 | #include "clang/Basic/SourceLocation.h" 22 | #include "clang/Basic/FileManager.h" 23 | #include "clang/Tooling/CommonOptionsParser.h" 24 | #include "clang/Tooling/Refactoring.h" 25 | #include "clang/Tooling/Tooling.h" 26 | #include "clang/Lex/Lexer.h" 27 | #include "clang/Lex/Preprocessor.h" 28 | #include "clang/Frontend/CompilerInstance.h" 29 | #include "clang/Frontend/TextDiagnosticPrinter.h" 30 | #include "clang/Rewrite/Core/Rewriter.h" 31 | 32 | #include 33 | 34 | std::string get_source_range(clang::SourceManager const* sm, 35 | clang::LangOptions lopt, clang::SourceRange range) { 36 | auto beg = range.getBegin(); 37 | auto end = range.getEnd(); 38 | clang::SourceLocation true_end = 39 | clang::Lexer::getLocForEndOfToken(end, 0, *sm, lopt); 40 | return std::string(sm->getCharacterData(beg), 41 | sm->getCharacterData(true_end) - sm->getCharacterData(beg)); 42 | } 43 | 44 | void print_source_range(std::ostream& os, clang::SourceManager const* sm, 45 | clang::LangOptions lopt, clang::SourceRange range) { 46 | os << get_source_range(sm, lopt, range); 47 | } 48 | 49 | void print_source_range_info(std::ostream& os, clang::SourceManager const* sm, 50 | clang::SourceRange range) { 51 | auto beg = range.getBegin(); 52 | auto end = range.getEnd(); 53 | os << "from line " << sm->getExpansionLineNumber(beg); 54 | os << " column " << sm->getExpansionColumnNumber(beg); 55 | os << " to line " << sm->getExpansionLineNumber(end); 56 | os << " column " << sm->getExpansionColumnNumber(end) << ":\n"; 57 | } 58 | 59 | void print_decorated_source_range(std::ostream& os, clang::SourceManager const* sm, 60 | clang::LangOptions 
lopt, clang::SourceRange range) { 61 | print_source_range_info(os, sm, range); 62 | os << "===========\n"; 63 | print_source_range(os, sm, lopt, range); 64 | os << "\n===========\n"; 65 | } 66 | 67 | template // whether to find ranges where the symbols is defined 68 | struct PPActions : clang::PPCallbacks 69 | { 70 | PPActions(clang::LangOptions lopt, 71 | clang::SourceManager & sm, 72 | std::string mname, 73 | std::vector& source_ranges, 74 | std::vector& source_ranges_pp) 75 | : lopt_(lopt), sm_(sm), mname_(mname), 76 | source_ranges_(source_ranges), source_ranges_pp_(source_ranges_pp) {} 77 | 78 | void Ifdef(clang::SourceLocation loc, 79 | clang::Token const& tok, 80 | clang::MacroDefinition const&) override { 81 | // check for our target macro and sense 82 | if (tok.getIdentifierInfo()->getName().str() == mname_) { 83 | // determine where the #ifdef macro ends 84 | auto tok_end = clang::Lexer::getLocForEndOfToken(tok.getLocation(), 85 | 0, sm_, lopt_); 86 | cond_starts_.emplace(loc, std::make_pair(true, tok_end)); 87 | else_loc_ = std::nullopt; 88 | } 89 | } 90 | 91 | void Ifndef(clang::SourceLocation loc, 92 | clang::Token const& tok, 93 | clang::MacroDefinition const&) override { 94 | if (tok.getIdentifierInfo()->getName().str() == mname_) { 95 | auto tok_end = clang::Lexer::getLocForEndOfToken(tok.getLocation(), 96 | 0, sm_, lopt_); 97 | cond_starts_.emplace(loc, std::make_pair(false, tok_end)); 98 | else_loc_ = std::nullopt; 99 | } 100 | } 101 | 102 | // else and endif are reported as long as their corresponding if is not *within* a skipped region 103 | // Note in this area that source ranges are inclusive of their bounds, and the "end" location 104 | // may point to the start of a token, in which case the entire token is included. 
105 | 106 | void Else(clang::SourceLocation elseloc, 107 | clang::SourceLocation ifloc) override { 108 | // see if this else is related to an ifdef/ifndef for our target macro 109 | auto start_it = cond_starts_.find(ifloc); 110 | if (start_it != cond_starts_.end()) { 111 | else_loc_ = clang::Lexer::getLocForEndOfToken(elseloc, 0, sm_, lopt_); 112 | if (start_it->second.first == Sense) { 113 | // this is the *end* of our range of interest 114 | // PP-inclusive range starts at hash, ends at trailing "e" of "else" 115 | source_ranges_pp_.emplace_back(ifloc.getLocWithOffset(-1), 116 | *else_loc_); 117 | // for PP-exclusive we use just past the end of the #ifdef/ifndef 118 | // which we stored when we found the statement 119 | source_ranges_.emplace_back(start_it->second.second, 120 | elseloc.getLocWithOffset(-2)); // *before* the hash 121 | } 122 | // otherwise this begins a range of interest which starts *after* the else 123 | } 124 | } 125 | 126 | void Endif(clang::SourceLocation endifloc, 127 | clang::SourceLocation ifloc) override { 128 | // is this endif related to an ifdef/ifndef of interest? 
129 | auto start_it = cond_starts_.find(ifloc); 130 | if (start_it != cond_starts_.end()) { 131 | auto end_of_endif = clang::Lexer::getLocForEndOfToken(endifloc, 0, sm_, lopt_); 132 | // this endif may terminate: 133 | // - an if of the desired sense without an else (range is ifloc through here) 134 | if ((start_it->second.first == Sense) && !else_loc_) { 135 | source_ranges_.emplace_back(start_it->second.second, 136 | endifloc.getLocWithOffset(-2)); 137 | source_ranges_pp_.emplace_back(ifloc.getLocWithOffset(-1), end_of_endif); 138 | // - an if of the inverted sense with an else (range is else through here) 139 | } else if ((start_it->second.first != Sense) && else_loc_) { 140 | // else_loc_ is always "end of the else" 141 | // we use it for both purposes, assigning the #else always to the first 142 | // section for cleanup purposes 143 | source_ranges_.emplace_back(*else_loc_, 144 | endifloc.getLocWithOffset(-2)); 145 | source_ranges_pp_.emplace_back(*else_loc_, end_of_endif); 146 | // - an if of inverted sense without an else - empty range 147 | } else if (start_it->second.first != Sense) { 148 | // an empty range must have end before start... 
but some parts of Clang don't like 149 | // we will detect this case before passing it on to any part of Clang 150 | source_ranges_.emplace_back(clang::SourceRange()); 151 | source_ranges_pp_.emplace_back(clang::SourceRange()); 152 | } 153 | // - an if of desired sense with else (we found the range when we found the else) 154 | } 155 | } 156 | 157 | private: 158 | clang::LangOptions lopt_; 159 | clang::SourceManager const& sm_; 160 | std::string mname_; 161 | std::map > // where the if ends (last char of macro name) 165 | cond_starts_; 166 | 167 | std::optional else_loc_; // most recent "else", if any 168 | std::vector& source_ranges_; 169 | std::vector& source_ranges_pp_; 170 | 171 | }; 172 | 173 | template 174 | using RangeNodes = std::vector>; 175 | 176 | // We define our own type of "location" to be independent of any SourceManager 177 | // It's capable of being turned into a Replacement 178 | struct CondLocation 179 | { 180 | CondLocation(clang::SourceManager const& sm, 181 | clang::SourceLocation const& loc) 182 | : filename_(sm.getFilename(loc)), offset_(sm.getFileOffset(loc)) {} 183 | 184 | // so we can use in ordered containers 185 | bool operator<(const CondLocation& other) const { 186 | assert(other.filename_ == filename_); 187 | return (offset_ < other.offset_); 188 | } 189 | 190 | std::string const& getFilename() const { 191 | return filename_; 192 | } 193 | unsigned getFileOffset() const { 194 | return offset_; 195 | } 196 | 197 | private: 198 | std::string filename_; 199 | unsigned offset_; 200 | }; 201 | 202 | struct CondRange { 203 | CondRange(clang::SourceManager const& sm, 204 | clang::SourceRange const& range) 205 | : beg_(CondLocation(sm, range.getBegin())), 206 | end_(CondLocation(sm, range.getEnd())) {} 207 | 208 | CondLocation const& getBegin() const { 209 | return beg_; 210 | } 211 | 212 | CondLocation const& getEnd() const { 213 | return end_; 214 | } 215 | 216 | private: 217 | CondLocation beg_, end_; 218 | }; 219 | 220 | // a class 
representing a conditional region in the form of two enclosing regions 221 | // one which includes the preprocessor directives and one which doesn't 222 | struct CondRegion { 223 | 224 | CondRegion(CondRange const& contents, CondRange const& contents_incl_pp) 225 | : contents_(contents), contents_incl_pp_(contents_incl_pp) {} 226 | 227 | CondRange const& contents() const { 228 | return contents_; 229 | } 230 | 231 | CondRange const& contents_with_pp() const { 232 | return contents_incl_pp_; 233 | } 234 | 235 | private: 236 | CondRange contents_; // between #ifdef/#ifndef/#else and #else/#endif 237 | CondRange contents_incl_pp_; // including the enclosing directives (for cleanup) 238 | }; 239 | 240 | struct RegionStatementProperties { 241 | std::size_t count; // the number of statements found within the region 242 | }; 243 | // Per-source-file hooks: registers PPActions on the preprocessor when a file begins 244 | template 245 | struct SourceFileHooks : clang::tooling::SourceFileCallbacks 246 | { 247 | SourceFileHooks(std::string mname, 248 | std::vector& source_ranges, 249 | std::vector& source_ranges_pp, 250 | std::vector>& cond_regions, 251 | RangeNodes const& decls, 252 | RangeNodes const& stmts, 253 | std::vector>& type_names, 254 | std::vector& stmt_props, 255 | clang::tooling::Replacements* replace, 256 | std::string& preamble) 257 | : mname_(mname), source_ranges_(source_ranges), source_ranges_pp_(source_ranges_pp), 258 | cond_regions_(cond_regions), ci_(nullptr), decls_(decls), stmts_(stmts), 259 | type_names_(type_names), stmt_props_(stmt_props), replace_(replace), preamble_(preamble) {} 260 | 261 | ~SourceFileHooks() override {} 262 | 263 | bool handleBeginSource(clang::CompilerInstance & ci) override { 264 | ci_ = &ci; // remembered so handleEndSource can reach the SourceManager and LangOptions 265 | // at this point the preprocessor has been initialized, so we cannot add definitions 266 | // we can, however, set up callbacks 267 | ci.getPreprocessor().addPPCallbacks( 268 | std::make_unique>(ci.getLangOpts(), ci.getSourceManager(), 269 | mname_, source_ranges_, source_ranges_pp_)); 270 | // when the 
preprocessor runs it will update source_ranges as it finds conditionals 271 | return true; 272 | } 273 | 274 | void handleEndSource() override { 275 | // return information about the source ranges we found and their contents 276 | // it seems that by the time the RefactoringTool finishes running some of the 277 | // compiler/ast information gets lost, so we do it here while we can still do lookups 278 | using namespace clang; 279 | SourceManager const* sm = &ci_->getSourceManager(); 280 | LangOptions lopt = ci_->getLangOpts(); 281 | 282 | // fill CondRegion container with default-constructed (therefore empty) ranges 283 | cond_regions_.resize(source_ranges_.size()); 284 | 285 | for ( std::size_t i = 0; i < source_ranges_.size(); ++i) { 286 | if (source_ranges_[i].isInvalid()) { 287 | // this means an empty range. We leave a placeholder here to sync 288 | // up the indices between senses. 289 | continue; 290 | } 291 | cond_regions_[i] = CondRegion(CondRange(*sm, source_ranges_[i]), 292 | CondRange(*sm, source_ranges_pp_[i])); 293 | std::cout << "The range "; 294 | print_decorated_source_range(std::cout, sm, lopt, source_ranges_[i]); 295 | if (source_ranges_[i].getBegin() == source_ranges_[i].getEnd()) { 296 | std::cout << " is empty\n"; 297 | continue; 298 | } 299 | if (decls_[i].empty() && stmts_[i].empty()) { 300 | std::cout << " contains nothing we are interested in\n"; 301 | } 302 | if (!stmts_[i].empty()) { 303 | std::cout << " contains " << stmts_[i].size() << " statements:\n"; 304 | for( Stmt const * stmt : stmts_[i] ) { 305 | print_source_range(std::cout, sm, lopt, stmt->getSourceRange()); 306 | std::cout << ";\n"; 307 | } 308 | } else if (!decls_[i].empty()) { 309 | std::cout << " contains declarations, but no statements\n"; 310 | } 311 | 312 | // create a replacement that removes this conditional range (including PP directives) 313 | if (replace_->add(tooling::Replacement(*sm, CharSourceRange(source_ranges_pp_[i], true), 314 | "", lopt))) { 315 | throw 
std::logic_error("unable to create Replacement to remove conditional range"); 316 | } 317 | } 318 | 319 | // post-process bits of the AST we gathered to produce refactoring info that persists 320 | // after this tool completes 321 | type_names_.resize(decls_.size()); 322 | for (std::size_t i = 0; i < decls_.size(); ++i) { 323 | for (auto decl : decls_[i]) { 324 | if (clang::TypedefDecl const* td = llvm::dyn_cast(decl)) { 325 | type_names_[i].emplace_back(td->getName()); 326 | } else if (auto ud = llvm::dyn_cast(decl)) { 327 | type_names_[i].emplace_back(ud->getName()); 328 | } 329 | // there should be no other types, actually, as we restrict to the 330 | // above two in the Matcher 331 | } 332 | } 333 | 334 | // figure out how many statements there are in each region 335 | // we won't add closures around code without statements 336 | stmt_props_.resize(stmts_.size()); 337 | for (std::size_t i = 0; i < stmts_.size(); ++i) { 338 | stmt_props_[i].count = stmts_[i].size(); 339 | // this is where we can record the statement text 340 | } 341 | 342 | // create a specialization for this sense of the target macro 343 | std::string cond_class; 344 | if (Sense) { 345 | // base template is the "true" case 346 | // Leveraging the fact that we run the "true" case first... 
347 | cond_class += std::string("template\nstruct "); 348 | cond_class += (mname_ + "_class {\n"); 349 | } else { 350 | cond_class = std::string("template<>\nstruct "); 351 | cond_class += (mname_ + "_class {\n"); 352 | } 353 | for ( auto const& declgroup : decls_ ) { 354 | for ( auto decl : declgroup ) { 355 | cond_class += " "; 356 | cond_class += get_source_range(&decl->getASTContext().getSourceManager(), 357 | decl->getASTContext().getLangOpts(), 358 | decl->getSourceRange()); 359 | cond_class += ";\n"; 360 | } 361 | } 362 | cond_class += "};\n"; 363 | preamble_ += cond_class; 364 | 365 | } 366 | 367 | private: 368 | std::string mname_; 369 | std::vector& source_ranges_, source_ranges_pp_; 370 | std::vector>& cond_regions_; 371 | clang::CompilerInstance* ci_; 372 | RangeNodes const & decls_; 373 | RangeNodes const & stmts_; 374 | std::vector>& type_names_; 375 | std::vector& stmt_props_; 376 | clang::tooling::Replacements * replace_; 377 | std::string& preamble_; 378 | 379 | }; 380 | 381 | namespace custom_matchers { 382 | 383 | // define an AST matcher for a source location range 384 | // it will match all statements whose associated start/end locations are within the range 385 | AST_MATCHER_P(clang::Stmt, statementInRange, 386 | clang::SourceRange, range) { 387 | // true if the statement node is entirely within the supplied range 388 | // i.e. 
they can be coterminous but the statement cannot start before or end after 389 | clang::SourceManager const& sm = Finder->getASTContext().getSourceManager(); 390 | return !sm.isBeforeInTranslationUnit(Node.getBeginLoc(), range.getBegin()) && 391 | !sm.isBeforeInTranslationUnit(range.getEnd(), Node.getEndLoc()); 392 | } 393 | 394 | AST_MATCHER_P(clang::Decl, declInRange, 395 | clang::SourceRange, range) { 396 | clang::SourceManager const& sm = Finder->getASTContext().getSourceManager(); 397 | return !sm.isBeforeInTranslationUnit(Node.getBeginLoc(), range.getBegin()) && 398 | !sm.isBeforeInTranslationUnit(range.getEnd(), Node.getEndLoc()); 399 | } 400 | 401 | // BOZO can we do the above polymorphically in the node type (decl/stmt)? 402 | 403 | } 404 | 405 | // action for when we find a node within a source range 406 | template 407 | struct RangeMatchHandler : clang::ast_matchers::MatchFinder::MatchCallback { 408 | 409 | RangeMatchHandler(std::vector& nodes) : nodes_(nodes) {} 410 | 411 | virtual void run(clang::ast_matchers::MatchFinder::MatchResult const& result) { 412 | AstNode const * node = result.Nodes.getNodeAs("target"); 413 | nodes_.push_back(node); 414 | } 415 | 416 | private: 417 | std::vector& nodes_; // a place to store nodes within our range 418 | }; 419 | 420 | // Test hook to install matchers 421 | // We don't know which source ranges we want to find until preprocessing completes, 422 | // which means we have to set up matchers after parsing begins but before AST traversal 423 | // it's a little weird to use this test hook but it's exactly what we need 424 | struct MatcherInstaller : clang::ast_matchers::MatchFinder::ParsingDoneTestCallback { 425 | 426 | ~MatcherInstaller() override {} 427 | 428 | MatcherInstaller(clang::ast_matchers::MatchFinder& finder, 429 | std::vector const& ranges, 430 | RangeNodes& decl_nodes, 431 | RangeNodes& stmt_nodes, 432 | std::vector>& type_names) 433 | : finder_(finder), ranges_(ranges), 434 | decl_nodes_(decl_nodes), 
stmt_nodes_(stmt_nodes), type_names_(type_names) {} 435 | 436 | virtual void run() override { 437 | // install matchers for the given ranges into the finder 438 | using namespace clang::ast_matchers; 439 | using namespace custom_matchers; 440 | 441 | // ensure handler vectors don't resize, invalidating pointers 442 | decl_handlers_.reserve(ranges_.size()); 443 | stmt_handlers_.reserve(ranges_.size()); 444 | decl_nodes_.reserve(ranges_.size()); 445 | stmt_nodes_.reserve(ranges_.size()); 446 | 447 | for( auto const& range : ranges_ ) { 448 | // typedef matcher 449 | decl_nodes_.emplace_back(); 450 | type_names_.emplace_back(); 451 | decl_handlers_.emplace_back(decl_nodes_.back()); 452 | // statement matcher 453 | stmt_nodes_.emplace_back(); 454 | stmt_handlers_.emplace_back(stmt_nodes_.back()); 455 | 456 | if (range.isInvalid()) { 457 | // one of our empty ranges. Do not install finder/matcher 458 | // but keep placeholders so ranges line up between defined/undefined conditions 459 | continue; 460 | } 461 | 462 | // install range finder for conditional declarations 463 | finder_.addMatcher( 464 | decl( 465 | isExpansionInMainFile(), // not in an included header 466 | declInRange(range), // within target range 467 | anyOf(typedefDecl(), usingDecl()), // a type declaration 468 | decl().bind("target")), 469 | &decl_handlers_.back()); 470 | 471 | // install range finder for conditional statements 472 | finder_.addMatcher( 473 | stmt( // statement requirements: 474 | isExpansionInMainFile(), 475 | statementInRange(range), 476 | hasParent( 477 | compoundStmt()), // part of compound statement 478 | unless(declStmt( // not a type declaration 479 | hasSingleDecl( 480 | anyOf(typedefDecl(), usingDecl())))), 481 | stmt().bind("target")), // remember it 482 | &stmt_handlers_.back()); 483 | } 484 | } 485 | 486 | private: 487 | clang::ast_matchers::MatchFinder& finder_; 488 | std::vector const& ranges_; 489 | 490 | template 491 | using HandlerVec = std::vector>; 492 | HandlerVec 
decl_handlers_; 493 | HandlerVec stmt_handlers_; 494 | RangeNodes& decl_nodes_; 495 | RangeNodes& stmt_nodes_; 496 | // names of the types defined in each range 497 | // for "using" statement 498 | std::vector>& type_names_; 499 | }; 500 | 501 | // run a tool (usually a Finder) on an input file 502 | // Make the supplied macro be defined on the command line if Sense is true 503 | // optionally add source file callbacks to hook the beginning and end of each file processed 504 | template 505 | int runToolOnFile(FactoryT* consumerFactory, 506 | std::string mname, 507 | std::string fileName, 508 | clang::tooling::SourceFileCallbacks* cb = nullptr) { 509 | using namespace clang::tooling; 510 | // create a fake command line of type Clang tools accept 511 | std::vector args; 512 | args.push_back("c2p"); 513 | args.push_back(fileName.c_str()); 514 | // append -D for the "macro defined" case 515 | args.push_back("--"); 516 | args.push_back("-std=c++14"); 517 | if (Sense) { 518 | std::string define_macro("-D"); 519 | define_macro += mname; 520 | args.push_back(define_macro.c_str()); 521 | } 522 | // prepare tool arguments 523 | // avoiding the use of CommonOptionsParser, which uses statics... 
524 | int args_c = args.size(); 525 | std::string error_msg; 526 | std::unique_ptr 527 | compdb(FixedCompilationDatabase::loadFromCommandLine(args_c, args.data(), error_msg)); 528 | std::vector comp_file_list(1, fileName); 529 | 530 | // define the tool from those options 531 | RefactoringTool tool(*compdb, llvm::ArrayRef(comp_file_list)); 532 | 533 | return tool.run(newFrontendActionFactory(consumerFactory, cb).get()); 534 | } 535 | 536 | template 537 | int runToolOnString(FactoryT* consumerFactory, 538 | std::string mname, 539 | std::string contents, 540 | clang::tooling::SourceFileCallbacks* cb = nullptr) { 541 | using namespace clang::tooling; 542 | // create a fake command line of type Clang tools accept 543 | std::vector args; 544 | // append -D for the "macro defined" case 545 | args.push_back("-std=c++14"); 546 | if (Sense) { 547 | std::string define_macro("-D"); 548 | define_macro += mname; 549 | args.push_back(define_macro.c_str()); 550 | } 551 | 552 | int result = runToolOnCodeWithArgs(newFrontendActionFactory(consumerFactory, cb)->create(), 553 | contents, args); 554 | return result; 555 | } 556 | 557 | template 558 | int FindConditionalNodes(std::string mname, 559 | std::string fileName, 560 | // result storage 561 | std::vector>& cond_regions, 562 | std::vector>& typedefs, 563 | std::vector& stmt_props, 564 | clang::tooling::Replacements& replacements, 565 | std::string& preamble) 566 | { 567 | using namespace clang; 568 | using namespace clang::tooling; 569 | using namespace clang::ast_matchers; 570 | 571 | // strictly for communication between match callbacks and the source file hooks 572 | // which are defined in this scope. 
This data becomes invalid after the tool is run: 573 | std::vector source_ranges, source_ranges_pp; 574 | RangeNodes decls; 575 | RangeNodes stmts; 576 | 577 | // non-Clang stuff can and will outlive the tool though: 578 | std::vector> 579 | type_names; // types defined in each range 580 | 581 | // create callbacks for storing the conditional ranges as the preprocessor finds them 582 | SourceFileHooks source_hooks(mname, source_ranges, source_ranges_pp, 583 | cond_regions, decls, stmts, 584 | type_names, stmt_props, 585 | &replacements, preamble); 586 | 587 | // use test hook to set up range matchers: after preprocessing, but before AST visitation 588 | MatchFinder finder; 589 | MatcherInstaller set_up_source_ranges(finder, source_ranges, decls, stmts, type_names); 590 | finder.registerTestCallbackAfterParsing(&set_up_source_ranges); 591 | 592 | 593 | std::cout << "Conditional source ranges for when FOO is "; 594 | std::cout << (Sense ? "defined" : "not defined") << ":\n"; 595 | // run the tool 596 | if (int result = runToolOnFile(&finder, mname, fileName, &source_hooks)) { 597 | return result; 598 | } 599 | 600 | if (!Sense) { 601 | // choose a specialization 602 | std::string choose_condition("#ifdef "); 603 | choose_condition += (mname + "\n"); 604 | choose_condition += (" using " + mname + "_t = " + mname + "_class;\n"); 605 | choose_condition += "#else\n"; 606 | choose_condition += (" using " + mname + "_t = " + mname + "_class;\n"); 607 | choose_condition += "#endif\n"; 608 | 609 | preamble += choose_condition; 610 | } 611 | 612 | // remember the types that were defined in this condition 613 | typedefs.resize(type_names.size()); 614 | for( std::size_t i = 0; i < type_names.size(); ++i) { 615 | // uniquify by range via set insertion 616 | std::copy(type_names[i].begin(), type_names[i].end(), 617 | std::inserter(typedefs[i], typedefs[i].end())); 618 | } 619 | 620 | return 0; 621 | } 622 | 623 | // a list of variable names and types found captured by a single 
lambda 624 | struct capture_t { 625 | std::string varname; 626 | std::string vartype; 627 | bool is_const; // presently unimplemented/unused 628 | }; 629 | using capture_list_t = std::vector; 630 | 631 | // trick: instantiate one of these for each lambda we insert 632 | // that way we don't have to deal with interpreting the lambda name 633 | // callback will keep a reference to the statement region properties' capture list 634 | struct CaptureReporter : clang::ast_matchers::MatchFinder::MatchCallback { 635 | CaptureReporter(capture_list_t& captures) : captures_(captures) {} 636 | virtual void run(clang::ast_matchers::MatchFinder::MatchResult const& result) override { 637 | using namespace clang; 638 | LambdaExpr const * lambda = result.Nodes.getNodeAs("lambda"); 639 | // record each of its captures 640 | for (clang::LambdaCapture const & lc : lambda->captures()) { 641 | if (lc.capturesVariable()) { 642 | capture_t capture; 643 | capture.varname = lc.getCapturedVar()->getQualifiedNameAsString(); 644 | capture.vartype = lc.getCapturedVar()->getType().getAsString(); 645 | std::cerr << "recording variable " << capture.varname << " type " << capture.vartype << "\n"; 646 | captures_.push_back(capture); 647 | } 648 | } 649 | } 650 | private: 651 | capture_list_t& captures_; 652 | }; 653 | 654 | // Edit text to surround conditional regions containing statements with a lambda capture 655 | using cond_region_list_t = std::vector>; 656 | std::string 657 | annotate_conditionals_with_lambdas(std::string const& body, 658 | std::vector const& stmt_props, 659 | cond_region_list_t const & cond_regions) { 660 | // create replacements list to add lambdas for this sense 661 | using namespace clang::tooling; 662 | Replacements replacements; 663 | std::size_t lambda_counter = 0; 664 | for (auto region : cond_regions) { 665 | // the region may be empty, which indicates that it wasn't present, i.e. 666 | // is the other sense of something that lacked an "else". 
We want the indices 667 | // to stay in sync with the other sense, so: 668 | if (!region || 669 | !stmt_props[lambda_counter].count) { // also: no statements inside this region 670 | lambda_counter++; 671 | continue; 672 | } 673 | 674 | std::string closure_name("_cond_statement_closure_"); 675 | closure_name += std::to_string(lambda_counter++); 676 | std::string lambda_start = "\nauto " + closure_name + " = [&]() {\n"; 677 | // a replacement to insert this at the beginning of the conditional region 678 | auto cond_range = region->contents(); 679 | if (replacements.add( 680 | Replacement(cond_range.getBegin().getFilename(), 681 | cond_range.getBegin().getFileOffset(), 682 | 0, lambda_start))) { 683 | throw std::logic_error("unable to create Replacement from lambda preamble"); 684 | } 685 | // close the end 686 | std::string lambda_end("};\n"); 687 | lambda_end += closure_name + "();\n"; // execute lambda to retain semantic equivalence 688 | if (replacements.add( 689 | Replacement(cond_range.getEnd().getFilename(), 690 | cond_range.getEnd().getFileOffset() + 1, 691 | 0, lambda_end))) { 692 | throw std::logic_error("unable to create Replacement from lambda execution"); 693 | } 694 | } 695 | 696 | // perform replacements on string 697 | auto result = applyAllReplacements(body, replacements); 698 | if (!result) { 699 | throw std::logic_error("unable to create Replacement from lambda execution"); 700 | } 701 | return result.get(); 702 | } 703 | 704 | // take lambda-annotated code and produce a list of captures 705 | template 706 | std::vector 707 | analyze_captures(std::string const& annotated_code, 708 | std::string const& mname, 709 | std::vector const& stmt_props, 710 | cond_region_list_t const& cond_regions) { 711 | 712 | using namespace clang::tooling; 713 | using namespace clang::ast_matchers; 714 | 715 | std::vector result(cond_regions.size()); 716 | std::vector matchers; 717 | std::vector reporters; 718 | 719 | // create a set of matchers for the generated 
closures, with related callbacks 720 | // to record each captured variable 721 | MatchFinder finder; 722 | for (std::size_t i = 0; i < cond_regions.size(); ++i) { 723 | matchers.emplace_back( 724 | varDecl( 725 | hasType(autoType()), 726 | matchesName("_cond_statement_closure_" + std::to_string(i)), 727 | hasInitializer( 728 | cxxConstructExpr( 729 | hasDescendant( // BOZO prefer to use something more specific? 730 | lambdaExpr().bind("lambda")))))); 731 | reporters.emplace_back(result[i]); 732 | 733 | if (!cond_regions[i] || !stmt_props[i].count) { 734 | // a nonexistent region or one with no statements 735 | continue; 736 | } 737 | // run matcher to map lambda names to capture list 738 | finder.addMatcher(matchers[i], &reporters[i]); 739 | } 740 | 741 | // run tool with matchers and callbacks to produce result 742 | runToolOnString(&finder, mname, annotated_code); 743 | 744 | // return capture list 745 | return result; 746 | } 747 | 748 | 749 | // figure out the parameters of the static member function we will generate 750 | // by running matchers etc. 
751 | template 752 | std::vector 753 | vars_used(std::string const& mname, 754 | std::string filename, 755 | std::vector const& stmt_props, 756 | cond_region_list_t const& cond_regions) { 757 | 758 | // get file into a string 759 | std::ifstream file(filename); 760 | if (!file.is_open()) { 761 | throw std::runtime_error("could not open file " + filename); 762 | } 763 | file.unsetf(std::ios::skipws); 764 | std::string contents{std::istream_iterator(file), 765 | std::istream_iterator()}; 766 | 767 | // annotate file contents with lambdas surrounding conditionals containing statements 768 | std::string annotatedContents = annotate_conditionals_with_lambdas(contents, stmt_props, cond_regions); 769 | 770 | // Run capture analysis on the annotated contents to collect statements 771 | return analyze_captures(annotatedContents, mname, stmt_props, cond_regions); 772 | } 773 | 774 | // actually generate a string containing the static member function 775 | // to implement 776 | template 777 | std::string 778 | fn_wrap_statements(std::string statements, 779 | std::string fn_name, // also macro name? or class name? 
780 | capture_list_t const & params); 781 | 782 | 783 | int main(int argc, char const **argv) { 784 | 785 | using namespace clang; 786 | 787 | if (argc != 3) { 788 | std::cerr << "usage: " << argv[0] << " MACRO filename\n"; 789 | return 1; 790 | } 791 | 792 | /* 793 | * Evaluate macro defined condition 794 | */ 795 | 796 | // when tool run completes we will have the following data: 797 | tooling::Replacements replacements; // modification instructions 798 | std::string preamble; // definitions inserted at top of file 799 | 800 | // build and run for "defined" case 801 | cond_region_list_t cond_regions_defined; // source region for each ifdef 802 | std::vector> typedefs_defined; 803 | std::vector stmt_props_defined; 804 | if (int result = FindConditionalNodes(argv[1], argv[2], cond_regions_defined, 805 | typedefs_defined, stmt_props_defined, 806 | replacements, preamble)) { 807 | return result; 808 | } 809 | 810 | // and the same for the "undefined" case: 811 | cond_region_list_t cond_regions_undefined; 812 | std::vector> typedefs_undefined; 813 | std::vector stmt_props_undefined; 814 | if (int result = FindConditionalNodes(argv[1], argv[2], cond_regions_undefined, 815 | typedefs_undefined, stmt_props_undefined, 816 | replacements, preamble)) { 817 | return result; 818 | } 819 | 820 | // analyze statements in each conditional region 821 | auto vars_defined = vars_used(argv[1], argv[2], stmt_props_defined, cond_regions_defined); 822 | std::cout << "TRUE case:\n"; 823 | for (std::size_t i = 0; i < vars_defined.size(); ++i) { 824 | if (cond_regions_defined[i] && stmt_props_defined[i].count) { 825 | std::cout << "capture expression " << i << " produced the following parameters:\n"; 826 | for (auto capture: vars_defined[i]) { 827 | std::cout << capture.varname << " type " << capture.vartype << "\n"; 828 | } 829 | } 830 | } 831 | 832 | auto vars_undefined = vars_used(argv[1], argv[2], stmt_props_undefined, cond_regions_undefined); 833 | std::cout << "FALSE case:\n"; 834 | 
for (std::size_t i = 0; i < vars_undefined.size(); ++i) { 835 | if (cond_regions_undefined[i] && stmt_props_undefined[i].count) { 836 | std::cout << "capture expression " << i << " produced the following parameters:\n"; 837 | for (auto capture: vars_undefined[i]) { 838 | std::cout << capture.varname << " type " << capture.vartype << "\n"; 839 | } 840 | } 841 | } 842 | 843 | // if any conditional regions have matching (in name) type declarations, 844 | // replace with a single one referring to the chosen specialization 845 | for (std::size_t i = 0; i < cond_regions_defined.size(); ++i) { 846 | if (!cond_regions_defined[i] || !cond_regions_undefined[i]) { 847 | continue; 848 | } 849 | // put using statement right before directive that starts the conditional region 850 | CondLocation start = std::min(cond_regions_defined[i]->contents_with_pp().getBegin(), 851 | cond_regions_undefined[i]->contents_with_pp().getBegin()); 852 | std::string mname(argv[1]); 853 | std::set_intersection( 854 | typedefs_defined[i].begin(), typedefs_defined[i].end(), 855 | typedefs_undefined[i].begin(), typedefs_undefined[i].end(), 856 | // for each type declared in BOTH configurations: 857 | boost::make_function_output_iterator( 858 | [&](std::string const& t) { 859 | // insert a using statement in the body: 860 | std::string tdef(" using " + t + " = " + mname + "_t::" + t + ";\n"); 861 | if (replacements.add( 862 | tooling::Replacement(start.getFilename(), 863 | start.getFileOffset(), 864 | 0, tdef))) { 865 | throw std::logic_error("unable to create Replacement from using statement for type"); 866 | }; 867 | })); 868 | } 869 | 870 | // finally add the class definition, specialization, and using statements 871 | if (replacements.add( 872 | tooling::Replacement(clang::tooling::getAbsolutePath(argv[2]), 0, 0, preamble))) { 873 | throw std::logic_error("failed to insert preamble as a Replacement"); 874 | } 875 | 876 | std::cerr << "replacements:\n"; 877 | for ( auto const& rep : replacements ) { 
878 | std::cerr << rep.toString() << "\n"; 879 | } 880 | 881 | // apply all replacements to original source file 882 | // (code from RefactoringTool::runAndSave) 883 | IntrusiveRefCntPtr diag_opts = new DiagnosticOptions(); 884 | TextDiagnosticPrinter tdp(llvm::errs(), &*diag_opts); 885 | DiagnosticsEngine diagnostics( 886 | IntrusiveRefCntPtr(new DiagnosticIDs()), 887 | &*diag_opts, &tdp, false); 888 | FileManager fm{FileSystemOptions()}; 889 | SourceManager sources(diagnostics, fm); 890 | 891 | LangOptions DefaultLangOptions; 892 | Rewriter rewriter(sources, DefaultLangOptions); 893 | if (!tooling::applyAllReplacements(replacements, rewriter)) { 894 | std::cerr << "rewriting of source file failed\n"; 895 | return 1; 896 | } 897 | if (rewriter.overwriteChangedFiles()) { 898 | std::cerr << "failed to save results\n"; 899 | return 2; 900 | } 901 | 902 | 903 | return 0; 904 | } 905 | -------------------------------------------------------------------------------- /qi_token.hpp: -------------------------------------------------------------------------------- 1 | // Support for Spirit V2 (Qi) parsing with Wave tokens 2 | // Copyright (c) 2021 Jeffrey Trull 3 | // Copyright (c) 2001-2012 Hartmut Kaiser 4 | // 5 | // Distributed under the Boost Software License, Version 1.0. 
(See accompanying 6 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | // we need to wrap cpplexer's tokens so they can be used as Spirit V2 Lex 18 | // tokens compatible with qi::token 19 | template 20 | class qi_token : private boost::wave::cpplexer::lex_token 21 | { 22 | // pretend to be a lexertl token with flexible attributes 23 | // model: lex::lexertl::token, 25 | // mpl::false_> 26 | 27 | typedef typename boost::wave::cpplexer::lex_token base_type; 28 | 29 | public: 30 | typedef typename base_type::string_type base_string_t; 31 | typedef typename base_type::string_type string_type; 32 | typedef typename string_type::const_iterator base_string_iter_t; 33 | typedef PositionT position_type; 34 | 35 | // requirements from Spirit V2 36 | typedef boost::wave::token_id id_type; 37 | typedef base_string_iter_t iterator_type; 38 | typedef boost::mpl::false_ has_state; 39 | typedef std::pair token_value_type; 40 | 41 | qi_token() {} 42 | qi_token(int dummy) : base_type(dummy) {} 43 | qi_token(id_type id, string_type const & value, PositionT pos) 44 | : base_type(id, value, pos) {} 45 | 46 | id_type id() const { 47 | // apply user-defined conversion to id_type 48 | return static_cast(*this); 49 | } 50 | operator id_type() const { return id(); } 51 | 52 | bool eoi() const { 53 | return static_cast(*this).is_eoi(); 54 | } 55 | 56 | // returns the Qi token value (get_value() supplies the Wave value) 57 | token_value_type value() const { 58 | return std::pair( 59 | get_value(), static_cast(this)->get_position()); 60 | } 61 | 62 | // Wave requirements delegated to base class 63 | 64 | bool operator==(qi_token const & other) const { 65 | return static_cast(*this) == static_cast(other); 66 | } 67 | 68 | string_type const & get_value() const { 69 | return static_cast(*this).get_value(); 70 | } 71 | 72 | bool is_valid() const { 73 | return 
static_cast(*this).is_valid(); 74 | } 75 | 76 | // Spirit V2 debugging 77 | 78 | #if defined(BOOST_SPIRIT_DEBUG) 79 | friend std::ostream& 80 | operator<< (std::ostream &os, qi_token const & tok) { 81 | using namespace boost::wave; 82 | auto id = token_id(tok); 83 | os << get_token_name(id) << "("; 84 | if (id == T_NEWLINE) { 85 | os << "\\n"; 86 | } else { 87 | os << tok.get_value(); 88 | } 89 | os << ")" ; 90 | return os; 91 | } 92 | #endif 93 | }; 94 | 95 | // 96 | // Spirit V2 helper function requirements for token (see lex/lexer/lexertl/token.hpp) 97 | // 98 | 99 | template 100 | inline bool 101 | token_is_valid(qi_token const & t) 102 | { 103 | return t.is_valid(); 104 | } 105 | 106 | // 107 | // Spirit.Qi customization points 108 | // 109 | 110 | namespace boost { namespace spirit { namespace traits 111 | { 112 | 113 | // 114 | // Teach Spirit how to get data from our token into attributes 115 | // 116 | 117 | // generally following Spirit.Lex's lexertl/token.hpp here 118 | 119 | // string or range requests the underlying char data 120 | template 121 | struct assign_to_attribute_from_value > 122 | { 123 | static void 124 | call(qi_token const & tok, StringT & attr) 125 | { 126 | // use the Wave accessor to get the string data 127 | attr = StringT(boost::begin(tok.value().first), 128 | boost::end(tok.value().first)); 129 | 130 | } 131 | }; 132 | template 133 | struct assign_to_container_from_value > 134 | : assign_to_attribute_from_value > 135 | {}; 136 | 137 | // if the user wants position data instead 138 | template 139 | struct assign_to_attribute_from_value > 140 | { 141 | static void 142 | call(qi_token const & tok, PositionT & attr) 143 | { 144 | attr = tok.value().second; 145 | } 146 | }; 147 | // we don't support assigning positions to "containers" 148 | 149 | // if the user wants both position and string value 150 | template 151 | struct assign_to_attribute_from_value< 152 | std::pair, qi_token > 153 | { 154 | static void 155 | call(qi_token const & 
tok, std::pair & attr) 156 | { 157 | // delegate to existing handlers 158 | assign_to_attribute_from_value >::call(tok, attr.first); 159 | assign_to_attribute_from_value >::call(tok, attr.second); 160 | } 161 | }; 162 | 163 | // Support debug output 164 | template 165 | struct token_printer_debug > 166 | { 167 | typedef qi_token token_type; 168 | 169 | template 170 | static void print(Out& out, token_type const & val) 171 | { 172 | out << '[' << val << ']'; 173 | } 174 | }; 175 | 176 | }}} 177 | 178 | // Adapt underlying token iterator from cpplexer (Wave) to one compatible with Spirit V2 179 | // requires adding a special typedef and returning Spirit-compatible tokens 180 | template 181 | struct qi_lex_iterator : boost::wave::cpplexer::lex_iterator 182 | { 183 | using base_type = boost::wave::cpplexer::lex_iterator; 184 | using position_type = typename TokenT::position_type; 185 | 186 | // add the typedef that qi::token requires 187 | using base_iterator_type = typename TokenT::string_type::const_iterator; 188 | 189 | // forward constructors 190 | qi_lex_iterator() {} 191 | template 192 | qi_lex_iterator(IteratorT beg, IteratorT end, position_type pos, 193 | boost::wave::language_support lang) 194 | : base_type(beg, end, pos, lang) {} 195 | 196 | }; 197 | 198 | -------------------------------------------------------------------------------- /style_detect.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Sample program demonstrating extracting position information from C++ code 3 | * 4 | * Copyright (C) 2021 Jeff Trull 5 | * 6 | * Distributed under the Boost Software License, Version 1.0. 
(See accompanying 7 | * file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 8 | * 9 | */ 10 | 11 | #include 12 | #include 13 | #include 14 | // this code is intended to only require C++11; if you can, use the C++17 versions instead: 15 | #include 16 | #include 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #ifdef BOOST_SPIRIT_DEBUG 25 | #include 26 | #include 27 | #endif 28 | 29 | #include "qi_token.hpp" 30 | 31 | #include 32 | 33 | 34 | template 35 | struct skipper : boost::spirit::qi::grammar 36 | { 37 | skipper() : skipper::base_type(skipped) 38 | { 39 | using namespace boost::spirit::qi; 40 | using namespace boost::wave; 41 | 42 | skipped = +(token(T_CCOMMENT) | token(T_CPPCOMMENT) | token(T_SPACE) | token(T_NEWLINE)); 43 | } 44 | private: 45 | boost::spirit::qi::rule skipped; 46 | }; 47 | 48 | // forward declaration of statement types, so we can use them in recursive structures 49 | template 50 | struct empty_stmt_t; 51 | 52 | template 53 | struct expr_stmt_t; 54 | 55 | template 56 | struct if_stmt_t; 57 | 58 | template 59 | struct else_t; // the else clause of an if 60 | 61 | template 62 | struct while_stmt_t; 63 | 64 | template 65 | struct for_stmt_t; 66 | 67 | // a type for "any statement" we can refer to in defining child statements 68 | template 69 | using simple_stmt_t = boost::variant, 70 | if_stmt_t, 71 | while_stmt_t, 72 | for_stmt_t, 73 | expr_stmt_t>; 74 | 75 | // handle compound (braced list) statements 76 | template 77 | struct compound_stmt_t; 78 | 79 | template 80 | using stmt_t = boost::variant, 81 | compound_stmt_t>; 82 | 83 | BOOST_FUSION_DEFINE_TPL_STRUCT( 84 | (Position), 85 | , 86 | compound_stmt_t, 87 | (Position, initial_brace) 88 | (std::vector>, statements) 89 | (Position, final_brace) 90 | ) 91 | 92 | BOOST_FUSION_DEFINE_TPL_STRUCT( 93 | (Position), 94 | , 95 | empty_stmt_t, 96 | (Position, semi) // location of the semicolon 97 | ) 98 | 99 | BOOST_FUSION_DEFINE_TPL_STRUCT( 100 | 
(Position), 101 | , 102 | expr_stmt_t, 103 | (Position, start) // location of the first token 104 | ) 105 | 106 | // attribute for if statements 107 | BOOST_FUSION_DEFINE_TPL_STRUCT( 108 | (Position), 109 | , 110 | if_stmt_t, 111 | (Position, kwd) // location of "if" 112 | (boost::recursive_wrapper>, stmt) // location of true branch statement 113 | (boost::optional>, else_clause) // else clause, if present 114 | ) 115 | 116 | // attribute for else clause of if 117 | BOOST_FUSION_DEFINE_TPL_STRUCT( 118 | (Position), 119 | , 120 | else_t, 121 | (Position, kwd) // location of "else" 122 | (boost::recursive_wrapper>, stmt) // location of statement 123 | ) 124 | 125 | // attribute for while loops 126 | BOOST_FUSION_DEFINE_TPL_STRUCT( 127 | (Position), 128 | , 129 | while_stmt_t, 130 | (Position, kwd) // where we found "while" 131 | (boost::recursive_wrapper>, stmt) 132 | ) 133 | 134 | // and for for loops 135 | BOOST_FUSION_DEFINE_TPL_STRUCT( 136 | (Position), 137 | , 138 | for_stmt_t, 139 | (Position, kwd) // where we found "for" 140 | (boost::recursive_wrapper>, stmt) 141 | ) 142 | 143 | // functions 144 | BOOST_FUSION_DEFINE_TPL_STRUCT( 145 | (Position), 146 | , 147 | func_t, 148 | (boost::optional, tmpl_expr) 149 | (Position, retval) 150 | (Position, name) 151 | (Position, lparen) 152 | (std::vector, params) 153 | (Position, rparen) 154 | (compound_stmt_t, body) 155 | ) 156 | 157 | // namespaces: contain functions and other namespaces 158 | template struct ns_t; 159 | // create an alias to get around multiple comma issue in Fusion (due to macros) 160 | template 161 | using ns_var_t = boost::variant, ns_t>; 162 | 163 | BOOST_FUSION_DEFINE_TPL_STRUCT( 164 | (Position), 165 | , 166 | ns_t, 167 | (Position, ns) 168 | (boost::optional, name) 169 | (Position, lbrace) 170 | (std::vector >, contents) // use our alias here 171 | (Position, rbrace) 172 | ) 173 | 174 | #ifdef BOOST_SPIRIT_DEBUG 175 | // supply printers for special types we use 176 | using 
boost::fusion::operators::operator<<; 177 | 178 | namespace boost { 179 | 180 | template 181 | Out& operator<<(Out& out, recursive_wrapper const & val) 182 | { 183 | out << val.get(); 184 | return out; 185 | } 186 | 187 | } 188 | 189 | namespace std { 190 | 191 | template 192 | Out& operator<<(Out& out, std::vector const & val) 193 | { 194 | out << "["; 195 | std::copy(std::begin(val), std::end(val), std::ostream_iterator(cout, " ")); 196 | out << "]"; 197 | return out; 198 | } 199 | 200 | } 201 | 202 | #endif // BOOST_SPIRIT_DEBUG 203 | 204 | template 205 | using result_t = 206 | std::vector< 207 | boost::variant, 208 | ns_t>>; 209 | 210 | template 211 | struct cpp_indent : boost::spirit::qi::grammar, result_t()> 212 | { 213 | cpp_indent() : cpp_indent::base_type(cppfile) 214 | { 215 | using namespace boost::wave; 216 | using namespace boost::spirit; 217 | namespace phx = boost::phoenix; 218 | using qi::token; 219 | using qi::tokenid_mask; 220 | using qi::omit; 221 | 222 | qi::as as_position; 223 | 224 | // operators are, in token mask terms, a subset of keywords 225 | kwd = tokenid_mask(KeywordTokenType) - tokenid_mask(OperatorTokenType) ; 226 | integral_type_kwd = 227 | token(T_BOOL) | token(T_INT) | token(T_CHAR) | token(T_LONG) | 228 | token(T_SHORT) | token(T_UNSIGNED) ; 229 | type_kwd = 230 | integral_type_kwd | 231 | token(T_FLOAT) | token(T_DOUBLE) | token(T_VOID) | token(T_AUTO) ; 232 | 233 | any_token = token(~0); // treated internally as an "all mask" 234 | // did not use tokenid_mask here because it only exposes the tokenid, not the position! 235 | 236 | // thoughts 237 | // we need at the top level: 238 | // 1) rules for if, while, etc. that inherit the original indentation or calculate it 239 | // based on the position of the keyword? Probably the latter. 240 | // 2) rules for braced-expression, unbraced, etc. 
or just "expression" that count 241 | // parens/braces but are otherwise pretty generic 242 | // 3) bail out if we detect a tab 243 | // 4) if we don't know what we are looking at, skip current token and try again, 244 | // possibly with adjustments if it's a brace or paren? Or maybe not. 245 | // also skip any spaces/newlines after it. 246 | 247 | ident = token(T_IDENTIFIER) ; 248 | 249 | name = 250 | as_position[ident] >> *omit[ident | token(T_COLON_COLON) | token(T_TYPENAME) | 251 | token(T_LESS) | token(T_GREATER) ] ; // attribute: position of the leading identifier only 252 | 253 | type_expr = 254 | ( type_kwd >> *omit[type_kwd] ) | 255 | name | 256 | ( token(T_DECLTYPE) >> omit[token(T_LEFTPAREN) >> expr >> token(T_RIGHTPAREN)] ) ; 257 | 258 | // an expression is a series of tokens not including semicolons or keywords, 259 | // with balanced parens/braces 260 | // its attribute is the position of the first token 261 | plain_expr_tok = 262 | (any_token - token(T_SEMICOLON) - kwd 263 | - token(T_LEFTPAREN) - token(T_LEFTBRACE) 264 | - token(T_RIGHTPAREN) - token(T_RIGHTBRACE)) ; 265 | 266 | expr = 267 | (as_position[token(T_LEFTPAREN)][_val = _1] >> expr >> token(T_RIGHTPAREN)) | 268 | (as_position[token(T_LEFTBRACE)][_val = _1] >> expr >> token(T_RIGHTBRACE)) | 269 | // function call - does not cover all possibilities, just plain identifier: 270 | (as_position[token(T_IDENTIFIER)][_val = _1] >> 271 | token(T_LEFTPAREN) >> -expr >> token(T_RIGHTPAREN)) | 272 | (plain_expr_tok[_val = _1] >> *plain_expr_tok) ; 273 | 274 | empty_stmt = as_position[token(T_SEMICOLON)] ; // a lone semicolon 275 | 276 | expr_stmt = expr >> omit[*expr] >> token(T_SEMICOLON) ; // one or more expressions ended by a semicolon 277 | 278 | if_stmt = token(T_IF) >> 279 | omit[token(T_LEFTPAREN) >> expr >> token(T_RIGHTPAREN)] >> // condition is matched but not recorded 280 | stmt >> -else_clause ; 281 | 282 | else_clause = token(T_ELSE) >> stmt ; 283 | 284 | while_stmt = token(T_WHILE) >> 285 | omit[ token(T_LEFTPAREN) >> expr >> token(T_RIGHTPAREN) ] >> // condition is matched but not recorded 286 | stmt ; 287 | 288 | for_stmt = token(T_FOR) >> 289 | omit[token(T_LEFTPAREN) >> 290 | 
-(-type_kwd >> expr) >> token(T_SEMICOLON) >> 291 | -expr >> token(T_SEMICOLON) >> 292 | -expr >> token(T_RIGHTPAREN)] >> 293 | stmt ; 294 | 295 | simple_stmt = 296 | empty_stmt | if_stmt | while_stmt | for_stmt | expr_stmt ; 297 | 298 | compound_stmt = 299 | token(T_LEFTBRACE) >> (*stmt) >> token(T_RIGHTBRACE); 300 | 301 | stmt = simple_stmt | compound_stmt ; 302 | 303 | func = 304 | -(token(T_TEMPLATE) >> 305 | omit[(token(T_LESS) >> 306 | (((token(T_TYPENAME) | token(T_CLASS)) >> ident) | // type parameter 307 | (integral_type_kwd >> ident)) // integral constant parameter 308 | % token(T_COMMA)) >> 309 | token(T_GREATER)]) >> 310 | as_position[type_expr] >> as_position[name] >> // positions of the return type and the function name 311 | token(T_LEFTPAREN) >> 312 | -((as_position[type_expr] >> -omit[name]) % token(T_COMMA)) >> // one position per parameter type; parameter names are optional and dropped 313 | token(T_RIGHTPAREN) >> 314 | compound_stmt ; 315 | 316 | ns = 317 | token(T_NAMESPACE) >> -token(T_IDENTIFIER) >> token(T_LEFTBRACE) >> 318 | // within namespaces we expect functions and other namespaces 319 | *(func | ns | 320 | // but also, if we get confused we will skip a token and retry 321 | omit[any_token - token(T_RIGHTBRACE)]) >> 322 | token(T_RIGHTBRACE) ; 323 | 324 | cppfile = *(ns | func | // something we understood, or 325 | omit[any_token - token(T_EOF)]) // a catchall to skip one token and retry 326 | >> omit[token(T_EOF)]; // consume all input 327 | // give the rules names for diagnostics (tracing is presumably active only under BOOST_SPIRIT_DEBUG) 328 | BOOST_SPIRIT_DEBUG_NODE(any_token); 329 | BOOST_SPIRIT_DEBUG_NODE(name); 330 | BOOST_SPIRIT_DEBUG_NODE(empty_stmt); 331 | BOOST_SPIRIT_DEBUG_NODE(type_expr); 332 | BOOST_SPIRIT_DEBUG_NODE(expr_stmt); 333 | BOOST_SPIRIT_DEBUG_NODE(if_stmt); 334 | BOOST_SPIRIT_DEBUG_NODE(else_clause); 335 | BOOST_SPIRIT_DEBUG_NODE(while_stmt); 336 | BOOST_SPIRIT_DEBUG_NODE(for_stmt); 337 | BOOST_SPIRIT_DEBUG_NODE(plain_expr_tok); 338 | BOOST_SPIRIT_DEBUG_NODE(expr); 339 | BOOST_SPIRIT_DEBUG_NODE(simple_stmt); 340 | BOOST_SPIRIT_DEBUG_NODE(compound_stmt); 341 | BOOST_SPIRIT_DEBUG_NODE(stmt); 342 | BOOST_SPIRIT_DEBUG_NODE(func); 343 | 
BOOST_SPIRIT_DEBUG_NODE(ns); 344 | BOOST_SPIRIT_DEBUG_NODE(cppfile); 345 | 346 | } 347 | private: 348 | using position_t = typename Iterator::position_type; 349 | /* shorthand for the rule types used by the members below */ 350 | template 351 | using rule = boost::spirit::qi::rule, Attr()> ; 352 | template 353 | using rule_no_skipper = boost::spirit::qi::rule ; 354 | using rule_no_attr = boost::spirit::qi::rule> ; 355 | /* start rule of the grammar (passed to base_type in the constructor) */ 356 | rule, ns_t>>> cppfile; 357 | 358 | rule> ns; 359 | 360 | rule any_token; 361 | rule plain_expr_tok; 362 | rule expr; 363 | rule type_kwd; 364 | rule integral_type_kwd; 365 | rule type_expr; 366 | rule_no_skipper ident; 367 | rule_no_skipper name; 368 | 369 | rule> simple_stmt; 370 | rule> compound_stmt; 371 | rule> stmt; 372 | 373 | rule> empty_stmt; 374 | rule> if_stmt; 375 | rule> else_clause; 376 | rule> while_stmt; 377 | rule> for_stmt; 378 | rule> expr_stmt; 379 | 380 | rule> func; 381 | 382 | rule_no_attr kwd; 383 | }; 384 | /* visitor that counts the namespaces, functions and statements found in a parse result */ 385 | struct stat_reporter : boost::static_visitor 386 | { 387 | stat_reporter() 388 | : num_ns(0), num_funcs(0), num_empty_stmts(0), 389 | num_if_stmts(0), num_while_stmts(0), num_for_stmts(0), num_expr_stmts(0) 390 | {} 391 | 392 | template 393 | void operator()(empty_stmt_t const &) { ++num_empty_stmts; } 394 | 395 | template 396 | void operator()(expr_stmt_t const &) { ++num_expr_stmts; } 397 | 398 | template 399 | void operator()(if_stmt_t const & s) 400 | { 401 | ++num_if_stmts; 402 | boost::apply_visitor(*this, s.stmt.get()); /* descend into the true branch */ 403 | if (s.else_clause) 404 | boost::apply_visitor(*this, (*s.else_clause).stmt.get()); /* and into the else branch when there is one */ 405 | } 406 | 407 | template 408 | void operator()(while_stmt_t const & s) 409 | { 410 | ++num_while_stmts; 411 | boost::apply_visitor(*this, s.stmt.get()); /* descend into the loop body */ 412 | } 413 | 414 | template 415 | void operator()(for_stmt_t const & s) 416 | { 417 | ++num_for_stmts; 418 | boost::apply_visitor(*this, s.stmt.get()); /* descend into the loop body */ 419 | } 420 | 421 | template 422 | void operator()(simple_stmt_t const & s) { boost::apply_visitor(*this, s); } /* forward to the overload for the held statement */ 423 | 424 | template 425 | void 
operator()(compound_stmt_t const & s) 426 | { 427 | for (auto const & child : s.statements) // visit every statement in the block 428 | boost::apply_visitor(*this, child); 429 | } 430 | 431 | // function 432 | template 433 | void operator()(func_t const & f) 434 | { 435 | ++num_funcs; 436 | for (auto const & s : f.body.statements) 437 | boost::apply_visitor(*this, s); 438 | } 439 | 440 | // namespace - a collection of functions and namespaces 441 | template 442 | void operator()(ns_t const & ns) 443 | { 444 | ++num_ns; 445 | for (auto const & f_or_ns : ns.contents) 446 | boost::apply_visitor(*this, f_or_ns); 447 | } 448 | // print the accumulated counts to standard output; does not modify the tallies 449 | void report() const 450 | { 451 | std::cout << num_ns << " namespaces and "; 452 | std::cout << num_funcs << " functions, containing "; 453 | std::cout << num_expr_stmts << " plain statements, "; 454 | std::cout << num_empty_stmts << " empty statements, "; 455 | std::cout << num_if_stmts << " if statements, "; 456 | std::cout << num_while_stmts << " while loops, and "; 457 | std::cout << num_for_stmts << " for loops.\n"; 458 | } 459 | 460 | private: 461 | std::size_t num_ns; 462 | std::size_t num_funcs; 463 | std::size_t num_empty_stmts; 464 | std::size_t num_if_stmts; 465 | std::size_t num_while_stmts; 466 | std::size_t num_for_stmts; 467 | std::size_t num_expr_stmts; 468 | }; 469 | 470 | 471 | // parse the file named on the command line and print statistics about what was recognized 472 | int main(int argc, char **argv) { 473 | using namespace std; 474 | using namespace boost::wave; 475 | 476 | if (argc != 2) { 477 | cerr << "usage: " << argv[0] << " path\n"; 478 | return 1; 479 | } 480 | 481 | char const * fn = argv[1]; 482 | 483 | ifstream cppfile(fn); 484 | if (!cppfile.is_open()) { 485 | cerr << "unable to open requested file " << fn << "\n"; 486 | return 5; 487 | } 488 | cppfile.unsetf(ios::skipws); // do not let the stream iterator skip whitespace 489 | boost::spirit::istream_iterator fbeg(cppfile); 490 | 491 | // Give it a try 492 | using token_t = qi_token<>; 493 | using position_t = token_t::position_type; 494 | position_t pos(fn); 495 | 496 | // create Spirit V2-compatible lexer token iterators from character iterators 
497 | using cpplexer_iterator_t = qi_lex_iterator; 498 | cpplexer_iterator_t beg(fbeg, boost::spirit::istream_iterator(), pos, 499 | language_support(support_cpp|support_cpp0x)); 500 | cpplexer_iterator_t end; 501 | 502 | cpp_indent fileparser; 503 | result_t result; 504 | auto start = beg; /* remembered so a parse that consumed nothing can be detected */ 505 | bool pass = boost::spirit::qi::phrase_parse(beg, end, fileparser, 506 | skipper(), result); 507 | if (pass) { /* a successful parse must also have consumed all of the input */ 508 | if (beg == start) { 509 | cout << "no input consumed!\n"; 510 | return 2; 511 | } else if (beg != end) { 512 | cout << "only some input consumed. Remaining:\n"; 513 | copy(beg, end, ostream_iterator>(cout, "")); 514 | return 2; 515 | } 516 | } else { 517 | cout << "parse failed\n"; 518 | return 1; 519 | } 520 | /* tally what was recognized and print the totals */ 521 | stat_reporter rptr; 522 | for (auto const & r : result) 523 | boost::apply_visitor(rptr, r); 524 | std::cout << result.size() << " top-level functions or namespaces, containing:\n"; 525 | rptr.report(); 526 | 527 | } 528 | 529 | #include 530 | --------------------------------------------------------------------------------