├── .github └── workflows │ └── regression.yml ├── .gitignore ├── CMakeLists.txt ├── COPYING ├── Doxyfile.in ├── Makefile.am ├── README.org ├── TODO.org ├── ast.png ├── boring_sqlstates.txt ├── cmake └── modules │ ├── FeatureTests.cmake │ ├── FindMonetDB.cmake │ └── FindPQXX.cmake ├── config_h.cmake.in ├── configure.ac ├── debian ├── .gitignore ├── changelog ├── control ├── copyright ├── rules ├── source │ └── format ├── tests │ ├── control │ └── sqlsmith ├── upstream │ └── metadata └── watch ├── dump.cc ├── dump.hh ├── dump.xsl ├── dut.hh ├── expr.cc ├── expr.hh ├── grammar.cc ├── grammar.hh ├── impedance.cc ├── impedance.hh ├── known.txt ├── known_re.txt ├── log-v1.0-to-v1.2.sql ├── log.cc ├── log.hh ├── log.sql ├── logo.png ├── monetdb.cc ├── monetdb.hh ├── postgres.cc ├── postgres.hh ├── prod.cc ├── prod.hh ├── random.cc ├── random.hh ├── relmodel.cc ├── relmodel.hh ├── schema.cc ├── schema.hh ├── sqlite.cc ├── sqlite.hh ├── sqlsmith.cc └── util.hh /.github/workflows/regression.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | 9 | defaults: 10 | run: 11 | shell: sh 12 | 13 | strategy: 14 | matrix: 15 | configure: 16 | - autoconf 17 | - cmake 18 | 19 | steps: 20 | - name: checkout 21 | uses: actions/checkout@v2 22 | 23 | - name: install build dependencies 24 | run: | 25 | sudo apt-get build-dep -y . 26 | sudo apt-get install -y postgresql 27 | 28 | - name: configure 29 | run: | 30 | mkdir build 31 | case ${{ matrix.configure }} in 32 | autoconf) 33 | autoreconf -i 34 | cd build && ../configure 35 | ;; 36 | cmake) 37 | sudo apt-get install -y cmake 38 | cd build && cmake .. 
39 | ;; 40 | esac 41 | 42 | - name: build 43 | run: | 44 | make -C build 45 | 46 | - name: test 47 | run: | 48 | pg_virtualenv build/sqlsmith --max-queries=100 --verbose 49 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Makefile.local 2 | config.h 3 | *.o 4 | sqlsmith 5 | *.xml 6 | TAGS 7 | html/ 8 | .deps/ 9 | Doxyfile 10 | INSTALL 11 | Makefile 12 | Makefile.in 13 | aclocal.m4 14 | autom4te.cache/ 15 | compilation.log 16 | compile 17 | config.guess 18 | config.h.in 19 | config.h.in~ 20 | config.log 21 | config.status 22 | config.sub 23 | configure 24 | depcomp 25 | gitrev.h 26 | install-sh 27 | latex/ 28 | missing 29 | stamp-h1 30 | sqlsmith-*.tar.gz 31 | doxygen_sqlite3.db 32 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.14) 2 | project(SQLsmith LANGUAGES CXX) 3 | 4 | set(CMAKE_CXX_STANDARD 17) 5 | set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules) 6 | 7 | include(FeatureTests) 8 | 9 | Option(USE_UPSTREAM_LIBPQXX "Fetch and use libpqxx from Github.") 10 | 11 | 12 | 13 | # define our executable early, so we can set the properties, based on our dependencies 14 | add_executable(sqlsmith) 15 | target_sources(sqlsmith PRIVATE 16 | relmodel.cc schema.cc random.cc prod.cc expr.cc grammar.cc log.cc dump.cc impedance.cc sqlsmith.cc postgres.cc) 17 | # PostgreSQL::PostgreSQL is named here before find_package(PostgreSQL) runs below; NOTE(review): this relies on CMake resolving link items at generate time - confirm 18 | target_link_libraries(sqlsmith PostgreSQL::PostgreSQL) 19 | 20 | # Workaround for outdated cmake packages on debian 21 | set(PostgreSQL_ADDITIONAL_VERSIONS 11 12 13 14) 22 | find_package(PostgreSQL REQUIRED) 23 | # the branch below picks between the system libpqxx and a copy fetched with FetchContent 24 | if (NOT USE_UPSTREAM_LIBPQXX) 25 | # If we use the system pqxx library, we have to check for libpq. 
26 | # This check is done by the CMakeLists.txt of the libpqxx project otherwise 27 | find_package(PQXX REQUIRED) 28 | # HAVE_LIBPQXX7 must cover every 7.x release, 7.0.x included: compare as a version, not as a number 29 | if (PQXX_VERSION VERSION_GREATER_EQUAL 7.0) 30 | set(HAVE_LIBPQXX7 TRUE) 31 | endif () 32 | 33 | target_link_libraries(sqlsmith PQXX::PQXX) 34 | else (NOT USE_UPSTREAM_LIBPQXX) 35 | set(SKIP_BUILD_TEST TRUE) 36 | include(FetchContent) 37 | FetchContent_Declare( 38 | libpqxx 39 | GIT_REPOSITORY https://github.com/jtv/libpqxx.git 40 | GIT_TAG 7.6.0 41 | ) 42 | FetchContent_MakeAvailable(libpqxx) 43 | FetchContent_GetProperties(libpqxx) 44 | unset(SKIP_BUILD_TEST) 45 | 46 | set(HAVE_LIBPQXX7 TRUE) 47 | 48 | target_include_directories(sqlsmith PRIVATE ${libpqxx_SOURCE_DIR}/include) 49 | target_link_libraries(sqlsmith pqxx) 50 | endif (NOT USE_UPSTREAM_LIBPQXX) 51 | # std_regex_ok is not set in this file (presumably by FeatureTests - confirm); when it is false, use Boost.Regex and link it 52 | if(NOT std_regex_ok) 53 | find_package(Boost REQUIRED COMPONENTS regex) 54 | set(REGEX_LIBRARY Boost::regex) 55 | set(HAVE_BOOST TRUE) 56 | set(HAVE_BOOST_REGEX TRUE) 57 | target_link_libraries(sqlsmith ${REGEX_LIBRARY}) 58 | endif(NOT std_regex_ok) 59 | 60 | ## optional dependencies 61 | # sqlite 62 | find_package(SQLite3) 63 | if (SQLite3_FOUND) 64 | set(HAVE_LIBSQLITE3 TRUE) 65 | target_sources(sqlsmith PRIVATE sqlite.cc) 66 | target_link_libraries(sqlsmith SQLite::SQLite3) 67 | endif () 68 | 69 | # monetdb 70 | find_package(MonetDB) 71 | if (MonetDB_FOUND) 72 | set(HAVE_MONETDB 1) 73 | target_sources(sqlsmith PRIVATE monetdb.cc) 74 | target_link_libraries(sqlsmith MonetDB::mapi) 75 | endif () 76 | 77 | ## target configuration 78 | string(TOLOWER ${PROJECT_NAME} PACKAGE) 79 | set(PACKAGE_NAME ${PROJECT_NAME}) 80 | configure_file(config_h.cmake.in config.h) 81 | 82 | # get our git tag; fall back to UNRELEASED when `git describe` fails or prints nothing (e.g. a tarball build) 83 | execute_process( 84 | COMMAND git describe --exclude=debian* --dirty --tags --always 85 | WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} 86 | OUTPUT_VARIABLE GIT_TAG 87 | OUTPUT_STRIP_TRAILING_WHITESPACE 88 | ) 89 | if ("${GIT_TAG}" STREQUAL "") 90 | set(GIT_TAG UNRELEASED) 91 | endif () 92 | # our gitrev.h 93 | file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/gitrev.h "#define 
GITREV \"${GIT_TAG}\"") 94 | 95 | # make sure our target finds gitrev.h and config.h 96 | target_include_directories(sqlsmith PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) 97 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. 
Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 
62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 
102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 
133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. 
You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 
196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 
229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | 635 | Copyright (C) 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | Copyright (C) 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 
675 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | -include Makefile.local 2 | 3 | BUILT_SOURCES = gitrev.h 4 | 5 | bin_PROGRAMS = sqlsmith 6 | 7 | DUT = postgres.cc 8 | 9 | if DUT_MONETDB 10 | DUT += monetdb.cc 11 | endif 12 | 13 | if DUT_SQLITE 14 | DUT += sqlite.cc 15 | endif 16 | 17 | 18 | sqlsmith_SOURCES = relmodel.cc schema.cc $(DUT) \ 19 | random.cc prod.cc expr.cc grammar.cc log.cc dump.cc impedance.cc \ 20 | sqlsmith.cc 21 | 22 | sqlsmith_LDADD = $(LIBPQXX_LIBS) $(MONETDB_MAPI_LIBS) $(BOOST_REGEX_LIB) $(POSTGRESQL_LIBS) 23 | 24 | AM_LDFLAGS = $(BOOST_LDFLAGS) $(POSTGRESQL_LDFLAGS) 25 | AM_CPPFLAGS = $(BOOST_CPPFLAGS) $(LIBPQXX_CFLAGS) $(POSTGRESQL_CPPFLAGS) $(MONETDB_MAPI_CFLAGS) -Wall -Wextra 26 | 27 | 28 | EXTRA_DIST = gitrev.h dump.hh expr.hh grammar.hh log.hh prod.hh \ 29 | random.hh relmodel.hh schema.hh impedance.hh known.txt known_re.txt log.sql \ 30 | README.org TODO.org ast.png logo.png dump.xsl util.hh sqlite.hh \ 31 | dut.hh postgres.hh monetdb.hh log-v1.0-to-v1.2.sql boring_sqlstates.txt 32 | 33 | gitrev.h: $(HEADERS) $(SOURCES) 34 | -if git describe --exclude='debian*' --dirty --tags --always > /dev/null ; then \ 35 | echo "#define GITREV \"$$(git describe --exclude='debian*' --dirty --tags --always)\"" > $@ ;\ 36 | else \ 37 | echo "#define GITREV \"unreleased\"" > $@ ;\ 38 | fi 39 | 40 | filterdump: 41 | psql -Xc 'copy (select error from known) to stdout' |sort -u > known.txt 42 | psql -Xc 'copy (select re from known_re) to stdout' |sort -u > known_re.txt 43 | psql -Xc 'copy (select sqlstate from boring_sqlstates) to stdout' |sort -u > boring_sqlstates.txt 44 | 45 | .PHONY: filterdump 46 | -------------------------------------------------------------------------------- /README.org: -------------------------------------------------------------------------------- 1 | [[logo.png]] 2 | * SQLsmith 3 | 4 | : "I 
love the smell of coredumps in the morning" 5 | 6 | ** Description 7 | SQLsmith is a random SQL query generator. Its paragon is [[https://embed.cs.utah.edu/csmith/][Csmith]], 8 | which proved valuable for quality assurance in C compilers. 9 | 10 | It currently supports generating queries for PostgreSQL, SQLite 3 and 11 | MonetDB. To add support for another RDBMS, you need to implement two 12 | classes providing schema information about and connectivity to the 13 | device under test. 14 | 15 | Besides developers of the RDBMS products, users developing extensions 16 | might also be interested in exposing their code to SQLsmith's random 17 | workload. 18 | 19 | Since 2015, it found 118 bugs in alphas, betas and releases in the 20 | aforementioned products, including security vulnerabilities in 21 | released versions. Additional bugs were squashed in extensions and 22 | libraries such as orafce and glibc. 23 | 24 | https://github.com/anse1/sqlsmith/wiki#score-list 25 | 26 | ** Dependencies 27 | - C++11 28 | - libpqxx 29 | 30 | optional: 31 | - boost::regex in case your std::regex is broken 32 | - SQLite3 33 | - monetdb_mapi 34 | 35 | ** Building on Debian 36 | 37 | : apt-get install build-essential autoconf autoconf-archive libpqxx-dev libboost-regex-dev libsqlite3-dev 38 | : cd sqlsmith 39 | : autoreconf -i # Avoid when building from a release tarball 40 | : ./configure 41 | : make 42 | 43 | ** Building on OSX 44 | 45 | In order to build on Mac OSX, assuming you use Homebrew, run the following 46 | 47 | : brew install libpqxx automake libtool autoconf autoconf-archive pkg-config 48 | : cd sqlsmith 49 | : autoreconf -i # Avoid when building from a release tarball 50 | : ./configure 51 | : make 52 | 53 | ** Usage 54 | 55 | SQLsmith connects to the target database to retrieve the schema for 56 | query generation and to send the generated queries to. Currently, all 57 | generated statements are rolled back. 
Beware that SQLsmith does call 58 | functions that could possibly have side-effects 59 | (e.g. pg_terminate_backend). Use a suitably *underprivileged user* 60 | for its connection to avoid this. 61 | 62 | Example invocations: 63 | 64 | : # testing Postgres 65 | : sqlsmith --verbose --target="host=/tmp port=65432 dbname=regression" 66 | : # testing SQLite 67 | : sqlsmith --verbose --sqlite="file:$HOME/.mozilla/firefox/places.sqlite?mode=ro" 68 | : # testing MonetDB 69 | : sqlsmith --verbose --monetdb="mapi:monetdb://localhost:50000/smith" 70 | 71 | The following options are currently supported: 72 | 73 | | =--target=connstr= | target postgres database (default: libpq defaults) | 74 | | =--sqlite=URI= | target SQLite3 database | 75 | | =--monetdb=URI= | target MonetDB database | 76 | | =--log-to=connstr= | postgres db for logging errors into (default: don't log) | 77 | | =--verbose= | emit progress output | 78 | | =--version= | show version information | 79 | | =--seed=int= | seed RNG with specified integer instead of PID | 80 | | =--dry-run= | print queries instead of executing them | 81 | | =--max-queries=long= | terminate after generating this many queries | 82 | | =--exclude-catalog= | don't generate queries using catalog relations | 83 | | =--dump-all-queries= | dump queries as they are generated | 84 | | =--dump-all-graphs= | dump generated ASTs for debugging | 85 | | =--rng-state=string= | deserialize dumped rng state | 86 | 87 | Sample output: 88 | 89 | =--verbose= makes sqlsmith emit some progress indication to stderr. A 90 | symbol is output for each query sent to the server. Currently the 91 | following ones are generated: 92 | 93 | | symbol | meaning | details | 94 | |--------+-------------------+-----------------------------------------------| 95 | | . 
| ok | Query generated and executed with ok sqlstate | 96 | | S | syntax error | These are bugs in sqlsmith - please report | 97 | | t | timeout | SQLsmith sets a statement timeout of 1s | 98 | | C | broken connection | These happen when a query crashes the server | 99 | | e | other error | | 100 | 101 | When you test against a RDBMS that doesn't support some of SQLsmith's 102 | grammar, there will be a burst of syntax errors on startup. These 103 | should disappear after some time as SQLsmith blacklists productions 104 | that consistently lead to errors. 105 | 106 | =--verbose= will also periodically emit error reports. In the 107 | following example, these are mostly caused by the primitive type 108 | system. 109 | 110 | : queries: 39000 (202.399 gen/s, 298.942 exec/s) 111 | : AST stats (avg): height = 5.599 nodes = 37.8489 112 | : 82 ERROR: invalid regular expression: quantifier operand invalid 113 | : 70 ERROR: canceling statement due to statement timeout 114 | : 44 ERROR: operator does not exist: point = point 115 | : 27 ERROR: operator does not exist: xml = xml 116 | : 22 ERROR: cannot compare arrays of different element types 117 | : 11 ERROR: could not determine which collation to use for string comparison 118 | : 5 ERROR: invalid regular expression: nfa has too many states 119 | : 4 ERROR: cache lookup failed for index 2619 120 | : 4 ERROR: invalid regular expression: brackets [] not balanced 121 | : 3 ERROR: operator does not exist: polygon = polygon 122 | : 2 ERROR: invalid regular expression: parentheses () not balanced 123 | : 1 ERROR: invalid regular expression: invalid character range 124 | : error rate: 0.00705128 125 | 126 | The only one that looks interesting here is the cache lookup one. 
127 | Taking a closer look at it reveals that it happens when you query a 128 | certain catalog view like this: 129 | 130 | : self=# select indexdef from pg_catalog.pg_indexes where indexdef is not NULL; 131 | : FEHLER: cache lookup failed for index 2619 132 | 133 | This is because the planner then puts =pg_get_indexdef(oid)= in a 134 | context where it sees non-index-oids, which causes it to croak: 135 | 136 | : QUERY PLAN 137 | : ------------------------------------------------------------------------------------ 138 | : Hash Join (cost=17.60..30.65 rows=9 width=4) 139 | : Hash Cond: (i.oid = x.indexrelid) 140 | : -> Seq Scan on pg_class i (cost=0.00..12.52 rows=114 width=8) 141 | : Filter: ((pg_get_indexdef(oid) IS NOT NULL) AND (relkind = 'i'::"char")) 142 | : -> Hash (cost=17.31..17.31 rows=23 width=4) 143 | : -> Hash Join (cost=12.52..17.31 rows=23 width=4) 144 | : Hash Cond: (x.indrelid = c.oid) 145 | : -> Seq Scan on pg_index x (cost=0.00..4.13 rows=113 width=8) 146 | : -> Hash (cost=11.76..11.76 rows=61 width=8) 147 | : -> Seq Scan on pg_class c (cost=0.00..11.76 rows=61 width=8) 148 | : Filter: (relkind = ANY ('{r,m}'::"char"[])) 149 | 150 | Now this is more of a curiosity than a bug, but it still illustrates what 151 | debugging with the help of SQLsmith might look like. 152 | 153 | ** Large-scale testing 154 | 155 | =--log-to= allows logging of hundreds of sqlsmith instances into a 156 | central PostgreSQL database. [[./log.sql]] contains the schema sqlsmith 157 | expects and some additional views to generate reports on the logged 158 | contents. 159 | 160 | It also contains a trigger to filter boring/known errors based on the 161 | contents of the tables known and known_re. I periodically COPY my 162 | filter tables for testing PostgreSQL into the files [[./known_re.txt]] and 163 | [[./known.txt]] to serve as a starting point.
164 | 165 | ** Resources 166 | 167 | - [[https://www.postgresql.eu/events/pgconfeu2018/sessions/session/2221/slides/145/sqlsmith-talk.pdf][Slides from PGConf.EU 2018]] 168 | - [[https://anse1.github.io/sqlsmith-doc/structsqltype.html][Doxygen output for SQLsmith]] 169 | 170 | ** License 171 | 172 | SQLsmith is available under GPLv3. Use it at your own risk. It may 173 | *damage your database* (one of the purposes of this tool /is/ to try 174 | and break things). See the file [[COPYING]] for details. 175 | 176 | ** Authors 177 | 178 | Andreas Seltenreich 179 | 180 | Bo Tang 181 | 182 | Sjoerd Mullender 183 | 184 | [[ast.png]] 185 | -------------------------------------------------------------------------------- /TODO.org: -------------------------------------------------------------------------------- 1 | #+CATEGORY: sqlsmith 2 | 3 | * TODO-List 4 | ** Driver 5 | *** TODO allow selecting/filtering schemas 6 | *** TODO report SQLSTATE 7 | 8 | need to patch libpqxx for that 9 | http://pqxx.org/development/libpqxx/ticket/219 10 | 11 | It's probably better to ditch libpqxx for the testing connection and 12 | use a custom class instead that abstracts different products 13 | 14 | ** grammar 15 | *** TODO [#A] add proper identifier quoting 16 | sqlsmith fails horribly with databases containing identifiers that 17 | require quoting. 18 | *** TODO [#C] Generate data for literal use in queries 19 | *** TODO Improve random generation 20 | - Add Attributes to rules so factories can pick candidates in a 21 | weighted fashion. 22 | - This attribute could then also be used to blacklist productions for 23 | incompatible RDBMSs 24 | - Factor in graph level and AST node count in decisions about productions 25 | with high "fan-out". This should be made customizable. 26 | E.g. target depth/target node count.
Or better use a single scale 27 | factor instead 28 | ** schema + type system 29 | *** TODO DTRT with arrays 30 | - review standard so we don't drift into non-standard pg stuff 31 | *** TODO composite/record types 32 | ** relmodel 33 | *** TODO [#C] operations on Tuples 34 | instead of hacking up tuples inside productions it's more sensible to 35 | implement operators in relmodel.cc join() project() union() select() 36 | *** TODO Load samples at startup to have a pool of values for literals 37 | - how to do it in a reproducible fashion? TABLESAMPLE? ORDER BY? 38 | - maybe use atomic value subselects to fetch compatible values 39 | ** Performance 40 | | revision | queries/s | nodes | comment | 41 | |---------------+-----------+-------+----------------------------------------| 42 | | ee9c94f-dirty | 208 | ? | | 43 | | 4547909-dirty | 125 | 72 | | 44 | | 7fa25c6-dirty | 156 | 54 | | 45 | | 32a5d2a | 188 | 54 | | 46 | | 3a29a40 | 238 | 54 | | 47 | | 57101e2 | 193 | 54 | | 48 | | 52c5b92 | 212 | 37 | | 49 | | efca827 | 205 | 37 | changed RNG to 64-Bit Mersenne Twister | 50 | | 9099e07 | 185 | 37 | coalesce production | 51 | 52 | : time ./sqlsmith --verbose --target='dbname=regression' --dry-run --max-queries=10000 > /dev/null 53 | 54 | ** Postgresql Line Coverage 55 | 56 | | sqlsmith | overall | parser | 57 | |----------+---------+--------| 58 | | a4c1989 | 26.0 | 20.4 | 59 | | ee099e6 | 33.8 | 25.8 | 60 | | 231c88a | 34.65 | 28.1 | 61 | | 7ffac2d | 39.8 | 30.3 | 62 | | dad2ce0 | 34.5 | 29.5 | 63 | 64 | | combined testing | overall | parser | 65 | |---------------------+---------+--------| 66 | | sqlsmith+make check | 65.1 | 80.4 | 67 | | make check | 62 | 80.2 | 68 | | sqlsmith 7ffac2d | 39.8 | 30.3 | 69 | 70 | Reference: 71 | | | overall | parser | 72 | |-------------------+---------+--------| 73 | | pg_ctl start/stop | 5.8 | 0.5 | 74 | | --max-queries=0 | 16.6 | 14.6 | 75 | 76 | : ./configure --enable-coverage 77 | : 78 | : make install 79 | : initdb /tmp/gcov 80 | : 
pg_ctl -D /tmp/gcov start 81 | : make installcheck 82 | : pg_ctl -D /tmp/gcov stop 83 | : make coverage-clean 84 | : pg_ctl -D /tmp/gcov start 85 | : # since 7ffac2d: 4 instances w/25000 each instead 1 instance w/10000 queries 86 | : sqlsmith --target='dbname=regression' --max-queries=25000 & 87 | : sqlsmith --target='dbname=regression' --max-queries=25000 & 88 | : sqlsmith --target='dbname=regression' --max-queries=25000 & 89 | : sqlsmith --target='dbname=regression' --max-queries=25000 & 90 | : wait 91 | : pg_ctl -D /tmp/gcov stop 92 | : make coverage-html 93 | -------------------------------------------------------------------------------- /ast.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anse1/sqlsmith/46c1df710ea0217d87247bb1fc77f4a09bca77f7/ast.png -------------------------------------------------------------------------------- /boring_sqlstates.txt: -------------------------------------------------------------------------------- 1 | 0A000 2 | 22000 3 | 22001 4 | 22003 5 | 22004 6 | 22007 7 | 22008 8 | 22011 9 | 22012 10 | 2201B 11 | 2201E 12 | 22023 13 | 22025 14 | 2202E 15 | 22P02 16 | 23502 17 | 23503 18 | 23505 19 | 23514 20 | 2F005 21 | 3D000 22 | 3F000 23 | 42501 24 | 42601 25 | 42602 26 | 42704 27 | 42710 28 | 42725 29 | 42804 30 | 42809 31 | 42846 32 | 42883 33 | 428C9 34 | 42P01 35 | 42P03 36 | 42P07 37 | 53400 38 | 54001 39 | 55000 40 | 55P02 41 | 57014 42 | 57P01 43 | 58P01 44 | P0001 45 | -------------------------------------------------------------------------------- /cmake/modules/FeatureTests.cmake: -------------------------------------------------------------------------------- 1 | # borked std::regex 2 | 3 | include(CheckCXXSourceCompiles) 4 | 5 | # check for a broken std::regex 6 | check_cxx_source_compiles([=[ 7 | #include 8 | #include 9 | 10 | int main() 11 | { 12 | std::regex re(".*OK.*"); 13 | auto ret = std::regex_match("This should be OK", re); 14 | if (!ret) { 
15 | std::cout << "not_matched" << std::endl; 16 | return 1; 17 | } 18 | 19 | return 0; 20 | } 21 | ]=] std_regex_ok 22 | FAIL_REGEX not_matched) -------------------------------------------------------------------------------- /cmake/modules/FindMonetDB.cmake: -------------------------------------------------------------------------------- 1 | # Distributed under the GPL-3.0 as part of SQLSmith. 2 | #[=======================================================================[.rst: 3 | FindMonetDB 4 | ------- 5 | 6 | Finds the monetdb-mapi library. 7 | 8 | Imported Targets 9 | ^^^^^^^^^^^^^^^^ 10 | 11 | This module provides the following imported targets, if found: 12 | 13 | ``MonetDB::mapi`` 14 | The monetdb-mapi library 15 | 16 | Result Variables 17 | ^^^^^^^^^^^^^^^^ 18 | 19 | This will define the following variables: 20 | 21 | ``MonetDB_FOUND`` 22 | True if the system has the monetdb-mapi library. 23 | ``MonetDB_VERSION`` 24 | The version of the monetdb-mapi library which was found. 25 | ``MonetDB_INCLUDE_DIRS`` 26 | Include directories needed to use monetdb-mapi. 27 | ``MonetDB_LIBRARIES`` 28 | Libraries needed to link to monetdb-mapi. 29 | 30 | Cache Variables 31 | ^^^^^^^^^^^^^^^ 32 | 33 | The following cache variables may also be set: 34 | 35 | ``MonetDB_INCLUDE_DIR`` 36 | The directory containing ``mapi.h``. 37 | ``MonetDB_LIBRARY`` 38 | The path to the monetdb-mapi library.
39 | 40 | #]=======================================================================] 41 | 42 | find_package(PkgConfig) 43 | pkg_check_modules(PC_MonetDB QUIET monetdb-mapi) 44 | 45 | find_path(MonetDB_INCLUDE_DIR 46 | NAMES mapi.h 47 | PATHS ${PC_MonetDB_INCLUDE_DIRS} 48 | PATH_SUFFIXES monetdb 49 | ) 50 | 51 | find_library(MonetDB_LIBRARY 52 | NAMES mapi 53 | PATHS ${PC_MonetDB_LIBRARY_DIRS} 54 | ) 55 | 56 | set(MonetDB_VERSION ${PC_MonetDB_VERSION}) 57 | 58 | include(FindPackageHandleStandardArgs) 59 | find_package_handle_standard_args(MonetDB 60 | FOUND_VAR MonetDB_FOUND 61 | REQUIRED_VARS 62 | MonetDB_LIBRARY 63 | MonetDB_INCLUDE_DIR 64 | VERSION_VAR MonetDB_VERSION 65 | ) 66 | 67 | if (MonetDB_FOUND) 68 | set(MonetDB_LIBRARIES ${MonetDB_LIBRARY}) 69 | set(MonetDB_INCLUDE_DIRS ${MonetDB_INCLUDE_DIR}) 70 | set(MonetDB_DEFINITIONS ${PC_MonetDB_CFLAGS_OTHER}) 71 | 72 | add_library(MonetDB::mapi UNKNOWN IMPORTED) 73 | set_target_properties(MonetDB::mapi PROPERTIES 74 | IMPORTED_LOCATION "${MonetDB_LIBRARY}" 75 | INTERFACE_COMPILE_OPTIONS "${PC_MonetDB_CFLAGS_OTHER}" 76 | INTERFACE_INCLUDE_DIRECTORIES "${MonetDB_INCLUDE_DIR}" 77 | ) 78 | endif () 79 | 80 | -------------------------------------------------------------------------------- /cmake/modules/FindPQXX.cmake: -------------------------------------------------------------------------------- 1 | # Distributed under the GPL-3.0 as part of SQLSmith. 2 | #[=======================================================================[.rst: 3 | FindPQXX 4 | ------- 5 | 6 | Finds the libpqxx library. 7 | 8 | Imported Targets 9 | ^^^^^^^^^^^^^^^^ 10 | 11 | This module provides the following imported targets, if found: 12 | 13 | ``PQXX::PQXX`` 14 | The libpqxx library 15 | 16 | Result Variables 17 | ^^^^^^^^^^^^^^^^ 18 | 19 | This will define the following variables: 20 | 21 | ``PQXX_FOUND`` 22 | True if the system has the libpqxx library. 23 | ``PQXX_VERSION`` 24 | The version of the libpqxx library which was found. 
25 | ``PQXX_INCLUDE_DIRS`` 26 | Include directories needed to use libpqxx. 27 | ``PQXX_LIBRARIES`` 28 | Libraries needed to link to libpqxx. 29 | 30 | Cache Variables 31 | ^^^^^^^^^^^^^^^ 32 | 33 | The following cache variables may also be set: 34 | 35 | ``PQXX_INCLUDE_DIR`` 36 | The directory containing the ``pqxx`` headers. 37 | ``PQXX_LIBRARY`` 38 | The path to the libpqxx library. 39 | 40 | #]=======================================================================] 41 | 42 | find_package(PkgConfig) 43 | pkg_check_modules(PC_PQXX QUIET libpqxx) 44 | 45 | find_path(PQXX_INCLUDE_DIR 46 | NAMES pqxx 47 | PATHS ${PC_PQXX_INCLUDE_DIRS} 48 | ) 49 | 50 | find_library(PQXX_LIBRARY 51 | NAMES pqxx 52 | PATHS ${PC_PQXX_LIBRARY_DIRS} 53 | ) 54 | 55 | set(PQXX_VERSION ${PC_PQXX_VERSION}) 56 | 57 | include(FindPackageHandleStandardArgs) 58 | find_package_handle_standard_args(PQXX 59 | FOUND_VAR PQXX_FOUND 60 | REQUIRED_VARS 61 | PQXX_LIBRARY 62 | PQXX_INCLUDE_DIR 63 | VERSION_VAR PQXX_VERSION 64 | ) 65 | 66 | if (PQXX_FOUND) 67 | set(PQXX_LIBRARIES ${PQXX_LIBRARY}) 68 | set(PQXX_INCLUDE_DIRS ${PQXX_INCLUDE_DIR}) 69 | set(PQXX_DEFINITIONS ${PC_PQXX_CFLAGS_OTHER}) 70 | 71 | add_library(PQXX::PQXX UNKNOWN IMPORTED) 72 | set_target_properties(PQXX::PQXX PROPERTIES 73 | IMPORTED_LOCATION "${PQXX_LIBRARY}" 74 | INTERFACE_COMPILE_OPTIONS "${PC_PQXX_CFLAGS_OTHER}" 75 | INTERFACE_INCLUDE_DIRECTORIES "${PQXX_INCLUDE_DIR}" 76 | ) 77 | endif () 78 | 79 | -------------------------------------------------------------------------------- /config_h.cmake.in: -------------------------------------------------------------------------------- 1 | /* we know these parameters as they are required during the CMake configure phase */ 2 | #define HAVE_CXX17 1 3 | 4 | 5 | 6 | #cmakedefine HAVE_BOOST 1 7 | #cmakedefine HAVE_BOOST_REGEX 1 8 | 9 | #cmakedefine HAVE_LIBPQXX7 1 10 | 11 | #cmakedefine HAVE_LIBSQLITE3 1 12 | #cmakedefine HAVE_MONETDB 1 13 | 14 | #cmakedefine PACKAGE "@PACKAGE@" 15 | #cmakedefine PACKAGE_NAME
"@PACKAGE_NAME@" 16 | 17 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | AC_INIT(SQLsmith, 1.4, seltenreich@gmx.de, sqlsmith, https://github.com/anse1/sqlsmith/) 2 | AC_LANG(C++) 3 | 4 | AM_INIT_AUTOMAKE(-Wall -Werror foreign) 5 | AC_PROG_CXX 6 | 7 | AX_LIB_POSTGRESQL() 8 | 9 | PKG_CHECK_MODULES(LIBPQXX, libpqxx < 7.0, 10 | [AX_CXX_COMPILE_STDCXX_11(noext,mandatory)], 11 | [PKG_CHECK_MODULES([LIBPQXX], [libpqxx >= 7.0], 12 | [libpqxx7=yes], 13 | [] 14 | )] 15 | ) 16 | 17 | AS_IF([test "x$libpqxx7" = "xyes"], AC_MSG_NOTICE([libpqxx version >= 7 detected]), []) 18 | AS_IF([test "x$libpqxx7" = "xyes"], [AC_DEFINE([HAVE_LIBPQXX7], [1], [define if libpqxx >= 7 is used])], []) 19 | AS_IF([test "x$libpqxx7" = "xyes"], [AX_CXX_COMPILE_STDCXX_17(noext,mandatory)], []) 20 | 21 | PKG_CHECK_MODULES(MONETDB_MAPI, monetdb-mapi >= 11.23.0, 22 | [AC_DEFINE([HAVE_MONETDB], [1], [define if the MonetDB client library is available])], 23 | [] 24 | ) 25 | 26 | AM_CONDITIONAL([DUT_MONETDB], [test x$pkg_failed = xno]) 27 | 28 | AX_BOOST_BASE() 29 | AX_BOOST_REGEX 30 | 31 | AC_SUBST(LIBPQXX_CFLAGS) 32 | AC_SUBST(LIBPQXX_LIBS) 33 | 34 | AC_SUBST(CONFIG_GIT_REVISION, 35 | [m4_esyscmd_s([git describe --dirty --tags --always])]) 36 | 37 | AC_CHECK_LIB(sqlite3, sqlite3_open_v2) 38 | AM_CONDITIONAL([DUT_SQLITE], [test x$ac_cv_lib_sqlite3_sqlite3_open_v2 = xyes]) 39 | 40 | AC_SUBST(REVISION) 41 | 42 | AC_CONFIG_HEADERS(config.h) 43 | AC_CONFIG_FILES([ 44 | Makefile 45 | Doxyfile 46 | ]) 47 | AC_OUTPUT 48 | -------------------------------------------------------------------------------- /debian/.gitignore: -------------------------------------------------------------------------------- 1 | autoreconf.* 2 | *debhelper* 3 | files 4 | *.substvars 5 | -------------------------------------------------------------------------------- /debian/changelog: 
-------------------------------------------------------------------------------- 1 | sqlsmith (1.4-2) UNRELEASED; urgency=medium 2 | 3 | * Remove constraints unnecessary since buster: 4 | + Build-Depends: Drop versioned constraint on libpqxx-dev. 5 | 6 | -- Debian Janitor Thu, 17 Mar 2022 12:49:00 -0000 7 | 8 | sqlsmith (1.4-1) unstable; urgency=medium 9 | 10 | * New upstream release. 11 | * Remove libboost-regex-dev B-D, not required with modern g++. 12 | 13 | -- Christoph Berg Thu, 03 Mar 2022 09:33:23 +0100 14 | 15 | sqlsmith (1.3-1) unstable; urgency=medium 16 | 17 | * New upstream release. 18 | 19 | -- Christoph Berg Tue, 11 Jan 2022 11:20:16 +0100 20 | 21 | sqlsmith (1.2.1-2) unstable; urgency=medium 22 | 23 | [ Debian Janitor ] 24 | * Use secure copyright file specification URI. 25 | * Bump debhelper from deprecated 9 to 12. 26 | * Set debhelper-compat version in Build-Depends. 27 | * Set upstream metadata fields: Bug-Database, Bug-Submit, Repository, 28 | Repository-Browse. 29 | 30 | [ Christoph Berg ] 31 | * Support PG14's multirange types. 32 | * B-D on libssl-dev. 33 | 34 | -- Christoph Berg Wed, 06 Oct 2021 17:14:29 +0200 35 | 36 | sqlsmith (1.2.1-1) unstable; urgency=medium 37 | 38 | * New upstream bugfix release. 39 | 40 | -- Christoph Berg Fri, 04 May 2018 19:37:14 +0200 41 | 42 | sqlsmith (1.2-1) unstable; urgency=medium 43 | 44 | * SQLsmith v1.2 is released. 45 | * Adds support for testing sqlite3. 46 | * Built without support for MonetDB since the latter is not packaged 47 | yet. 48 | 49 | -- Andreas Seltenreich Thu, 03 May 2018 08:46:07 +0200 50 | 51 | sqlsmith (1.0-1) unstable; urgency=medium 52 | 53 | * Initial release. 
54 | 55 | -- Christoph Berg Mon, 30 May 2016 10:17:16 +0200 56 | -------------------------------------------------------------------------------- /debian/control: -------------------------------------------------------------------------------- 1 | Source: sqlsmith 2 | Section: database 3 | Priority: optional 4 | Maintainer: Andreas Seltenreich 5 | Uploaders: Christoph Berg 6 | Standards-Version: 4.6.0 7 | Homepage: https://github.com/anse1/sqlsmith 8 | Vcs-Browser: https://github.com/anse1/sqlsmith 9 | Vcs-Git: https://github.com/anse1/sqlsmith.git -b debian 10 | Build-Depends: 11 | autoconf, 12 | autoconf-archive, 13 | debhelper-compat (= 12), 14 | libpq-dev, 15 | libpqxx-dev, 16 | libsqlite3-dev, 17 | libssl-dev, 18 | 19 | Package: sqlsmith 20 | Architecture: any 21 | Depends: ${misc:Depends}, ${shlibs:Depends} 22 | Description: random SQL query generator 23 | SQLsmith is a random SQL query generator. Its paragon is Csmith, which proved 24 | valuable for quality assurance in C compilers. 25 | . 26 | It currently supports generating queries for PostgreSQL 9.5+ and sqlite3. 27 | . 28 | Besides PostgreSQL developers, users developing extensions of PostgreSQL might 29 | also be interested in exposing their code to SQLsmith's random workload. 30 | . 31 | During its prototyping stage, it already found about thirty bugs in 32 | PostgreSQL alphas, betas and releases, including security vulnerabilities in 33 | released versions. 
34 | -------------------------------------------------------------------------------- /debian/copyright: -------------------------------------------------------------------------------- 1 | Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ 2 | Upstream-Contact: Andreas Seltenreich 3 | 4 | Files: * 5 | Copyright: 2015-2018 Andreas Seltenreich 6 | License: GPL-3 7 | 8 | Files: monetdb.* 9 | Copyright: 2016-2017 Sjoerd Mullender , Bo Tang 10 | License: GPL-3 11 | 12 | License: GPL-3 13 | This program is free software; you can redistribute it and/or modify 14 | it under the terms of the GNU General Public License as published by 15 | the Free Software Foundation; version 3 of the License. 16 | . 17 | The full text of the GPL is distributed as in 18 | /usr/share/common-licenses/GPL-3 on Debian systems. 19 | -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | %: 4 | dh $@ 5 | -------------------------------------------------------------------------------- /debian/source/format: -------------------------------------------------------------------------------- 1 | 3.0 (quilt) 2 | -------------------------------------------------------------------------------- /debian/tests/control: -------------------------------------------------------------------------------- 1 | Depends: @, postgresql 2 | Tests: sqlsmith 3 | Restrictions: allow-stderr 4 | -------------------------------------------------------------------------------- /debian/tests/sqlsmith: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | pg_virtualenv sqlsmith --max-queries=100 --verbose 4 | -------------------------------------------------------------------------------- /debian/upstream/metadata: -------------------------------------------------------------------------------- 1 | 
Bug-Database: https://github.com/anse1/sqlsmith/issues 2 | Bug-Submit: https://github.com/anse1/sqlsmith/issues/new 3 | Repository: https://github.com/anse1/sqlsmith.git 4 | Repository-Browse: https://github.com/anse1/sqlsmith 5 | -------------------------------------------------------------------------------- /debian/watch: -------------------------------------------------------------------------------- 1 | version=4 2 | https://github.com/anse1/sqlsmith/tags .*/v(.*).tar.gz 3 | -------------------------------------------------------------------------------- /dump.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "dump.hh" 5 | #include "util.hh" 6 | 7 | using namespace std; 8 | 9 | std::string graphml_dumper::id(struct prod *p) 10 | { 11 | ostringstream os; 12 | os << pretty_type(p) << "_" << p; 13 | return os.str(); 14 | } 15 | 16 | graphml_dumper::graphml_dumper(ostream &out) 17 | : o(out) 18 | { 19 | o << "" << endl << 20 | "" << endl; 24 | 25 | o << "" << endl; 27 | o << "" << endl; 29 | o << "" << endl; 31 | 32 | o << "" << endl; 33 | 34 | } 35 | 36 | void graphml_dumper::visit(struct prod *p) 37 | { 38 | o << ""; 39 | o << "" << p->retries << ""; 40 | o << "" << pretty_type(p) << ""; 41 | o << "" << p->scope << ""; 42 | o << "" << endl; 43 | if (p->pprod) { 44 | o << "pprod) << "\"/>"; 45 | } 46 | o << endl; 47 | } 48 | 49 | graphml_dumper::~graphml_dumper() 50 | { 51 | o << "" << endl; 52 | } 53 | 54 | void ast_logger::generated(prod &query) 55 | { 56 | string filename(""); 57 | filename += "sqlsmith-"; 58 | filename += to_string(queries); 59 | filename += ".xml"; 60 | ofstream os(filename); 61 | graphml_dumper visitor(os); 62 | query.accept(&visitor); 63 | queries++; 64 | } 65 | -------------------------------------------------------------------------------- /dump.hh: -------------------------------------------------------------------------------- 1 | /// @file 2 | /// @brief 
Dump syntax trees as GraphML 3 | #ifndef DUMP_HH 4 | #define DUMP_HH 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "prod.hh" 11 | #include "log.hh" 12 | 13 | struct graphml_dumper : prod_visitor { 14 | std::ostream &o; 15 | virtual void visit(struct prod *p); 16 | graphml_dumper(std::ostream &out); 17 | std::string id(prod *p); 18 | std::string type(struct prod *p); 19 | virtual ~graphml_dumper(); 20 | }; 21 | 22 | struct ast_logger : logger { 23 | int queries = 0; 24 | virtual void generated(prod &query); 25 | }; 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /dump.xsl: -------------------------------------------------------------------------------- 1 | 2 | 3 | 9 | 10 | 11 | 12 | 13 | digraph ast { 14 | 15 | } 16 | 17 | 18 | 19 | 20 | 21 | label= 22 | 23 | , 24 | 25 | 26 | 27 | 28 | fillcolor=firebrick,style=filled 29 | 30 | 31 | fillcolor=red,style=filled 32 | 33 | 34 | fillcolor=salmon,style=filled 35 | 36 | 37 | fillcolor=wheat,style=filled 38 | 39 | 40 | fillcolor=springgreen,style=filled 41 | 42 | 43 | fillcolor=gainsboro,style=filled 44 | 45 | 46 | fillcolor=white,style=filled 47 | 48 | 49 | , 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | [ 59 | 60 | ]; 61 | 62 | 63 | 64 | 65 | 66 | 67 | -> 68 | 69 | ; 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /dut.hh: -------------------------------------------------------------------------------- 1 | /// @file 2 | /// @brief Base class for device under test 3 | 4 | #ifndef DUT_HH 5 | #define DUT_HH 6 | #include 7 | #include 8 | 9 | #include "prod.hh" 10 | 11 | namespace dut { 12 | 13 | struct failure : public std::exception { 14 | std::string errstr; 15 | std::string sqlstate; 16 | const char* what() const throw() 17 | { 18 | return errstr.c_str(); 19 | } 20 | failure(const char *s, const char *sqlstate_ = "") throw() 21 | : errstr(), sqlstate() { 22 | errstr = s; 23 | sqlstate = sqlstate_; 
24 | }; 25 | }; 26 | 27 | struct broken : failure { 28 | broken(const char *s, const char *sqlstate_ = "") throw() 29 | : failure(s, sqlstate_) { } 30 | }; 31 | 32 | struct timeout : failure { 33 | timeout(const char *s, const char *sqlstate_ = "") throw() 34 | : failure(s, sqlstate_) { } 35 | }; 36 | 37 | struct syntax : failure { 38 | syntax(const char *s, const char *sqlstate_ = "") throw() 39 | : failure(s, sqlstate_) { } 40 | }; 41 | 42 | } 43 | 44 | struct dut_base { 45 | std::string version; 46 | virtual void test(const std::string &stmt) = 0; 47 | }; 48 | 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /expr.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "random.hh" 8 | #include "relmodel.hh" 9 | #include "grammar.hh" 10 | #include "schema.hh" 11 | #include "impedance.hh" 12 | #include "expr.hh" 13 | 14 | using namespace std; 15 | using impedance::matched; 16 | 17 | shared_ptr value_expr::factory(prod *p, sqltype *type_constraint) 18 | { 19 | try { 20 | if (1 == d20() && p->level < d6() && window_function::allowed(p)) 21 | return make_shared(p, type_constraint); 22 | else if (1 == d42() && p->level < d6()) 23 | return make_shared(p, type_constraint); 24 | else if (1 == d42() && p->level < d6()) 25 | return make_shared(p, type_constraint); 26 | else if (p->level < d6() && d6() == 1) 27 | return make_shared(p, type_constraint); 28 | else if (d12()==1) 29 | return make_shared(p, type_constraint); 30 | else if (p->level< d6() && d9()==1) 31 | return make_shared(p, type_constraint); 32 | else if (p->scope->refs.size() && d20() > 1) 33 | return make_shared(p, type_constraint); 34 | else 35 | return make_shared(p, type_constraint); 36 | } catch (runtime_error &e) { 37 | } 38 | p->retry(); 39 | return factory(p, type_constraint); 40 | } 41 | 42 | case_expr::case_expr(prod *p, sqltype 
*type_constraint) 43 | : value_expr(p) 44 | { 45 | condition = bool_expr::factory(this); 46 | true_expr = value_expr::factory(this, type_constraint); 47 | false_expr = value_expr::factory(this, true_expr->type); 48 | 49 | if(false_expr->type != true_expr->type) { 50 | /* Types are consistent but not identical. Try to find a more 51 | concrete one for a better match. */ 52 | if (true_expr->type->consistent(false_expr->type)) 53 | true_expr = value_expr::factory(this, false_expr->type); 54 | else 55 | false_expr = value_expr::factory(this, true_expr->type); 56 | } 57 | type = true_expr->type; 58 | } 59 | /* Print the CASE expression: condition, THEN arm (true_expr), ELSE arm (false_expr). */ 60 | void case_expr::out(std::ostream &out) 61 | { 62 | out << "case when " << *condition; 63 | out << " then " << *true_expr; 64 | out << " else " << *false_expr; 65 | out << " end"; 66 | indent(out); 67 | } 68 | 69 | void case_expr::accept(prod_visitor *v) 70 | { 71 | v->visit(this); 72 | condition->accept(v); 73 | true_expr->accept(v); 74 | false_expr->accept(v); 75 | } 76 | 77 | column_reference::column_reference(prod *p, sqltype *type_constraint) : value_expr(p) 78 | { 79 | if (type_constraint) { 80 | auto pairs = scope->refs_of_type(type_constraint); 81 | auto picked = random_pick(pairs); 82 | reference += picked.first->ident() 83 | + "."
+ picked.second.name; 84 | type = picked.second.type; 85 | assert(type_constraint->consistent(type)); 86 | } else { 87 | named_relation *r = random_pick(scope->refs); 88 | 89 | reference += r->ident() + "."; 90 | column &c = random_pick(r->columns()); 91 | type = c.type; 92 | reference += c.name; 93 | } 94 | } 95 | 96 | shared_ptr bool_expr::factory(prod *p) 97 | { 98 | try { 99 | if (p->level > d100()) 100 | return make_shared(p); 101 | if(d6() < 4) 102 | return make_shared(p); 103 | else if (d6() < 4) 104 | return make_shared(p); 105 | else if (d6() < 4) 106 | return make_shared(p); 107 | else if (d6() < 4) 108 | return make_shared(p); 109 | else 110 | return make_shared(p); 111 | // return make_shared(q); 112 | } catch (runtime_error &e) { 113 | } 114 | p->retry(); 115 | return factory(p); 116 | 117 | } 118 | 119 | exists_predicate::exists_predicate(prod *p) : bool_expr(p) 120 | { 121 | subquery = make_shared(this, scope); 122 | } 123 | 124 | void exists_predicate::accept(prod_visitor *v) 125 | { 126 | v->visit(this); 127 | subquery->accept(v); 128 | } 129 | 130 | void exists_predicate::out(std::ostream &out) 131 | { 132 | out << "EXISTS ("; 133 | indent(out); 134 | out << *subquery << ")"; 135 | } 136 | 137 | distinct_pred::distinct_pred(prod *p) : bool_binop(p) 138 | { 139 | lhs = make_shared(this); 140 | rhs = make_shared(this, lhs->type); 141 | } 142 | 143 | comparison_op::comparison_op(prod *p) : bool_binop(p) 144 | { 145 | auto &idx = p->scope->schema->operators_returning_type; 146 | 147 | auto iters = idx.equal_range(scope->schema->booltype); 148 | oper = random_pick(random_pick(iters)->second); 149 | 150 | lhs = value_expr::factory(this, oper->left); 151 | rhs = value_expr::factory(this, oper->right); 152 | 153 | if (oper->left == oper->right 154 | && lhs->type != rhs->type) { 155 | 156 | if (lhs->type->consistent(rhs->type)) 157 | lhs = value_expr::factory(this, rhs->type); 158 | else 159 | rhs = value_expr::factory(this, lhs->type); 160 | } 161 | } 162 
| 163 | coalesce::coalesce(prod *p, sqltype *type_constraint, const char *abbrev) 164 | : value_expr(p), abbrev_(abbrev) 165 | { 166 | auto first_expr = value_expr::factory(this, type_constraint); 167 | auto second_expr = value_expr::factory(this, first_expr->type); 168 | 169 | retry_limit = 20; 170 | while(first_expr->type != second_expr->type) { 171 | retry(); 172 | if (first_expr->type->consistent(second_expr->type)) 173 | first_expr = value_expr::factory(this, second_expr->type); 174 | else 175 | second_expr = value_expr::factory(this, first_expr->type); 176 | } 177 | type = second_expr->type; 178 | 179 | value_exprs.push_back(first_expr); 180 | value_exprs.push_back(second_expr); 181 | } 182 | 183 | void coalesce::out(std::ostream &out) 184 | { 185 | out << "cast(" << abbrev_ << "("; 186 | for (auto expr = value_exprs.begin(); expr != value_exprs.end(); expr++) { 187 | out << **expr; 188 | if (expr+1 != value_exprs.end()) 189 | out << ",", indent(out); 190 | } 191 | out << ")"; 192 | out << " as " << type->name << ")"; 193 | } 194 | 195 | const_expr::const_expr(prod *p, sqltype *type_constraint) 196 | : value_expr(p), expr("") 197 | { 198 | type = type_constraint ? type_constraint : scope->schema->inttype; 199 | 200 | if (type == scope->schema->inttype) 201 | expr = to_string(d100()); 202 | else if (type == scope->schema->booltype) 203 | expr += (d6() > 3) ? scope->schema->true_literal : scope->schema->false_literal; 204 | else if (dynamic_cast(p) && (d6() > 3)) 205 | expr += "default"; 206 | else 207 | expr += "cast(null as " + type->name + ")"; 208 | } 209 | 210 | funcall::funcall(prod *p, sqltype *type_constraint, bool agg) 211 | : value_expr(p), is_aggregate(agg) 212 | { 213 | if (type_constraint == scope->schema->internaltype) 214 | fail("cannot call functions involving internal type"); 215 | 216 | auto &idx = agg ? p->scope->schema->aggregates_returning_type 217 | : (4 < d6()) ? 
218 | p->scope->schema->routines_returning_type 219 | : p->scope->schema->parameterless_routines_returning_type; 220 | 221 | retry: 222 | 223 | if (!type_constraint) { 224 | proc = random_pick(random_pick(idx.begin(), idx.end())->second); 225 | } else { 226 | auto iters = idx.equal_range(type_constraint); 227 | proc = random_pick(random_pick(iters)->second); 228 | if (proc && !type_constraint->consistent(proc->restype)) { 229 | retry(); 230 | goto retry; 231 | } 232 | } 233 | 234 | if (!proc) { 235 | retry(); 236 | goto retry; 237 | } 238 | 239 | if (type_constraint) 240 | type = type_constraint; 241 | else 242 | type = proc->restype; 243 | 244 | if (type == scope->schema->internaltype) { 245 | retry(); 246 | goto retry; 247 | } 248 | 249 | for (auto type : proc->argtypes) 250 | if (type == scope->schema->internaltype 251 | || type == scope->schema->arraytype) { 252 | retry(); 253 | goto retry; 254 | } 255 | 256 | for (auto argtype : proc->argtypes) { 257 | assert(argtype); 258 | auto expr = value_expr::factory(this, argtype); 259 | parms.push_back(expr); 260 | } 261 | } 262 | 263 | void funcall::out(std::ostream &out) 264 | { 265 | out << proc->ident() << "("; 266 | for (auto expr = parms.begin(); expr != parms.end(); expr++) { 267 | indent(out); 268 | out << "cast(" << **expr << " as " << (*expr)->type->name << ")"; 269 | if (expr+1 != parms.end()) 270 | out << ","; 271 | } 272 | 273 | if (is_aggregate && (parms.begin() == parms.end())) 274 | out << "*"; 275 | out << ")"; 276 | } 277 | 278 | atomic_subselect::atomic_subselect(prod *p, sqltype *type_constraint) 279 | : value_expr(p), offset((d6() == 6) ? 
d100() : d6()) 280 | { 281 | match(); 282 | if (d6() < 3) { 283 | if (type_constraint) { 284 | auto idx = scope->schema->aggregates_returning_type; 285 | auto iters = idx.equal_range(type_constraint); 286 | agg = random_pick(random_pick(iters)->second); 287 | } else { 288 | agg = &random_pick<>(scope->schema->aggregates); 289 | } 290 | if (agg->argtypes.size() != 1) 291 | agg = 0; 292 | else 293 | type_constraint = agg->argtypes[0]; 294 | } else { 295 | agg = 0; 296 | } 297 | 298 | if (type_constraint) { 299 | auto idx = scope->schema->tables_with_columns_of_type; 300 | col = 0; 301 | auto iters = idx.equal_range(type_constraint); 302 | tab = random_pick(random_pick(iters)->second); 303 | 304 | for (auto &cand : tab->columns()) { 305 | if (type_constraint->consistent(cand.type)) { 306 | col = &cand; 307 | break; 308 | } 309 | } 310 | assert(col); 311 | } else { 312 | tab = &random_pick<>(scope->schema->tables); 313 | col = &random_pick<>(tab->columns()); 314 | } 315 | 316 | type = agg ? 
agg->restype : col->type; 317 | } 318 | 319 | void atomic_subselect::out(std::ostream &out) 320 | { 321 | out << "(select "; 322 | 323 | if (agg) 324 | out << agg->ident() << "(" << col->name << ")"; 325 | else 326 | out << col->name; 327 | 328 | out << " from " << tab->ident(); 329 | 330 | if (!agg) 331 | out << " limit 1 offset " << offset; 332 | 333 | out << ")"; 334 | indent(out); 335 | } 336 | 337 | void window_function::out(std::ostream &out) 338 | { 339 | indent(out); 340 | out << *aggregate << " over (partition by "; 341 | 342 | for (auto ref = partition_by.begin(); ref != partition_by.end(); ref++) { 343 | out << **ref; 344 | if (ref+1 != partition_by.end()) 345 | out << ","; 346 | } 347 | 348 | out << " order by "; 349 | 350 | for (auto ref = order_by.begin(); ref != order_by.end(); ref++) { 351 | out << **ref; 352 | if (ref+1 != order_by.end()) 353 | out << ","; 354 | } 355 | 356 | out << ")"; 357 | } 358 | 359 | window_function::window_function(prod *p, sqltype *type_constraint) 360 | : value_expr(p) 361 | { 362 | match(); 363 | aggregate = make_shared(this, type_constraint, true); 364 | type = aggregate->type; 365 | partition_by.push_back(make_shared(this)); 366 | while(d6() > 4) 367 | partition_by.push_back(make_shared(this)); 368 | 369 | order_by.push_back(make_shared(this)); 370 | while(d6() > 4) 371 | order_by.push_back(make_shared(this)); 372 | } 373 | 374 | bool window_function::allowed(prod *p) 375 | { 376 | if (dynamic_cast(p)) 377 | return dynamic_cast(p->pprod) ? 
true : false; 378 | if (dynamic_cast(p)) 379 | return false; 380 | if (dynamic_cast(p)) 381 | return allowed(p->pprod); 382 | return false; 383 | } 384 | -------------------------------------------------------------------------------- /expr.hh: -------------------------------------------------------------------------------- 1 | /// @file 2 | /// @brief grammar: Value expression productions 3 | 4 | #ifndef EXPR_HH 5 | #define EXPR_HH 6 | 7 | #include "prod.hh" 8 | #include 9 | 10 | using std::shared_ptr; 11 | using std::vector; 12 | using std::string; 13 | 14 | struct value_expr: prod { 15 | sqltype *type; 16 | virtual void out(std::ostream &out) = 0; 17 | virtual ~value_expr() { } 18 | value_expr(prod *p) : prod(p) { } 19 | static shared_ptr factory(prod *p, sqltype *type_constraint = 0); 20 | }; 21 | 22 | struct case_expr : value_expr { 23 | shared_ptr condition; 24 | shared_ptr true_expr; 25 | shared_ptr false_expr; 26 | case_expr(prod *p, sqltype *type_constraint = 0); 27 | virtual void out(std::ostream &out); 28 | virtual void accept(prod_visitor *v); 29 | }; 30 | 31 | struct funcall : value_expr { 32 | routine *proc; 33 | bool is_aggregate; 34 | vector > parms; 35 | virtual void out(std::ostream &out); 36 | virtual ~funcall() { } 37 | funcall(prod *p, sqltype *type_constraint = 0, bool agg = 0); 38 | virtual void accept(prod_visitor *v) { 39 | v->visit(this); 40 | for (auto p : parms) 41 | p->accept(v); 42 | } 43 | }; 44 | 45 | struct atomic_subselect : value_expr { 46 | table *tab; 47 | column *col; 48 | int offset; 49 | routine *agg; 50 | atomic_subselect(prod *p, sqltype *type_constraint = 0); 51 | virtual void out(std::ostream &out); 52 | }; 53 | 54 | struct const_expr: value_expr { 55 | std::string expr; 56 | const_expr(prod *p, sqltype *type_constraint = 0); 57 | virtual void out(std::ostream &out) { out << expr; } 58 | virtual ~const_expr() { } 59 | }; 60 | 61 | struct column_reference: value_expr { 62 | column_reference(prod *p, sqltype 
*type_constraint = 0); 63 | virtual void out(std::ostream &out) { out << reference; } 64 | std::string reference; 65 | virtual ~column_reference() { } 66 | }; 67 | 68 | struct coalesce : value_expr { 69 | const char *abbrev_; 70 | vector > value_exprs; 71 | virtual ~coalesce() { }; 72 | coalesce(prod *p, sqltype *type_constraint = 0, const char *abbrev = "coalesce"); 73 | virtual void out(std::ostream &out); 74 | virtual void accept(prod_visitor *v) { 75 | v->visit(this); 76 | for (auto p : value_exprs) 77 | p->accept(v); 78 | } 79 | }; 80 | 81 | struct nullif : coalesce { 82 | virtual ~nullif() { }; 83 | nullif(prod *p, sqltype *type_constraint = 0) 84 | : coalesce(p, type_constraint, "nullif") 85 | { }; 86 | }; 87 | 88 | struct bool_expr : value_expr { 89 | virtual ~bool_expr() { } 90 | bool_expr(prod *p) : value_expr(p) { type = scope->schema->booltype; } 91 | static shared_ptr factory(prod *p); 92 | }; 93 | 94 | struct truth_value : bool_expr { 95 | virtual ~truth_value() { } 96 | const char *op; 97 | virtual void out(std::ostream &out) { out << op; } 98 | truth_value(prod *p) : bool_expr(p) { 99 | op = ( (d6() < 4) ? scope->schema->true_literal : scope->schema->false_literal); 100 | } 101 | }; 102 | 103 | struct null_predicate : bool_expr { 104 | virtual ~null_predicate() { } 105 | const char *negate; 106 | shared_ptr expr; 107 | null_predicate(prod *p) : bool_expr(p) { 108 | negate = ((d6()<4) ? 
"not " : ""); 109 | expr = value_expr::factory(this); 110 | } 111 | virtual void out(std::ostream &out) { 112 | out << *expr << " is " << negate << "NULL"; 113 | } 114 | virtual void accept(prod_visitor *v) { 115 | v->visit(this); 116 | expr->accept(v); 117 | } 118 | }; 119 | 120 | struct exists_predicate : bool_expr { 121 | shared_ptr subquery; 122 | virtual ~exists_predicate() { } 123 | exists_predicate(prod *p); 124 | virtual void out(std::ostream &out); 125 | virtual void accept(prod_visitor *v); 126 | }; 127 | 128 | struct bool_binop : bool_expr { 129 | shared_ptr lhs, rhs; 130 | bool_binop(prod *p) : bool_expr(p) { } 131 | virtual void out(std::ostream &out) = 0; 132 | virtual void accept(prod_visitor *v) { 133 | v->visit(this); 134 | lhs->accept(v); 135 | rhs->accept(v); 136 | } 137 | }; 138 | 139 | struct bool_term : bool_binop { 140 | virtual ~bool_term() { } 141 | const char *op; 142 | virtual void out(std::ostream &out) { 143 | out << "(" << *lhs << ") "; 144 | indent(out); 145 | out << op << " (" << *rhs << ")"; 146 | } 147 | bool_term(prod *p) : bool_binop(p) 148 | { 149 | op = ((d6()<4) ? 
"or" : "and"); 150 | lhs = bool_expr::factory(this); 151 | rhs = bool_expr::factory(this); 152 | } 153 | }; 154 | 155 | struct distinct_pred : bool_binop { 156 | distinct_pred(prod *p); 157 | virtual ~distinct_pred() { }; 158 | virtual void out(std::ostream &o) { 159 | o << *lhs << " is distinct from " << *rhs; 160 | } 161 | }; 162 | 163 | struct comparison_op : bool_binop { 164 | op *oper; 165 | comparison_op(prod *p); 166 | virtual ~comparison_op() { }; 167 | virtual void out(std::ostream &o) { 168 | o << *lhs << " " << oper->name << " " << *rhs; 169 | } 170 | }; 171 | 172 | struct window_function : value_expr { 173 | virtual void out(std::ostream &out); 174 | virtual ~window_function() { } 175 | window_function(prod *p, sqltype *type_constraint); 176 | vector > partition_by; 177 | vector > order_by; 178 | shared_ptr aggregate; 179 | static bool allowed(prod *pprod); 180 | virtual void accept(prod_visitor *v) { 181 | v->visit(this); 182 | aggregate->accept(v); 183 | for (auto p : partition_by) 184 | p->accept(v); 185 | for (auto p : order_by) 186 | p->accept(v); 187 | } 188 | }; 189 | 190 | #endif 191 | -------------------------------------------------------------------------------- /grammar.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "random.hh" 8 | #include "relmodel.hh" 9 | #include "grammar.hh" 10 | #include "schema.hh" 11 | #include "impedance.hh" 12 | 13 | using namespace std; 14 | 15 | shared_ptr table_ref::factory(prod *p) { 16 | try { 17 | if (p->level < 3 + d6()) { 18 | if (d6() > 3 && p->level < d6()) 19 | return make_shared(p); 20 | if (d6() > 3) 21 | return make_shared(p); 22 | } 23 | if (d6() > 3) 24 | return make_shared(p); 25 | else 26 | return make_shared(p); 27 | } catch (runtime_error &e) { 28 | p->retry(); 29 | } 30 | return factory(p); 31 | } 32 | 33 | table_or_query_name::table_or_query_name(prod *p) : table_ref(p) { 34 
| t = random_pick(scope->tables); 35 | refs.push_back(make_shared(scope->stmt_uid("ref"), t)); 36 | } 37 | 38 | void table_or_query_name::out(std::ostream &out) { 39 | out << t->ident() << " as " << refs[0]->ident(); 40 | } 41 | 42 | target_table::target_table(prod *p, table *victim) : table_ref(p) 43 | { 44 | while (! victim 45 | || victim->schema == "pg_catalog" 46 | || !victim->is_base_table 47 | || !victim->columns().size()) { 48 | struct named_relation *pick = random_pick(scope->tables); 49 | victim = dynamic_cast(pick); 50 | retry(); 51 | } 52 | victim_ = victim; 53 | refs.push_back(make_shared(scope->stmt_uid("target"), victim)); 54 | } 55 | 56 | void target_table::out(std::ostream &out) { 57 | out << victim_->ident() << " as " << refs[0]->ident(); 58 | } 59 | 60 | table_sample::table_sample(prod *p) : table_ref(p) { 61 | match(); 62 | retry_limit = 1000; /* retries are cheap here */ 63 | do { 64 | auto pick = random_pick(scope->schema->base_tables); 65 | t = dynamic_cast(pick); 66 | retry(); 67 | } while (!t || !t->is_base_table); 68 | 69 | refs.push_back(make_shared(scope->stmt_uid("sample"), t)); 70 | percent = 0.1 * d100(); 71 | method = (d6() > 2) ? 
"system" : "bernoulli"; 72 | } 73 | 74 | void table_sample::out(std::ostream &out) { 75 | out << t->ident() << 76 | " as " << refs[0]->ident() << 77 | " tablesample " << method << 78 | " (" << percent << ") "; 79 | } 80 | 81 | table_subquery::table_subquery(prod *p, bool lateral) 82 | : table_ref(p), is_lateral(lateral) { 83 | query = make_shared(this, scope, lateral); 84 | string alias = scope->stmt_uid("subq"); 85 | relation *aliased_rel = &query->select_list->derived_table; 86 | refs.push_back(make_shared(alias, aliased_rel)); 87 | } 88 | 89 | table_subquery::~table_subquery() { } 90 | 91 | void table_subquery::accept(prod_visitor *v) { 92 | query->accept(v); 93 | v->visit(this); 94 | } 95 | 96 | shared_ptr join_cond::factory(prod *p, table_ref &lhs, table_ref &rhs) 97 | { 98 | try { 99 | if (d6() < 6) 100 | return make_shared(p, lhs, rhs); 101 | else 102 | return make_shared(p, lhs, rhs); 103 | } catch (runtime_error &e) { 104 | p->retry(); 105 | } 106 | return factory(p, lhs, rhs); 107 | } 108 | 109 | simple_join_cond::simple_join_cond(prod *p, table_ref &lhs, table_ref &rhs) 110 | : join_cond(p, lhs, rhs) 111 | { 112 | retry: 113 | named_relation *left_rel = &*random_pick(lhs.refs); 114 | 115 | if (!left_rel->columns().size()) 116 | { retry(); goto retry; } 117 | 118 | named_relation *right_rel = &*random_pick(rhs.refs); 119 | 120 | column &c1 = random_pick(left_rel->columns()); 121 | 122 | for (auto c2 : right_rel->columns()) { 123 | if (c1.type == c2.type) { 124 | condition += 125 | left_rel->ident() + "." + c1.name + " = " + right_rel->ident() + "." 
+ c2.name + " "; 126 | break; 127 | } 128 | } 129 | if (condition == "") { 130 | retry(); goto retry; 131 | } 132 | } 133 | 134 | void simple_join_cond::out(std::ostream &out) { 135 | out << condition; 136 | } 137 | 138 | expr_join_cond::expr_join_cond(prod *p, table_ref &lhs, table_ref &rhs) 139 | : join_cond(p, lhs, rhs), joinscope(p->scope) 140 | { 141 | scope = &joinscope; 142 | for (auto ref: lhs.refs) 143 | joinscope.refs.push_back(&*ref); 144 | for (auto ref: rhs.refs) 145 | joinscope.refs.push_back(&*ref); 146 | search = bool_expr::factory(this); 147 | } 148 | 149 | void expr_join_cond::out(std::ostream &out) { 150 | out << *search; 151 | } 152 | 153 | joined_table::joined_table(prod *p) : table_ref(p) { 154 | lhs = table_ref::factory(this); 155 | rhs = table_ref::factory(this); 156 | 157 | condition = join_cond::factory(this, *lhs, *rhs); 158 | 159 | if (d6()<4) { 160 | type = "inner"; 161 | } else if (d6()<4) { 162 | type = "left"; 163 | } else { 164 | type = "right"; 165 | } 166 | 167 | for (auto ref: lhs->refs) 168 | refs.push_back(ref); 169 | for (auto ref: rhs->refs) 170 | refs.push_back(ref); 171 | } 172 | 173 | void joined_table::out(std::ostream &out) { 174 | out << *lhs; 175 | indent(out); 176 | out << type << " join " << *rhs; 177 | indent(out); 178 | out << "on (" << *condition << ")"; 179 | } 180 | 181 | void table_subquery::out(std::ostream &out) { 182 | if (is_lateral) 183 | out << "lateral "; 184 | out << "(" << *query << ") as " << refs[0]->ident(); 185 | } 186 | 187 | void from_clause::out(std::ostream &out) { 188 | if (! 
reflist.size()) 189 | return; 190 | out << "from "; 191 | 192 | for (auto r = reflist.begin(); r < reflist.end(); r++) { 193 | indent(out); 194 | out << **r; 195 | if (r + 1 != reflist.end()) 196 | out << ","; 197 | } 198 | } 199 | 200 | from_clause::from_clause(prod *p) : prod(p) { 201 | reflist.push_back(table_ref::factory(this)); 202 | for (auto r : reflist.back()->refs) 203 | scope->refs.push_back(&*r); 204 | 205 | while (d6() > 5) { 206 | // add a lateral subquery 207 | if (!impedance::matched(typeid(lateral_subquery))) 208 | break; 209 | reflist.push_back(make_shared(this)); 210 | for (auto r : reflist.back()->refs) 211 | scope->refs.push_back(&*r); 212 | } 213 | } 214 | 215 | select_list::select_list(prod *p) : prod(p) 216 | { 217 | do { 218 | shared_ptr e = value_expr::factory(this); 219 | value_exprs.push_back(e); 220 | ostringstream name; 221 | name << "c" << columns++; 222 | sqltype *t=e->type; 223 | assert(t); 224 | derived_table.columns().push_back(column(name.str(), t)); 225 | } while (d6() > 1); 226 | } 227 | 228 | void select_list::out(std::ostream &out) 229 | { 230 | int i = 0; 231 | for (auto expr = value_exprs.begin(); expr != value_exprs.end(); expr++) { 232 | indent(out); 233 | out << **expr << " as " << derived_table.columns()[i].name; 234 | i++; 235 | if (expr+1 != value_exprs.end()) 236 | out << ", "; 237 | } 238 | } 239 | 240 | void query_spec::out(std::ostream &out) { 241 | out << "select " << set_quantifier << " " 242 | << *select_list; 243 | indent(out); 244 | out << *from_clause; 245 | indent(out); 246 | out << "where "; 247 | out << *search; 248 | if (limit_clause.length()) { 249 | indent(out); 250 | out << limit_clause; 251 | } 252 | } 253 | 254 | struct for_update_verify : prod_visitor { 255 | virtual void visit(prod *p) { 256 | if (dynamic_cast(p)) 257 | throw("window function"); 258 | joined_table* join = dynamic_cast(p); 259 | if (join && join->type != "inner") 260 | throw("outer join"); 261 | query_spec* subquery = 
dynamic_cast(p); 262 | if (subquery) 263 | subquery->set_quantifier = ""; 264 | table_or_query_name* tab = dynamic_cast(p); 265 | if (tab) { 266 | table *actual_table = dynamic_cast(tab->t); 267 | if (actual_table && !actual_table->is_insertable) 268 | throw("read only"); 269 | if (actual_table->name.find("pg_")) 270 | throw("catalog"); 271 | } 272 | table_sample* sample = dynamic_cast(p); 273 | if (sample) { 274 | table *actual_table = dynamic_cast(sample->t); 275 | if (actual_table && !actual_table->is_insertable) 276 | throw("read only"); 277 | if (actual_table->name.find("pg_")) 278 | throw("catalog"); 279 | } 280 | } ; 281 | }; 282 | 283 | 284 | select_for_update::select_for_update(prod *p, struct scope *s, bool lateral) 285 | : query_spec(p,s,lateral) 286 | { 287 | static const char *modes[] = { 288 | "update", 289 | "share", 290 | "no key update", 291 | "key share", 292 | }; 293 | 294 | try { 295 | for_update_verify v1; 296 | this->accept(&v1); 297 | 298 | } catch (const char* reason) { 299 | lockmode = 0; 300 | return; 301 | } 302 | lockmode = modes[d6()%(sizeof(modes)/sizeof(*modes))]; 303 | set_quantifier = ""; // disallow distinct 304 | } 305 | 306 | void select_for_update::out(std::ostream &out) { 307 | query_spec::out(out); 308 | if (lockmode) { 309 | indent(out); 310 | out << " for " << lockmode; 311 | } 312 | } 313 | 314 | query_spec::query_spec(prod *p, struct scope *s, bool lateral) : 315 | prod(p), myscope(s) 316 | { 317 | scope = &myscope; 318 | scope->tables = s->tables; 319 | 320 | if (lateral) 321 | scope->refs = s->refs; 322 | 323 | from_clause = make_shared(this); 324 | select_list = make_shared(this); 325 | 326 | set_quantifier = (d100() == 1) ? 
"distinct" : ""; 327 | 328 | search = bool_expr::factory(this); 329 | 330 | if (d6() > 2) { 331 | ostringstream cons; 332 | cons << "limit " << d100() + d100(); 333 | limit_clause = cons.str(); 334 | } 335 | } 336 | 337 | long prepare_stmt::seq; 338 | 339 | void modifying_stmt::pick_victim() 340 | { 341 | do { 342 | struct named_relation *pick = random_pick(scope->tables); 343 | victim = dynamic_cast(pick); 344 | retry(); 345 | } while (! victim 346 | || victim->schema == "pg_catalog" 347 | || !victim->is_base_table 348 | || !victim->columns().size()); 349 | } 350 | 351 | modifying_stmt::modifying_stmt(prod *p, struct scope *s, table *victim) 352 | : prod(p), myscope(s) 353 | { 354 | scope = &myscope; 355 | scope->tables = s->tables; 356 | 357 | if (!victim) 358 | pick_victim(); 359 | } 360 | 361 | 362 | delete_stmt::delete_stmt(prod *p, struct scope *s, table *v) 363 | : modifying_stmt(p,s,v) { 364 | scope->refs.push_back(victim); 365 | search = bool_expr::factory(this); 366 | } 367 | 368 | delete_returning::delete_returning(prod *p, struct scope *s, table *victim) 369 | : delete_stmt(p, s, victim) { 370 | match(); 371 | select_list = make_shared(this); 372 | } 373 | 374 | insert_stmt::insert_stmt(prod *p, struct scope *s, table *v) 375 | : modifying_stmt(p, s, v) 376 | { 377 | match(); 378 | 379 | for (auto col : victim->columns()) { 380 | auto expr = value_expr::factory(this, col.type); 381 | assert(expr->type == col.type); 382 | value_exprs.push_back(expr); 383 | } 384 | } 385 | 386 | void insert_stmt::out(std::ostream &out) 387 | { 388 | out << "insert into " << victim->ident() << " "; 389 | 390 | if (!value_exprs.size()) { 391 | out << "default values"; 392 | return; 393 | } 394 | 395 | out << "values ("; 396 | 397 | for (auto expr = value_exprs.begin(); 398 | expr != value_exprs.end(); 399 | expr++) { 400 | indent(out); 401 | out << **expr; 402 | if (expr+1 != value_exprs.end()) 403 | out << ", "; 404 | } 405 | out << ")"; 406 | } 407 | 408 | 
set_list::set_list(prod *p, table *target) : prod(p) 409 | { 410 | do { 411 | for (auto col : target->columns()) { 412 | if (d6() < 4) 413 | continue; 414 | auto expr = value_expr::factory(this, col.type); 415 | value_exprs.push_back(expr); 416 | names.push_back(col.name); 417 | } 418 | } while (!names.size()); 419 | } 420 | 421 | void set_list::out(std::ostream &out) 422 | { 423 | assert(names.size()); 424 | out << " set "; 425 | for (size_t i = 0; i < names.size(); i++) { 426 | indent(out); 427 | out << names[i] << " = " << *value_exprs[i]; 428 | if (i+1 != names.size()) 429 | out << ", "; 430 | } 431 | } 432 | 433 | update_stmt::update_stmt(prod *p, struct scope *s, table *v) 434 | : modifying_stmt(p, s, v) { 435 | scope->refs.push_back(victim); 436 | search = bool_expr::factory(this); 437 | set_list = make_shared(this, victim); 438 | } 439 | 440 | void update_stmt::out(std::ostream &out) 441 | { 442 | out << "update " << victim->ident() << *set_list; 443 | } 444 | 445 | update_returning::update_returning(prod *p, struct scope *s, table *v) 446 | : update_stmt(p, s, v) { 447 | match(); 448 | 449 | select_list = make_shared(this); 450 | } 451 | 452 | 453 | upsert_stmt::upsert_stmt(prod *p, struct scope *s, table *v) 454 | : insert_stmt(p,s,v) 455 | { 456 | match(); 457 | 458 | if (!victim->constraints.size()) 459 | fail("need table w/ constraint for upsert"); 460 | 461 | set_list = std::make_shared(this, victim); 462 | search = bool_expr::factory(this); 463 | constraint = random_pick(victim->constraints); 464 | } 465 | 466 | shared_ptr statement_factory(struct scope *s) 467 | { 468 | try { 469 | s->new_stmt(); 470 | if (d42() == 1) 471 | return make_shared((struct prod *)0, s); 472 | if (d42() == 1) 473 | return make_shared((struct prod *)0, s); 474 | else if (d42() == 1) 475 | return make_shared((struct prod *)0, s); 476 | else if (d42() == 1) { 477 | return make_shared((struct prod *)0, s); 478 | } else if (d42() == 1) 479 | return make_shared((struct prod *)0, 
s); 480 | else if (d6() > 4) 481 | return make_shared((struct prod *)0, s); 482 | else if (d6() > 5) 483 | return make_shared((struct prod *)0, s); 484 | return make_shared((struct prod *)0, s); 485 | } catch (runtime_error &e) { 486 | return statement_factory(s); 487 | } 488 | } 489 | 490 | void common_table_expression::accept(prod_visitor *v) 491 | { 492 | v->visit(this); 493 | for(auto q : with_queries) 494 | q->accept(v); 495 | query->accept(v); 496 | } 497 | 498 | common_table_expression::common_table_expression(prod *parent, struct scope *s) 499 | : prod(parent), myscope(s) 500 | { 501 | scope = &myscope; 502 | do { 503 | shared_ptr query = make_shared(this, s); 504 | with_queries.push_back(query); 505 | string alias = scope->stmt_uid("jennifer"); 506 | relation *relation = &query->select_list->derived_table; 507 | auto aliased_rel = make_shared(alias, relation); 508 | refs.push_back(aliased_rel); 509 | scope->tables.push_back(&*aliased_rel); 510 | 511 | } while (d6() > 2); 512 | 513 | retry: 514 | do { 515 | auto pick = random_pick(s->tables); 516 | scope->tables.push_back(pick); 517 | } while (d6() > 3); 518 | try { 519 | query = make_shared(this, scope); 520 | } catch (runtime_error &e) { 521 | retry(); 522 | goto retry; 523 | } 524 | 525 | } 526 | 527 | void common_table_expression::out(std::ostream &out) 528 | { 529 | out << "WITH " ; 530 | for (size_t i = 0; i < with_queries.size(); i++) { 531 | indent(out); 532 | out << refs[i]->ident() << " AS " << "(" << *with_queries[i] << ")"; 533 | if (i+1 != with_queries.size()) 534 | out << ", "; 535 | indent(out); 536 | } 537 | out << *query; 538 | indent(out); 539 | } 540 | 541 | merge_stmt::merge_stmt(prod *p, struct scope *s, table *v) 542 | : modifying_stmt(p,s,v) { 543 | match(); 544 | target_table_ = make_shared(this, victim); 545 | data_source = table_ref::factory(this); 546 | // join_condition = join_cond::factory(this, *target_table_, *data_source); 547 | join_condition = make_shared(this, 
*target_table_, *data_source); 548 | 549 | 550 | /* Put data_source into scope but not target_table. Visibility of 551 | the latter varies depending on kind of when clause. */ 552 | // for (auto r : data_source->refs) 553 | // scope->refs.push_back(&*r); 554 | 555 | clauselist.push_back(when_clause::factory(this)); 556 | while (d6()>4) 557 | clauselist.push_back(when_clause::factory(this)); 558 | } 559 | 560 | void merge_stmt::out(std::ostream &out) 561 | { 562 | out << "MERGE INTO " << *target_table_; 563 | indent(out); 564 | out << "USING " << *data_source; 565 | indent(out); 566 | out << "ON " << *join_condition; 567 | indent(out); 568 | for (auto p : clauselist) { 569 | out << *p; 570 | indent(out); 571 | } 572 | } 573 | 574 | void merge_stmt::accept(prod_visitor *v) 575 | { 576 | v->visit(this); 577 | target_table_->accept(v); 578 | data_source->accept(v); 579 | join_condition->accept(v); 580 | for (auto p : clauselist) 581 | p->accept(v); 582 | 583 | } 584 | 585 | when_clause::when_clause(merge_stmt *p) 586 | : prod(p) 587 | { 588 | condition = bool_expr::factory(this); 589 | matched = d6() > 3; 590 | } 591 | 592 | void when_clause::out(std::ostream &out) 593 | { 594 | out << (matched ? "WHEN MATCHED " : "WHEN NOT MATCHED"); 595 | indent(out); 596 | out << "AND " << *condition; 597 | indent(out); 598 | out << " THEN "; 599 | out << (matched ? 
"DELETE" : "DO NOTHING"); 600 | } 601 | 602 | void when_clause::accept(prod_visitor *v) 603 | { 604 | v->visit(this); 605 | condition->accept(v); 606 | } 607 | 608 | when_clause_update::when_clause_update(merge_stmt *p) 609 | : when_clause(p), myscope(p->scope) 610 | { 611 | myscope.tables = scope->tables; 612 | myscope.refs = scope->refs; 613 | scope = &myscope; 614 | scope->refs.push_back(&*(p->target_table_->refs[0])); 615 | 616 | set_list = std::make_shared(this, p->victim); 617 | } 618 | 619 | void when_clause_update::out(std::ostream &out) { 620 | out << "WHEN MATCHED AND " << *condition; 621 | indent(out); 622 | out << " THEN UPDATE " << *set_list; 623 | } 624 | 625 | void when_clause_update::accept(prod_visitor *v) 626 | { 627 | v->visit(this); 628 | set_list->accept(v); 629 | } 630 | 631 | 632 | when_clause_insert::when_clause_insert(struct merge_stmt *p) 633 | : when_clause(p) 634 | { 635 | for (auto col : p->victim->columns()) { 636 | auto expr = value_expr::factory(this, col.type); 637 | assert(expr->type == col.type); 638 | exprs.push_back(expr); 639 | } 640 | } 641 | 642 | void when_clause_insert::out(std::ostream &out) { 643 | out << "WHEN NOT MATCHED AND " << *condition; 644 | indent(out); 645 | out << " THEN INSERT VALUES ( "; 646 | 647 | for (auto expr = exprs.begin(); 648 | expr != exprs.end(); 649 | expr++) { 650 | out << **expr; 651 | if (expr+1 != exprs.end()) 652 | out << ", "; 653 | } 654 | out << ")"; 655 | 656 | } 657 | 658 | void when_clause_insert::accept(prod_visitor *v) 659 | { 660 | v->visit(this); 661 | for (auto p : exprs) 662 | p->accept(v); 663 | } 664 | 665 | shared_ptr when_clause::factory(struct merge_stmt *p) 666 | { 667 | try { 668 | switch(d6()) { 669 | case 1: 670 | case 2: 671 | return make_shared(p); 672 | case 3: 673 | case 4: 674 | return make_shared(p); 675 | default: 676 | return make_shared(p); 677 | } 678 | } catch (runtime_error &e) { 679 | p->retry(); 680 | } 681 | return factory(p); 682 | } 683 | 684 | 
-------------------------------------------------------------------------------- /grammar.hh: -------------------------------------------------------------------------------- 1 | /// @file 2 | /// @brief grammar: Top-level and unsorted grammar productions 3 | 4 | #ifndef GRAMMAR_HH 5 | #define GRAMMAR_HH 6 | 7 | #include 8 | #include "relmodel.hh" 9 | #include 10 | #include "schema.hh" 11 | 12 | #include "prod.hh" 13 | #include "expr.hh" 14 | 15 | using std::shared_ptr; 16 | 17 | struct table_ref : prod { 18 | vector > refs; 19 | static shared_ptr factory(prod *p); 20 | table_ref(prod *p) : prod(p) { } 21 | virtual ~table_ref() { } 22 | }; 23 | 24 | struct table_or_query_name : table_ref { 25 | virtual void out(std::ostream &out); 26 | table_or_query_name(prod *p); 27 | virtual ~table_or_query_name() { } 28 | named_relation *t; 29 | }; 30 | 31 | struct target_table : table_ref { 32 | virtual void out(std::ostream &out); 33 | target_table(prod *p, table *victim = 0); 34 | virtual ~target_table() { } 35 | table *victim_; 36 | }; 37 | 38 | struct table_sample : table_ref { 39 | virtual void out(std::ostream &out); 40 | table_sample(prod *p); 41 | virtual ~table_sample() { } 42 | struct table *t; 43 | private: 44 | string method; 45 | double percent; 46 | }; 47 | 48 | struct table_subquery : table_ref { 49 | bool is_lateral; 50 | virtual void out(std::ostream &out); 51 | shared_ptr query; 52 | table_subquery(prod *p, bool lateral = false); 53 | virtual ~table_subquery(); 54 | virtual void accept(prod_visitor *v); 55 | }; 56 | 57 | struct lateral_subquery : table_subquery { 58 | lateral_subquery(prod *p) 59 | : table_subquery(p, true) { } 60 | }; 61 | 62 | struct join_cond : prod { 63 | static shared_ptr factory(prod *p, table_ref &lhs, table_ref &rhs); 64 | join_cond(prod *p, table_ref &lhs, table_ref &rhs) 65 | : prod(p) { (void) lhs; (void) rhs;} 66 | }; 67 | 68 | struct simple_join_cond : join_cond { 69 | std::string condition; 70 | simple_join_cond(prod *p, 
table_ref &lhs, table_ref &rhs); 71 | virtual void out(std::ostream &out); 72 | }; 73 | 74 | struct expr_join_cond : join_cond { 75 | struct scope joinscope; 76 | shared_ptr search; 77 | expr_join_cond(prod *p, table_ref &lhs, table_ref &rhs); 78 | virtual void out(std::ostream &out); 79 | virtual void accept(prod_visitor *v) { 80 | search->accept(v); 81 | v->visit(this); 82 | } 83 | }; 84 | 85 | struct joined_table : table_ref { 86 | virtual void out(std::ostream &out); 87 | joined_table(prod *p); 88 | std::string type; 89 | std::string alias; 90 | virtual std::string ident() { return alias; } 91 | shared_ptr lhs; 92 | shared_ptr rhs; 93 | shared_ptr condition; 94 | virtual ~joined_table() { 95 | } 96 | virtual void accept(prod_visitor *v) { 97 | lhs->accept(v); 98 | rhs->accept(v); 99 | condition->accept(v); 100 | v->visit(this); 101 | } 102 | }; 103 | 104 | struct from_clause : prod { 105 | std::vector > reflist; 106 | virtual void out(std::ostream &out); 107 | from_clause(prod *p); 108 | ~from_clause() { } 109 | virtual void accept(prod_visitor *v) { 110 | v->visit(this); 111 | for (auto p : reflist) 112 | p->accept(v); 113 | } 114 | }; 115 | 116 | struct select_list : prod { 117 | std::vector > value_exprs; 118 | relation derived_table; 119 | int columns = 0; 120 | select_list(prod *p); 121 | virtual void out(std::ostream &out); 122 | ~select_list() { } 123 | virtual void accept(prod_visitor *v) { 124 | v->visit(this); 125 | for (auto p : value_exprs) 126 | p->accept(v); 127 | } 128 | }; 129 | 130 | struct query_spec : prod { 131 | std::string set_quantifier; 132 | shared_ptr from_clause; 133 | shared_ptr select_list; 134 | shared_ptr search; 135 | std::string limit_clause; 136 | struct scope myscope; 137 | virtual void out(std::ostream &out); 138 | query_spec(prod *p, struct scope *s, bool lateral = 0); 139 | virtual void accept(prod_visitor *v) { 140 | v->visit(this); 141 | select_list->accept(v); 142 | from_clause->accept(v); 143 | search->accept(v); 144 | 
} 145 | }; 146 | 147 | struct select_for_update : query_spec { 148 | const char *lockmode; 149 | virtual void out(std::ostream &out); 150 | select_for_update(prod *p, struct scope *s, bool lateral = 0); 151 | }; 152 | 153 | struct prepare_stmt : prod { 154 | query_spec q; 155 | static long seq; 156 | long id; 157 | virtual void out(std::ostream &out) { 158 | out << "prepare prep" << id << " as " << q; 159 | } 160 | prepare_stmt(prod *p) : prod(p), q(p, scope) { 161 | id = seq++; 162 | } 163 | virtual void accept(prod_visitor *v) { 164 | v->visit(this); 165 | q.accept(v); 166 | } 167 | }; 168 | 169 | struct modifying_stmt : prod { 170 | table *victim; 171 | struct scope myscope; 172 | modifying_stmt(prod *p, struct scope *s, struct table *victim = 0); 173 | // shared_ptr modifying_stmt::factory(prod *p, struct scope *s); 174 | virtual void pick_victim(); 175 | }; 176 | 177 | struct delete_stmt : modifying_stmt { 178 | shared_ptr search; 179 | delete_stmt(prod *p, struct scope *s, table *v); 180 | virtual ~delete_stmt() { } 181 | virtual void out(std::ostream &out) { 182 | out << "delete from " << victim->ident(); 183 | indent(out); 184 | out << "where " << std::endl << *search; 185 | } 186 | virtual void accept(prod_visitor *v) { 187 | v->visit(this); 188 | search->accept(v); 189 | } 190 | }; 191 | 192 | struct delete_returning : delete_stmt { 193 | shared_ptr select_list; 194 | delete_returning(prod *p, struct scope *s, table *victim = 0); 195 | virtual void out(std::ostream &out) { 196 | delete_stmt::out(out); 197 | out << std::endl << "returning " << *select_list; 198 | } 199 | virtual void accept(prod_visitor *v) { 200 | v->visit(this); 201 | search->accept(v); 202 | select_list->accept(v); 203 | } 204 | }; 205 | 206 | struct insert_stmt : modifying_stmt { 207 | vector > value_exprs; 208 | insert_stmt(prod *p, struct scope *s, table *victim = 0); 209 | virtual ~insert_stmt() { } 210 | virtual void out(std::ostream &out); 211 | virtual void accept(prod_visitor 
*v) { 212 | v->visit(this); 213 | for (auto p : value_exprs) p->accept(v); 214 | } 215 | }; 216 | 217 | struct set_list : prod { 218 | vector > value_exprs; 219 | vector names; 220 | set_list(prod *p, table *target); 221 | virtual ~set_list() { } 222 | virtual void out(std::ostream &out); 223 | virtual void accept(prod_visitor *v) { 224 | v->visit(this); 225 | for (auto p : value_exprs) p->accept(v); 226 | } 227 | }; 228 | 229 | struct upsert_stmt : insert_stmt { 230 | shared_ptr set_list; 231 | string constraint; 232 | shared_ptr search; 233 | upsert_stmt(prod *p, struct scope *s, table *v = 0); 234 | virtual void out(std::ostream &out) { 235 | insert_stmt::out(out); 236 | out << " on conflict on constraint " << constraint << " do update "; 237 | out << *set_list << " where " << *search; 238 | } 239 | virtual void accept(prod_visitor *v) { 240 | insert_stmt::accept(v); 241 | set_list->accept(v); 242 | search->accept(v); 243 | } 244 | virtual ~upsert_stmt() { } 245 | }; 246 | 247 | struct update_stmt : modifying_stmt { 248 | shared_ptr search; 249 | shared_ptr set_list; 250 | update_stmt(prod *p, struct scope *s, table *victim = 0); 251 | virtual ~update_stmt() { } 252 | virtual void out(std::ostream &out); 253 | virtual void accept(prod_visitor *v) { 254 | v->visit(this); 255 | search->accept(v); 256 | } 257 | }; 258 | 259 | struct when_clause : prod { 260 | bool matched; 261 | shared_ptr condition; 262 | // shared_ptr merge_action; 263 | when_clause(struct merge_stmt *p); 264 | virtual ~when_clause() { } 265 | static shared_ptr factory(struct merge_stmt *p); 266 | virtual void out(std::ostream &out); 267 | virtual void accept(prod_visitor *v); 268 | }; 269 | 270 | struct when_clause_update : when_clause { 271 | shared_ptr set_list; 272 | struct scope myscope; 273 | when_clause_update(struct merge_stmt *p); 274 | virtual ~when_clause_update() { } 275 | virtual void out(std::ostream &out); 276 | virtual void accept(prod_visitor *v); 277 | }; 278 | 279 | struct 
when_clause_insert : when_clause { 280 | vector > exprs; 281 | when_clause_insert(struct merge_stmt *p); 282 | virtual ~when_clause_insert() { } 283 | virtual void out(std::ostream &out); 284 | virtual void accept(prod_visitor *v); 285 | }; 286 | 287 | struct merge_stmt : modifying_stmt { 288 | merge_stmt(prod *p, struct scope *s, table *victim = 0); 289 | shared_ptr target_table_; 290 | shared_ptr data_source; 291 | shared_ptr join_condition; 292 | vector > clauselist; 293 | virtual ~merge_stmt() { } 294 | virtual void out(std::ostream &out); 295 | virtual void accept(prod_visitor *v); 296 | }; 297 | 298 | struct update_returning : update_stmt { 299 | shared_ptr select_list; 300 | update_returning(prod *p, struct scope *s, table *victim = 0); 301 | virtual void out(std::ostream &out) { 302 | update_stmt::out(out); 303 | out << std::endl << "returning " << *select_list; 304 | } 305 | virtual void accept(prod_visitor *v) { 306 | v->visit(this); 307 | search->accept(v); 308 | set_list->accept(v); 309 | select_list->accept(v); 310 | } 311 | }; 312 | 313 | shared_ptr statement_factory(struct scope *s); 314 | 315 | struct common_table_expression : prod { 316 | vector > with_queries; 317 | shared_ptr query; 318 | vector > refs; 319 | struct scope myscope; 320 | virtual void out(std::ostream &out); 321 | virtual void accept(prod_visitor *v); 322 | common_table_expression(prod *parent, struct scope *s); 323 | }; 324 | 325 | #endif 326 | -------------------------------------------------------------------------------- /impedance.cc: -------------------------------------------------------------------------------- 1 | #include "impedance.hh" 2 | #include "log.hh" 3 | #include 4 | 5 | using namespace std; 6 | 7 | static map occurances_in_failed_query; 8 | static map occurances_in_ok_query; 9 | static map retries; 10 | static map limited; 11 | static map failed; 12 | 13 | impedance_visitor::impedance_visitor(map &occured) 14 | : _occured(occured) 15 | { } 16 | 17 | void 
impedance_visitor::visit(struct prod *p) 18 | { 19 | found[typeid(*p).name()] = true; 20 | } 21 | 22 | impedance_visitor::~impedance_visitor() 23 | { 24 | for(auto pair : found) 25 | _occured[pair.first]++; 26 | } 27 | 28 | void impedance_feedback::executed(prod &query) 29 | { 30 | impedance_visitor v(occurances_in_ok_query); 31 | query.accept(&v); 32 | } 33 | 34 | void impedance_feedback::error(prod &query, const dut::failure &e) 35 | { 36 | (void)e; 37 | impedance_visitor v(occurances_in_failed_query); 38 | query.accept(&v); 39 | } 40 | 41 | namespace impedance { 42 | 43 | bool matched(const char *name) 44 | { 45 | if (100 > occurances_in_failed_query[name]) 46 | return true; 47 | double error_rate = (double)occurances_in_failed_query[name] 48 | / (occurances_in_failed_query[name] + occurances_in_ok_query[name]); 49 | if (error_rate > 0.99) 50 | return false; 51 | return true; 52 | } 53 | 54 | void report() 55 | { 56 | cerr << "impedance report: " << endl; 57 | for (auto pair : occurances_in_failed_query) { 58 | cerr << " " << pretty_type(pair.first) << ": " << 59 | pair.second << "/" << occurances_in_ok_query[pair.first] 60 | << " (bad/ok)"; 61 | if (!matched(pair.first)) 62 | cerr << " -> BLACKLISTED"; 63 | cerr << endl; 64 | } 65 | } 66 | 67 | void report(std::ostream &out) 68 | { 69 | out << "{\"impedance\": [ " << endl; 70 | 71 | for (auto pair = occurances_in_failed_query.begin(); 72 | pair != occurances_in_failed_query.end(); 73 | ++pair) { 74 | out << "{\"prod\": \"" << pretty_type(pair->first) << "\"," 75 | << "\"bad\": " << pair->second << ", " 76 | << "\"ok\": " << occurances_in_ok_query[pair->first] << ", " 77 | << "\"limited\": " << limited[pair->first] << ", " 78 | << "\"failed\": " << failed[pair->first] << ", " 79 | << "\"retries\": " << retries[pair->first] << "} "; 80 | 81 | if (next(pair) != occurances_in_failed_query.end()) 82 | out << "," << endl; 83 | } 84 | out << "]}" << endl; 85 | } 86 | 87 | void retry(const char *p) 88 | { 89 | 
retries[p]++; 90 | } 91 | 92 | void limit(const char *p) 93 | { 94 | limited[p]++; 95 | } 96 | 97 | void fail(const char *p) 98 | { 99 | failed[p]++; 100 | } 101 | 102 | } 103 | -------------------------------------------------------------------------------- /impedance.hh: -------------------------------------------------------------------------------- 1 | /// @file 2 | /// @brief feedback to the grammar about failed productions 3 | 4 | 5 | #ifndef IMPEDANCE_HH 6 | #define IMPEDANCE_HH 7 | 8 | #include 9 | #include 10 | #include "prod.hh" 11 | #include "util.hh" 12 | #include "log.hh" 13 | #include "dut.hh" 14 | 15 | struct impedance_visitor : prod_visitor { 16 | std::map &_occured; 17 | std::map found; 18 | virtual void visit(struct prod *p); 19 | impedance_visitor(std::map &occured); 20 | virtual ~impedance_visitor(); 21 | }; 22 | 23 | struct impedance_feedback : logger { 24 | virtual void executed(prod &query); 25 | virtual void error(prod &query, const dut::failure &e); 26 | impedance_feedback() { } 27 | }; 28 | 29 | namespace impedance { 30 | bool matched(const char *p); 31 | inline bool matched(const std::type_info &id) { return matched(id.name()); } 32 | inline bool matched(prod *p) { return matched(typeid(*p)); } 33 | void retry(const char *p); 34 | inline void retry(const std::type_info &id) { return retry(id.name()); } 35 | inline void retry(prod *p) { return retry(typeid(*p)); } 36 | void limit(const char *p); 37 | inline void limit(const std::type_info &id) { return limit(id.name()); } 38 | inline void limit(prod *p) { return limit(typeid(*p)); } 39 | void fail(const char *p); 40 | inline void fail(const std::type_info &id) { return fail(id.name()); } 41 | inline void fail(prod *p) { return fail(typeid(*p)); } 42 | void report(); 43 | void report(std::ostream &out); 44 | } 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /known_re.txt: 
-------------------------------------------------------------------------------- 1 | \\nCONTEXT: PL/pgSQL function tg_backlink_set 2 | \\nCONTEXT: PL/pgSQL function tg_iface_biu 3 | \\nCONTEXT: PL/pgSQL function tg_pslot_biu 4 | \\nCONTEXT: PL/pgSQL.* 5 | \\nCONTEXT: SQL function "make_table" statement 6 | \\nCONTEXT: invalid type name 7 | ^ERROR: "-?\\d+" is out of range for type real 8 | ^ERROR: ".* must be ahead of ".* 9 | ^ERROR: ".*" is not a number 10 | ^ERROR: ".*" is not a sequence 11 | ^ERROR: ".*" is not an index 12 | ^ERROR: ".*" not supported for input 13 | ^ERROR: "time with time zone" units ".*" not recognized 14 | ^ERROR: "time" units " 15 | ^ERROR: -\\d+ is less than zero 16 | ^ERROR: .*is not a valid encoding code 17 | ^ERROR: .*is out of range for type double precision 18 | ^ERROR: COALESCE could not convert type 19 | ^ERROR: COALESCE types.*cannot be matched 20 | ^ERROR: Cannot use ".*": Badly formatted type 21 | ^ERROR: Cannot use ".*": No such cipher algorithm 22 | ^ERROR: Cannot use .*: No such hash algorithm 23 | ^ERROR: NUMERIC precision \\d+ must be between \\d+ and \\d+ 24 | ^ERROR: Patchfield "[^"]*" does not exist 25 | ^ERROR: Room .* does not exist 26 | ^ERROR: Syntax error near 27 | ^ERROR: \\d+ is outside the valid range for parameter 28 | ^ERROR: argument list must have even number of elements 29 | ^ERROR: array size exceeds the maximum allowed 30 | ^ERROR: attribute number \\d+ not found in view targetlist 31 | ^ERROR: bit index -?\\d+ out of valid range 32 | ^ERROR: bit index \\d+ out of valid range 33 | ^ERROR: bit string length \\d+ does not match type bit 34 | ^ERROR: bit string too long for type bit varying 35 | ^ERROR: block number \\d+ is out of range for relation " 36 | ^ERROR: cache lookup failed for constraint 37 | ^ERROR: cache lookup failed for function 38 | ^ERROR: cache lookup failed for index 39 | ^ERROR: cache lookup failed for text search 40 | ^ERROR: cache lookup failed for type 41 | ^ERROR: cannot accept a value 
of type 42 | ^ERROR: cannot compare dissimilar column types 43 | ^ERROR: cannot insert into column "[^"]+" of view 44 | ^ERROR: cannot retrieve commit timestamp for transaction 45 | ^ERROR: cannot take cube root of a unit with 46 | ^ERROR: cannot take square root of a unit with odd 47 | ^ERROR: collations are not supported by type " 48 | ^ERROR: column ".*" of relation ".*" does not exist 49 | ^ERROR: column .* is of type .*\\[\\].* but expression is of type .*\\[\\] 50 | ^ERROR: column .* is of type oidvector 51 | ^ERROR: column .*city_budget 52 | ^ERROR: column .*type rewritetype 53 | ^ERROR: column.*int8alias 54 | ^ERROR: column.*test_type 55 | ^ERROR: conflicting key value violates exclusion constraint 56 | ^ERROR: conflicting values for.* field in formatting string 57 | ^ERROR: conflicting values for.* field in formatting string: 58 | ^ERROR: connection .* not available 59 | ^ERROR: could not create file "pg_snapshots.*Cannot allocate memory 60 | ^ERROR: could not find array type for data type 61 | ^ERROR: could not find member .* of opfamily 1976 62 | ^ERROR: could not find tuple for trigger 63 | ^ERROR: could not identify a hash function for type 64 | ^ERROR: could not identify an equality operator for type 65 | ^ERROR: could not identify an ordering operator for type 66 | ^ERROR: could not open file " 67 | ^ERROR: could not open relation with OID (\\d+) 68 | ^ERROR: could not open server file " 69 | ^ERROR: could not perform block dump because dump file is being used by PID 70 | ^ERROR: could not read symbolic link ".*": No such file or directory 71 | ^ERROR: could not resize shared memory segment.*No space left 72 | ^ERROR: could not stat file " 73 | ^ERROR: cross-database references are not implemented 74 | ^ERROR: cube index [0-9]+ is out of bounds 75 | ^ERROR: currval of sequence "[^"]+" is not yet defined in this session 76 | ^ERROR: cursor "[^"]+" does not exist 77 | ^ERROR: database .* does not exist 78 | ^ERROR: date field value out of range: 79 | 
^ERROR: date out of range: 80 | ^ERROR: date/time field value out of range: 81 | ^ERROR: dimension mismatch in 82 | ^ERROR: division by zero-valued unit: 83 | ^ERROR: duplicate JSON key " 84 | ^ERROR: duplicate JSON object key value 85 | ^ERROR: duplicate key value violates unique constraint 86 | ^ERROR: encoding conversion from 87 | ^ERROR: foreign-data wrapper " 88 | ^ERROR: function .* does not exist 89 | ^ERROR: function .*is not unique 90 | ^ERROR: hour ".*" is invalid for the 12-hour clock 91 | ^ERROR: improper (qualified|relation) name \\(too many dotted names\\): 92 | ^ERROR: incorrect number of arguments: specified \\d+ but found \\d+ 93 | ^ERROR: index "hash_ 94 | ^ERROR: index -?[0-9]+ out of valid range 95 | ^ERROR: index [^o]+out of valid range, 0..127 96 | ^ERROR: index row requires \\d+ bytes, maximum size is \\d+ 97 | ^ERROR: index row size \\d+ exceeds btree version \\d+ maximum \\d+ for index 98 | ^ERROR: index row size \\d+ exceeds maximum \\d+ for index 99 | ^ERROR: insert or update on table "[^"]+" violates foreign key constraint 100 | ^ERROR: interval field value out of range 101 | ^ERROR: interval time zone 102 | ^ERROR: interval units 103 | ^ERROR: invalid INTERVAL typmod: 104 | ^ERROR: invalid attnum \\d+ for relation " 105 | ^ERROR: invalid byte sequence for encoding 106 | ^ERROR: invalid character "[^"]+" in t_bits string 107 | ^ERROR: invalid cidr value: 108 | ^ERROR: invalid destination encoding name 109 | ^ERROR: invalid encoding name 110 | ^ERROR: invalid flags for opening a large object: 111 | ^ERROR: invalid input syntax for 112 | ^ERROR: invalid large-object descriptor: 113 | ^ERROR: invalid mask length: 114 | ^ERROR: invalid memory alloc request size \\d+ 115 | ^ERROR: invalid non-zero objectSubId for object class 116 | ^ERROR: invalid octet value in 117 | ^ERROR: invalid option 118 | ^ERROR: invalid parameter list format: 119 | ^ERROR: invalid range flags 120 | ^ERROR: invalid regexp option: 121 | ^ERROR: invalid regular 
expression: 122 | ^ERROR: invalid size: ".*" 123 | ^ERROR: invalid source encoding name 124 | ^ERROR: invalid type name 125 | ^ERROR: invalid value 126 | ^ERROR: language .*does not exist 127 | ^ERROR: language validation function 128 | ^ERROR: large object.*does not exist 129 | ^ERROR: malformed array literal 130 | ^ERROR: malformed range literal: " 131 | ^ERROR: missing support function .*in opfamily 132 | ^ERROR: more than one operator named 133 | ^ERROR: must be superuser 134 | ^ERROR: new row for relation .* violates check constraint 135 | ^ERROR: no operand in tsquery: 136 | ^ERROR: null value in column "[^"]+" violates not-null constraint 137 | ^ERROR: number of array dimensions .* exceeds the maximum allowed 138 | ^ERROR: operator does not exist 139 | ^ERROR: operator.*int8alias 140 | ^ERROR: operator.*test_type 141 | ^ERROR: parameter ".*" cannot be changed 142 | ^ERROR: parameter ".*" requires 143 | ^ERROR: parameter "[^"]+" cannot be changed now 144 | ^ERROR: parameter "[^"]+" cannot be changed without restarting the server 145 | ^ERROR: path element at position 146 | ^ERROR: permission denied 147 | ^ERROR: relation ".*" does not exist 148 | ^ERROR: relation .*does not exists 149 | ^ERROR: replication slot ".*" already exists 150 | ^ERROR: requested character too large for encoding 151 | ^ERROR: role .* does not exist 152 | ^ERROR: row is too big: 153 | ^ERROR: schema ".*" does not exist 154 | ^ERROR: server .* does not exist 155 | ^ERROR: setseed parameter 156 | ^ERROR: setval: value .* is out of bounds for sequence 157 | ^ERROR: source string too short for " 158 | ^ERROR: stack depth limit exceeded.*"infinite_recurse" 159 | ^ERROR: string is not a valid identifier: 160 | ^ERROR: string is too long for tsvector 161 | ^ERROR: syntax error at or near 162 | ^ERROR: syntax error in tsquery: 163 | ^ERROR: syntax error in tsvector: " 164 | ^ERROR: system "[^"]+" does not exist 165 | ^ERROR: t_bits string is expected to be 166 | ^ERROR: tablespace .*does not 
exist 167 | ^ERROR: text search configuration.*does not exist 168 | ^ERROR: text search dictionary .* does not exist 169 | ^ERROR: text search parser .* does not exist 170 | ^ERROR: time field value out of range: 171 | ^ERROR: time zone 172 | ^ERROR: timestamp out of range: 173 | ^ERROR: timestamp units 174 | ^ERROR: timestamp with time zone units 175 | ^ERROR: timestamp\\( 176 | ^ERROR: type ".*" does not exist 177 | ^ERROR: type .* is not a domain 178 | ^ERROR: type \\d+ is not a range type 179 | ^ERROR: unexpected length of t_bits string: 180 | ^ERROR: unit ".*" is not known 181 | ^ERROR: unrecognized configuration parameter 182 | ^ERROR: unrecognized conversion type specifier 183 | ^ERROR: unrecognized encoding: 184 | ^ERROR: unrecognized format\\(\\) type specifier 185 | ^ERROR: unrecognized headline parameter: 186 | ^ERROR: unrecognized interval typmod 187 | ^ERROR: unrecognized key word: 188 | ^ERROR: unrecognized object class: 189 | ^ERROR: unrecognized object type 190 | ^ERROR: unrecognized objtype abbreviation: 191 | ^ERROR: unrecognized privilege type: " 192 | ^ERROR: unrecognized reset target: 193 | ^ERROR: unrecognized storage manager name 194 | ^ERROR: unrecognized weight: 195 | ^ERROR: update or delete on table "[^"]+" violates foreign key constraint 196 | ^ERROR: value ".*" is out of range for ISBN type 197 | ^ERROR: value ".*" is out of range for type 198 | ^ERROR: value .* is out of range for 199 | ^ERROR: value .* is out of range for type oid 200 | ^ERROR: value for .* in source string is out of range 201 | ^ERROR: value for domain [^ ]+ violates check constraint 202 | ^ERROR: value too long for type character 203 | ^ERROR: word is too long " 204 | ^ERROR: word is too long in tsquery 205 | ^ERROR: wrong position info in tsvector: " 206 | ^ERROR: .*is not a valid binary digit 207 | ^ERROR: .*is not a valid encoding name 208 | ^ERROR:.*type tab1 209 | ^FATAL: the database system is shutting down 210 | ^NOT NULL constraint failed: 211 | ^UNIQUE 
constraint failed: 212 | ^cannot take square root of a unit with odd 213 | -------------------------------------------------------------------------------- /log-v1.0-to-v1.2.sql: -------------------------------------------------------------------------------- 1 | -- upgrade SQLsmith logging schema from 1.0 to 1.2 2 | 3 | alter table stat add column impedance jsonb; 4 | alter table instance add column seed text; 5 | alter table error add column sqlstate text; 6 | 7 | create or replace view impedance as 8 | SELECT stat.id, 9 | stat.generated, 10 | stat.level, 11 | stat.nodes, 12 | stat.updated, 13 | js.prod, 14 | js.ok, 15 | js.bad, 16 | js.retries, 17 | js.limited, 18 | js.failed 19 | FROM stat, 20 | LATERAL jsonb_to_recordset(stat.impedance -> 'impedance') 21 | js(prod text, 22 | ok integer, 23 | bad integer, 24 | retries integer, 25 | limited integer, 26 | failed integer) 27 | WHERE stat.impedance IS NOT NULL; 28 | 29 | comment on view impedance is 'stat table with normalized jsonb'; 30 | 31 | create view impedance_report as 32 | SELECT instance.rev, 33 | impedance.prod, 34 | sum(impedance.generated) AS generated, 35 | sum(impedance.ok) AS ok, 36 | sum(impedance.bad) AS bad, 37 | sum(impedance.retries) AS retries, 38 | sum(impedance.limited) AS limited, 39 | sum(impedance.failed) AS failed 40 | FROM impedance 41 | JOIN instance USING (id) 42 | WHERE instance.rev = (( SELECT instance_1.rev 43 | FROM instance instance_1 44 | ORDER BY instance_1.t DESC 45 | LIMIT 1)) 46 | GROUP BY instance.rev, impedance.prod 47 | ORDER BY sum(impedance.retries); 48 | 49 | comment on view impedance_report is 'impedance report for latest revision'; 50 | -------------------------------------------------------------------------------- /log.cc: -------------------------------------------------------------------------------- 1 | #include "config.h" 2 | #include 3 | #include 4 | #include 5 | 6 | #ifndef HAVE_BOOST_REGEX 7 | #include 8 | #else 9 | #include 10 | using boost::regex; 11 | 
using boost::smatch; 12 | using boost::regex_match; 13 | #endif 14 | 15 | #include 16 | 17 | extern "C" { 18 | #include 19 | } 20 | 21 | #include "log.hh" 22 | #include "schema.hh" 23 | #include "gitrev.h" 24 | #include "impedance.hh" 25 | #include "random.hh" 26 | 27 | using namespace std; 28 | using namespace pqxx; 29 | 30 | struct stats_visitor : prod_visitor { 31 | int nodes = 0; 32 | int maxlevel = 0; 33 | long retries = 0; 34 | map production_stats; 35 | virtual void visit(struct prod *p) { 36 | nodes++; 37 | if (p->level > maxlevel) 38 | maxlevel = p->level; 39 | production_stats[typeid(*p).name()]++; 40 | retries += p->retries; 41 | } 42 | void report() { 43 | cerr << "production statistics" << endl; 44 | vector > report; 45 | for (auto p : production_stats) 46 | report.push_back(p); 47 | stable_sort(report.begin(), report.end(), 48 | [](const pair &a, 49 | const pair &b) 50 | { return a.second > b.second; }); 51 | for (auto p : report) { 52 | cerr << p.second << "\t" << p.first << endl; 53 | } 54 | } 55 | }; 56 | 57 | void stats_collecting_logger::generated(prod &query) 58 | { 59 | queries++; 60 | 61 | stats_visitor v; 62 | query.accept(&v); 63 | 64 | sum_nodes += v.nodes; 65 | sum_height += v.maxlevel; 66 | sum_retries += v.retries; 67 | } 68 | 69 | void cerr_logger::report() 70 | { 71 | cerr << endl << "queries: " << queries << endl; 72 | // << " (" << 1000.0*query_count/gen_time.count() << " gen/s, " 73 | // << 1000.0*query_count/query_time.count() << " exec/s)" << endl; 74 | cerr << "AST stats (avg): height = " << sum_height/queries 75 | << " nodes = " << sum_nodes/queries << endl; 76 | 77 | vector > report; 78 | for (auto e : errors) { 79 | report.push_back(e); 80 | } 81 | stable_sort(report.begin(), report.end(), 82 | [](const pair &a, 83 | const pair &b) 84 | { return a.second > b.second; }); 85 | long err_count = 0; 86 | for (auto e : report) { 87 | err_count += e.second; 88 | cerr << e.second << "\t" << e.first.substr(0,80) << endl; 89 | } 90 | 
cerr << "error rate: " << (float)err_count/(queries) << endl; 91 | impedance::report(); 92 | } 93 | 94 | 95 | void cerr_logger::generated(prod &p) 96 | { 97 | stats_collecting_logger::generated(p); 98 | if ((10*columns-1) == queries%(10*columns)) 99 | report(); 100 | } 101 | 102 | void cerr_logger::executed(prod &query) 103 | { 104 | (void)query; 105 | if (columns-1 == (queries%columns)) { 106 | cerr << endl; 107 | } 108 | cerr << "."; 109 | } 110 | 111 | void cerr_logger::error(prod &query, const dut::failure &e) 112 | { 113 | (void)query; 114 | istringstream err(e.what()); 115 | string line; 116 | 117 | if (columns-1 == (queries%columns)) { 118 | cerr << endl; 119 | } 120 | getline(err, line); 121 | errors[line]++; 122 | if (dynamic_cast(&e)) 123 | cerr << "t"; 124 | else if (dynamic_cast(&e)) 125 | cerr << "S"; 126 | else if (dynamic_cast(&e)) 127 | cerr << "C"; 128 | else 129 | cerr << "e"; 130 | } 131 | 132 | pqxx_logger::pqxx_logger(std::string target, std::string conninfo, struct schema &s) 133 | { 134 | c = make_shared(conninfo); 135 | 136 | work w(*c); 137 | w.exec("set application_name to '" PACKAGE "::log';"); 138 | 139 | c->prepare("instance", 140 | "insert into instance (rev, target, hostname, version, seed) " 141 | "values ($1, $2, $3, $4, $5) returning id"); 142 | 143 | char hostname[1024]; 144 | gethostname(hostname, sizeof(hostname)); 145 | 146 | ostringstream seed; 147 | seed << smith::rng; 148 | 149 | #ifdef HAVE_LIBPQXX7 150 | result r = w.exec_prepared("instance", GITREV, target, hostname, s.version, seed.str()); 151 | #else 152 | result r = w.prepared("instance")(GITREV)(target)(hostname)(s.version)(seed.str()).exec(); 153 | #endif 154 | 155 | id = r[0][0].as(id); 156 | 157 | c->prepare("error", 158 | "insert into error (id, msg, query, sqlstate) " 159 | "values (" + to_string(id) + ", $1, $2, $3)"); 160 | 161 | w.exec("insert into stat (id) values (" + to_string(id) + ")"); 162 | c->prepare("stat", 163 | "update stat set generated=$1, 
level=$2, nodes=$3, updated=now() " 164 | ", retries = $4, impedance = $5 " 165 | "where id = " + to_string(id)); 166 | 167 | w.commit(); 168 | 169 | } 170 | 171 | void pqxx_logger::error(prod &query, const dut::failure &e) 172 | { 173 | work w(*c); 174 | ostringstream s; 175 | s << query; 176 | #ifdef HAVE_LIBPQXX7 177 | w.exec_prepared("error", e.what(), s.str(), e.sqlstate); 178 | #else 179 | w.prepared("error")(e.what())(s.str())(e.sqlstate).exec(); 180 | #endif 181 | w.commit(); 182 | } 183 | 184 | void pqxx_logger::generated(prod &query) 185 | { 186 | stats_collecting_logger::generated(query); 187 | if (999 == (queries%1000)) { 188 | work w(*c); 189 | ostringstream s; 190 | impedance::report(s); 191 | #ifdef HAVE_LIBPQXX7 192 | w.exec_prepared("stat", queries, sum_height/queries, sum_nodes/queries, sum_retries/queries, s.str()); 193 | #else 194 | w.prepared("stat")(queries)(sum_height/queries)(sum_nodes/queries)(sum_retries/queries)(s.str()).exec(); 195 | #endif 196 | w.commit(); 197 | } 198 | } 199 | 200 | -------------------------------------------------------------------------------- /log.hh: -------------------------------------------------------------------------------- 1 | /// @file 2 | /// @brief logging 3 | 4 | #ifndef LOG_HH 5 | #define LOG_HH 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "prod.hh" 12 | #include "dut.hh" 13 | 14 | /// logger base class 15 | struct logger { 16 | virtual void generated(prod &query) {(void)query; } 17 | virtual void executed(prod &query) {(void)query; } 18 | virtual void error(prod &query, const dut::failure &e) { 19 | (void)query, (void)e; 20 | } 21 | }; 22 | 23 | /// logger to dump all generated queries 24 | struct query_dumper : logger { 25 | virtual void generated(prod &query) { 26 | query.out(std::cout); 27 | std::cout << ";" << std::endl; 28 | } 29 | }; 30 | 31 | /// logger for statistics collection 32 | struct stats_collecting_logger : logger { 33 | long queries = 0; 34 | float sum_nodes = 0; 35 | 
float sum_height = 0; 36 | float sum_retries = 0; 37 | virtual void generated(prod &query); 38 | }; 39 | 40 | /// stderr logger 41 | struct cerr_logger : stats_collecting_logger { 42 | const int columns = 80; 43 | std::map errors; 44 | virtual void report(); 45 | virtual void generated(prod &query); 46 | virtual void executed(prod &query); 47 | virtual void error(prod &query, const dut::failure &e); 48 | void report(prod &p); 49 | }; 50 | 51 | /// logger to postgres database 52 | struct pqxx_logger : stats_collecting_logger { 53 | long id; 54 | std::shared_ptr c; 55 | pqxx_logger(std::string target, std::string conninfo, struct schema &s); 56 | virtual void generated(prod &query); 57 | virtual void error(prod &query, const dut::failure &e); 58 | }; 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /log.sql: -------------------------------------------------------------------------------- 1 | -- schema for --log-to 2 | 3 | create table instance ( 4 | id bigserial primary key, 5 | rev text, -- sqlsmith git revision 6 | target text, -- target connstr 7 | hostname text, -- hostname of sqlsmith instance 8 | version text, -- target version() 9 | seed text, -- RNG seed 10 | 11 | -- not referenced by sqlsmith: 12 | t timestamptz default now(), 13 | client inet default inet_client_addr(), 14 | port integer default inet_client_port() 15 | ); 16 | 17 | comment on table instance is 'details about an sqlsmith instance'; 18 | 19 | create table error ( 20 | id bigint references instance(id), 21 | msg text, -- error message 22 | query text, -- failed query 23 | target text, -- conninfo of the target 24 | sqlstate text, -- sqlstate of error 25 | 26 | -- not referenced by sqlsmith: 27 | t timestamptz default now(), 28 | errid bigserial primary key 29 | ); 30 | 31 | comment on table error is 'observed errors'; 32 | 33 | create table stat ( 34 | id bigint references instance(id), 35 | generated bigint, -- number of generated ASTs 36 
| level float, -- avg. height of ASTs 37 | nodes float, -- avg. number of nodes in ASTs 38 | retries float, -- avg. number of retries needed for ASTs 39 | updated timestamptz, 40 | impedance jsonb -- impedance report 41 | ); 42 | 43 | comment on table stat is 'statistics about ASTs and productions'; 44 | 45 | -- grant role smith just enough rights to do the logging 46 | create role smith login; 47 | grant insert,select on table instance to smith; 48 | grant insert on table error to smith; 49 | grant update,insert,select on table stat to smith; 50 | grant usage on all sequences in schema public to smith; 51 | 52 | -- stuff beyond this line is not referenced by sqlsmith 53 | 54 | create or replace function firstline(msg text) returns text as $$ 55 | select split_part(msg, E'\n', 1); 56 | $$ language sql immutable; 57 | 58 | create view base_error as 59 | select id, firstline(msg) as error, query, t, errid from error; 60 | 61 | comment on view base_error is 'like error, but truncate msg to first line'; 62 | 63 | drop view if exists report; 64 | create view report as 65 | select count(1), max(t) as last_seen, error 66 | from base_error group by 3 order by count desc; 67 | 68 | 69 | create or replace view state_report as 70 | SELECT count(1) AS count, 71 | sqlstate, 72 | min(substring(firstline(e.msg),1,80)) AS sample, 73 | array_agg(DISTINCT i.hostname) AS hosts 74 | FROM error e 75 | JOIN instance i ON i.id = e.id 76 | WHERE e.t > (now() - '24:00:00'::interval) 77 | GROUP BY sqlstate 78 | ORDER BY (count(1)); 79 | 80 | comment on view state_report is 'an sqlstate-grouped report'; 81 | 82 | comment on view report is 'same report as sqlsmith''s verbose output'; 83 | 84 | drop view if exists report24h; 85 | create view report24h as 86 | select count(1), error, max(e.t) as last_seen 87 | from base_error e join instance i on (e.id = i.id) 88 | where i.t > now() - interval '1 days' 89 | group by 2 order by count desc; 90 | 91 | create or replace view reporthosts as 92 | 
SELECT count(1) AS count, 93 | substring(firstline(e.msg),1,80) as firstline, 94 | array_agg(DISTINCT i.hostname) AS hosts 95 | FROM error e 96 | JOIN instance i ON i.id = e.id 97 | WHERE e.t > (now() - '24:00:00'::interval) 98 | GROUP BY 2 99 | ORDER BY (count(1)); 100 | 101 | create view instance_activity as 102 | select i.hostname, i.target, max(e.t) 103 | from instance i join error e on (i.id = e.id) 104 | group by i.hostname, i.target 105 | order by max desc; 106 | 107 | comment on view instance_activity is 'time of last error message from instance'; 108 | 109 | create view instance_speed as 110 | select hostname, 111 | generated/extract(epoch from (updated-t)) as "queries/s" 112 | from stat natural join instance 113 | where updated > now() - interval '1 minutes'; 114 | 115 | comment on view instance_speed is 'query speed of recently active instances'; 116 | 117 | -- Filtering boring errors 118 | 119 | create table boring_sqlstates (sqlstate text primary key); 120 | comment on table boring_sqlstates is 'sqlstates to reject'; 121 | grant select on boring_sqlstates to public; 122 | \copy boring_sqlstates from boring_sqlstates.txt 123 | 124 | create table known(error text); 125 | comment on table known is 'error messages to reject'; 126 | \copy known from known.txt 127 | 128 | create table known_re(re text); 129 | comment on table known_re is 'regular expressions to match error messages to reject'; 130 | \copy known_re from known_re.txt 131 | 132 | create or replace function discard_known() returns trigger as $$ 133 | begin 134 | if exists (select 1 from boring_sqlstates b where new.sqlstate = b.sqlstate) 135 | or exists (select 1 from known where firstline(new.msg) = error) 136 | then 137 | return NULL; 138 | end if; 139 | 140 | if new.msg ~ ANY (select re from known_re) 141 | then 142 | return NULL; 143 | end if; 144 | return new; 145 | end 146 | $$ language plpgsql; 147 | 148 | create trigger discard_known before insert on error 149 | for each row execute 
procedure discard_known(); 150 | 151 | -- YMMV. 152 | create index on error(t); 153 | 154 | -- Following views are used for debugging sqlsmith 155 | create view impedance as 156 | select id, generated, level, nodes, updated, 157 | prod, ok, bad, js.retries, limited, failed 158 | from stat, jsonb_to_recordset(impedance->'impedance') 159 | js(prod text, ok bigint, bad bigint, retries bigint, limited bigint, failed bigint) 160 | where impedance is not null; 161 | 162 | comment on view impedance is 'stat table with normalized jsonb'; 163 | 164 | create view impedance_report as 165 | select rev, prod, 166 | sum(generated) as generated, sum(ok) as ok, 167 | sum(bad) as bad, 168 | sum(retries) as retries, 169 | sum(limited)as limited, 170 | sum(failed) as failed 171 | from impedance natural join instance 172 | where rev = (select max(rev) from instance where version ~* 'postgres') 173 | group by rev, prod 174 | order by retries; 175 | 176 | comment on view impedance_report is 'impedance report for latest revision'; 177 | -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anse1/sqlsmith/46c1df710ea0217d87247bb1fc77f4a09bca77f7/logo.png -------------------------------------------------------------------------------- /monetdb.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "monetdb.hh" 4 | #include 5 | 6 | #ifndef HAVE_BOOST_REGEX 7 | #include 8 | #else 9 | #include 10 | using boost::regex; 11 | using boost::smatch; 12 | using boost::regex_match; 13 | #endif 14 | 15 | using namespace std; 16 | 17 | extern "C" { 18 | #include 19 | #include 20 | } 21 | 22 | // connect to MonetDB 23 | monetdb_connection::monetdb_connection(std::string &conninfo) 24 | { 25 | dbh = mapi_mapiuri(conninfo.c_str(), "monetdb", "monetdb", "sql"); 26 | if (mapi_error(dbh)) { 
27 | if (dbh != NULL) { 28 | mapi_explain(dbh, stderr); 29 | mapi_destroy(dbh); 30 | } else { 31 | fprintf(stderr, "command failed\n"); 32 | } 33 | exit(-1); 34 | } 35 | mapi_reconnect(dbh); 36 | if (mapi_error(dbh)) { 37 | mapi_explain(dbh, stderr); 38 | mapi_destroy(dbh); 39 | exit(-1); 40 | } 41 | } 42 | 43 | // execute queries on MonetDB 44 | void monetdb_connection::q(const char* query) 45 | { 46 | MapiHdl hdl = mapi_query(dbh, query); 47 | if (mapi_result_error(hdl) != NULL) 48 | mapi_explain_result(hdl, stderr); 49 | mapi_close_handle(hdl); 50 | } 51 | 52 | // disconnect MonetDB 53 | monetdb_connection::~monetdb_connection() 54 | { 55 | mapi_destroy(dbh); 56 | } 57 | 58 | //load schema from MonetDB 59 | schema_monetdb::schema_monetdb(std::string &conninfo):monetdb_connection(conninfo) 60 | { 61 | 62 | 63 | cerr << "init booltype, inttype, internaltype, arraytype here" << endl; 64 | booltype = sqltype::get("boolean"); 65 | inttype = sqltype::get("int"); 66 | internaltype = sqltype::get("internal"); 67 | arraytype = sqltype::get("ARRAY"); 68 | 69 | cerr << "Loading tables from database: " << conninfo << endl; 70 | // string qry = "select t.name, s.name, t.system, t.type from sys.tables t, sys.schemas s where t.schema_id=s.id and t.system=false"; 71 | string qry = "select t.name, s.name, t.system, t.type from sys.tables t, sys.schemas s where t.schema_id=s.id "; 72 | MapiHdl hdl = mapi_query(dbh,qry.c_str()); 73 | while (mapi_fetch_row(hdl)) { 74 | tables.push_back(table(mapi_fetch_field(hdl,0),mapi_fetch_field(hdl,1),strcmp(mapi_fetch_field(hdl,2),"false")==0 ? true : false , atoi(mapi_fetch_field(hdl,3))==0 ? false : true)); 75 | } 76 | mapi_close_handle(hdl); 77 | cerr << " done." 
<< endl; 78 | 79 | cerr << "Loading columns and constraints..."; 80 | for (auto t = tables.begin(); t!=tables.end(); t++) { 81 | string q("select col.name," 82 | " col.type " 83 | " from sys.columns col, sys.tables tab" 84 | " where tab.name= '"); 85 | q += t->name; 86 | q += "' and tab.id = col.table_id"; 87 | 88 | hdl = mapi_query(dbh,q.c_str()); 89 | while (mapi_fetch_row(hdl)) { 90 | column c(mapi_fetch_field(hdl,0), sqltype::get(mapi_fetch_field(hdl,1))); 91 | t->columns().push_back(c); 92 | } 93 | mapi_close_handle(hdl); 94 | } 95 | // TODO: confirm with Martin or Stefan about column 96 | // constraints in MonetDB 97 | cerr << " done." << endl; 98 | 99 | cerr << "Loading operators..."; 100 | string opq("select f.func, a.type, b.type, c.type" 101 | " from sys.functions f, sys.args a, sys.args b, sys.args c" 102 | " where f.id=a.func_id and f.id=b.func_id and f.id=c.func_id and a.name='arg_1' and b.name='arg_2' and c.number=0"); 103 | hdl = mapi_query(dbh,opq.c_str()); 104 | while (mapi_fetch_row(hdl)) { 105 | op o(mapi_fetch_field(hdl,0),sqltype::get(mapi_fetch_field(hdl,1)),sqltype::get(mapi_fetch_field(hdl,2)),sqltype::get(mapi_fetch_field(hdl,3))); 106 | register_operator(o); 107 | } 108 | mapi_close_handle(hdl); 109 | cerr << " done." << endl; 110 | 111 | 112 | cerr << "Loading routines..."; 113 | string routq("select s.name, f.id, a.type, f.name from sys.schemas s, sys.args a, sys.types t, sys.functions f where f.schema_id = s.id and f.id=a.func_id and a.number=0 and a.type = t.sqlname and f.mod<>'aggr'"); 114 | hdl = mapi_query(dbh,routq.c_str()); 115 | while (mapi_fetch_row(hdl)) { 116 | routine proc(mapi_fetch_field(hdl,0),mapi_fetch_field(hdl,1),sqltype::get(mapi_fetch_field(hdl,2)),mapi_fetch_field(hdl,3)); 117 | register_routine(proc); 118 | } 119 | mapi_close_handle(hdl); 120 | cerr << " done." 
<< endl; 121 | 122 | cerr << "Loading routine parameters..."; 123 | for (auto &proc : routines) { 124 | string routpq ("select a.type from sys.args a," 125 | " sys.functions f " 126 | " where f.id = a.func_id and a.number <> 0 and f.id = '"); 127 | routpq += proc.specific_name; 128 | routpq += "'"; 129 | hdl = mapi_query(dbh,routpq.c_str()); 130 | while (mapi_fetch_row(hdl)) { 131 | proc.argtypes.push_back(sqltype::get(mapi_fetch_field(hdl,0))); 132 | } 133 | mapi_close_handle(hdl); 134 | } 135 | cerr << " done."<< endl; 136 | 137 | 138 | 139 | cerr << "Loading aggregates..."; 140 | string aggq("select s.name, f.id, a.type, f.name from sys.schemas s, sys.args a, sys.types t, sys.functions f where f.schema_id = s.id and f.id=a.func_id and a.number=0 and a.type = t.sqlname and f.mod='aggr'"); 141 | 142 | hdl = mapi_query(dbh,aggq.c_str()); 143 | while (mapi_fetch_row(hdl)) { 144 | routine proc(mapi_fetch_field(hdl,0),mapi_fetch_field(hdl,1),sqltype::get(mapi_fetch_field(hdl,2)),mapi_fetch_field(hdl,3)); 145 | register_aggregate(proc); 146 | } 147 | mapi_close_handle(hdl); 148 | cerr << " done." << endl; 149 | 150 | cerr << "Loading aggregates parameters..."; 151 | for (auto &proc: aggregates) { 152 | string aggpq ("select a.type from sys.args a, sys.functions f " 153 | "where f.id = a.func_id and a.number <> 0 and f.id = '"); 154 | aggpq += proc.specific_name; 155 | aggpq += "'"; 156 | hdl = mapi_query(dbh,aggpq.c_str()); 157 | while (mapi_fetch_row(hdl)) { 158 | proc.argtypes.push_back(sqltype::get(mapi_fetch_field(hdl,0))); 159 | } 160 | mapi_close_handle(hdl); 161 | } 162 | cerr << " done."<< endl; 163 | 164 | mapi_disconnect(dbh); /* close the session only: ~monetdb_connection() still calls mapi_destroy(dbh), so destroying the handle here would free it twice */ 165 | generate_indexes(); 166 | 167 | // cerr << "print loaded information to check correctness" << endl; 168 | // cerr << "Loaded tables.... 
" << endl; 169 | /* for (auto item : tables) { 170 | cerr << item.name << "; " << item.schema << "; " << item.is_insertable << "; " << item.is_base_table << endl; 171 | } 172 | */ 173 | // cerr << "Loaded columns... " << endl; 174 | /* for (auto tab : tables) { 175 | for (auto col: tab.columns()) 176 | cerr << tab.name << "; " << col.name << "; "<name << endl; 177 | } 178 | */ 179 | // cerr << "Loaded aggregates and parameters... " << endl; 180 | /* for (auto &proc : aggregates) { 181 | cerr << proc.specific_name << "; " << proc.schema << "; " << proc.name <<"; " << proc.restype->name ; 182 | for (auto item : proc.argtypes) 183 | cerr << "; " << item->name; 184 | cerr << endl; 185 | } 186 | */ 187 | } 188 | 189 | dut_monetdb::dut_monetdb(std::string &conninfo):monetdb_connection(conninfo) 190 | { 191 | //build connection 192 | } 193 | 194 | void dut_monetdb::test(const std::string &stmt) 195 | { 196 | MapiHdl hdl = mapi_query(dbh,"CALL sys.settimeout(1)"); 197 | mapi_close_handle(hdl); 198 | hdl = mapi_query(dbh,stmt.c_str()); 199 | 200 | if (mapi_error(dbh)!=MOK) { 201 | 202 | try { 203 | const char *error_string = mapi_result_error(hdl); 204 | 205 | if (!error_string) 206 | error_string = "unknown error"; 207 | 208 | const char *sqlstate = mapi_result_errorcode(hdl); 209 | if (!sqlstate) 210 | sqlstate = "XXXXX"; 211 | 212 | /* monetdb appears to report sqlstate 42000 for all 213 | errors, so we need to match the error string to 214 | figure out actual syntax errors */ 215 | 216 | static regex re_syntax("^syntax error,.*", regex::extended); 217 | 218 | if (mapi_error(dbh)==MERROR) 219 | throw dut::syntax(error_string, sqlstate); 220 | else if (mapi_error(dbh)==MTIMEOUT) 221 | throw dut::timeout(error_string, sqlstate); 222 | else if (regex_match(error_string, re_syntax)) 223 | throw dut::syntax(error_string, sqlstate); 224 | else 225 | throw dut::failure(error_string, sqlstate); 226 | 227 | } catch (dut::failure &e) { 228 | mapi_close_handle(hdl); 229 | throw; 
230 | } 231 | } 232 | mapi_close_handle(hdl); 233 | } 234 | -------------------------------------------------------------------------------- /monetdb.hh: -------------------------------------------------------------------------------- 1 | /// @file 2 | /// @brief schema and dut classes for MonetDB 3 | 4 | 5 | #ifndef MONETDB_HH 6 | #define MONETDB_HH 7 | 8 | #include "dut.hh" 9 | #include "relmodel.hh" 10 | #include "schema.hh" 11 | #include 12 | 13 | #include 14 | 15 | struct monetdb_connection { 16 | Mapi dbh; 17 | monetdb_connection(std::string &conninfo); 18 | void q(const char* query); 19 | ~monetdb_connection(); 20 | }; 21 | 22 | struct schema_monetdb : schema, monetdb_connection { 23 | schema_monetdb(std::string &conninfo); 24 | virtual std::string quote_name(const std::string &id) { 25 | return id; 26 | } 27 | }; 28 | 29 | struct dut_monetdb : dut_base, monetdb_connection { 30 | virtual void test(const std::string &stmt); 31 | dut_monetdb(std::string &conninfo); 32 | }; 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /postgres.cc: -------------------------------------------------------------------------------- 1 | #include "postgres.hh" 2 | #include "config.h" 3 | #include 4 | 5 | #ifndef HAVE_BOOST_REGEX 6 | #include 7 | #else 8 | #include 9 | using boost::regex; 10 | using boost::smatch; 11 | using boost::regex_match; 12 | #endif 13 | 14 | using namespace std; 15 | 16 | static regex e_timeout("ERROR: canceling statement due to statement timeout(\n|.)*"); 17 | static regex e_syntax("ERROR: syntax error at or near(\n|.)*"); 18 | 19 | bool pg_type::consistent(sqltype *rvalue) 20 | { 21 | pg_type *t = dynamic_cast(rvalue); 22 | 23 | if (!t) { 24 | cerr << "unknown type: " << rvalue->name << endl; 25 | return false; 26 | } 27 | 28 | switch(typtype_) { 29 | case 'b': /* base type */ 30 | case 'c': /* composite type */ 31 | case 'd': /* domain */ 32 | case 'r': /* range */ 33 | case 'm': /* multirange */ 34 
| case 'e': /* enum */ 35 | return this == t; 36 | 37 | case 'p': /* pseudo type: accept any concrete matching type */ 38 | if (name == "anyarray" || name == "anycompatiblearray") { 39 | return t->typelem_ != InvalidOid; 40 | } else if (name == "anynonarray" || name == "anycompatiblenonarray") { 41 | return t->typelem_ == InvalidOid; 42 | } else if(name == "anyenum") { 43 | return t->typtype_ == 'e'; 44 | } else if (name == "\"any\"" || name == "anycompatible") { /* as quoted by quote_ident() */ 45 | return t->typtype_ != 'p'; /* any non-pseudo type */ 46 | } else if (name == "anyelement") { 47 | return t->typelem_ == InvalidOid; 48 | } else if (name == "anyrange" || name == "anycompatiblerange") { 49 | return t->typtype_ == 'r'; 50 | } else if (name == "anymultirange" || name == "anycompatiblemultirange") { 51 | return t->typtype_ == 'm'; 52 | } else if (name == "record") { 53 | return t->typtype_ == 'c'; 54 | } else if (name == "cstring") { 55 | return this == t; 56 | } else { 57 | return false; 58 | } 59 | 60 | default: 61 | throw std::logic_error("unknown typtype"); 62 | } 63 | } 64 | 65 | dut_pqxx::dut_pqxx(std::string conninfo) 66 | : c(conninfo) 67 | { 68 | c.set_variable("statement_timeout", "'1s'"); 69 | c.set_variable("client_min_messages", "'ERROR'"); 70 | c.set_variable("application_name", "'" PACKAGE "::dut'"); 71 | } 72 | 73 | void dut_pqxx::test(const std::string &stmt) 74 | { 75 | try { 76 | #ifndef HAVE_LIBPQXX7 77 | if(!c.is_open()) 78 | c.activate(); 79 | #endif 80 | 81 | pqxx::work w(c); 82 | w.exec(stmt.c_str()); 83 | w.abort(); 84 | } catch (const pqxx::failure &e) { 85 | if ((dynamic_cast(&e))) { 86 | /* re-throw to outer loop to recover session. 
*/ 87 | throw dut::broken(e.what()); 88 | } 89 | 90 | if (regex_match(e.what(), e_timeout)) 91 | throw dut::timeout(e.what()); 92 | else if (regex_match(e.what(), e_syntax)) 93 | throw dut::syntax(e.what()); 94 | else 95 | throw dut::failure(e.what()); 96 | } 97 | } 98 | 99 | 100 | schema_pqxx::schema_pqxx(std::string &conninfo, bool no_catalog) : c(conninfo) 101 | { 102 | c.set_variable("application_name", "'" PACKAGE "::schema'"); 103 | 104 | pqxx::work w(c); 105 | pqxx::result r = w.exec("select version()"); 106 | version = r[0][0].as(); 107 | 108 | r = w.exec("SHOW server_version_num"); 109 | version_num = r[0][0].as(); 110 | 111 | // address the schema change in postgresql 11 that replaced proisagg and proiswindow with prokind 112 | string procedure_is_aggregate = version_num < 110000 ? "proisagg" : "prokind = 'a'"; 113 | string procedure_is_window = version_num < 110000 ? "proiswindow" : "prokind = 'w'"; 114 | 115 | cerr << "Loading types..."; 116 | 117 | r = w.exec("select case typnamespace when 'pg_catalog'::regnamespace then quote_ident(typname) " 118 | "else format('%I.%I', typnamespace::regnamespace, typname) end, " 119 | "oid, typdelim, typrelid, typelem, typarray, typtype " 120 | "from pg_type "); 121 | 122 | for (auto row = r.begin(); row != r.end(); ++row) { 123 | string name(row[0].as()); 124 | OID oid(row[1].as()); 125 | string typdelim(row[2].as()); 126 | OID typrelid(row[3].as()); 127 | OID typelem(row[4].as()); 128 | OID typarray(row[5].as()); 129 | string typtype(row[6].as()); 130 | // if (schema == "pg_catalog") 131 | // continue; 132 | // if (schema == "information_schema") 133 | // continue; 134 | 135 | pg_type *t = new pg_type(name,oid,typdelim[0],typrelid, typelem, typarray, typtype[0]); 136 | oid2type[oid] = t; 137 | name2type[name] = t; 138 | types.push_back(t); 139 | } 140 | 141 | booltype = name2type["bool"]; 142 | inttype = name2type["int4"]; 143 | 144 | internaltype = name2type["internal"]; 145 | arraytype = name2type["anyarray"]; 
146 | 147 | cerr << "done." << endl; 148 | 149 | cerr << "Loading tables..."; 150 | r = w.exec("select table_name, " 151 | "table_schema, " 152 | "is_insertable_into, " 153 | "table_type " 154 | "from information_schema.tables"); 155 | 156 | for (auto row = r.begin(); row != r.end(); ++row) { 157 | string schema(row[1].as()); 158 | string insertable(row[2].as()); 159 | string table_type(row[3].as()); 160 | 161 | if (no_catalog && ((schema == "pg_catalog") || (schema == "information_schema"))) 162 | continue; 163 | 164 | tables.push_back(table(row[0].as(), 165 | schema, 166 | ((insertable == "YES") ? true : false), 167 | ((table_type == "BASE TABLE") ? true : false))); 168 | } 169 | 170 | cerr << "done." << endl; 171 | 172 | cerr << "Loading columns and constraints..."; 173 | 174 | for (auto t = tables.begin(); t != tables.end(); ++t) { 175 | string q("select attname, " 176 | "atttypid " 177 | "from pg_attribute join pg_class c on( c.oid = attrelid ) " 178 | "join pg_namespace n on n.oid = relnamespace " 179 | "where not attisdropped " 180 | "and attname not in " 181 | "('xmin', 'xmax', 'ctid', 'cmin', 'cmax', 'tableoid', 'oid') "); 182 | q += " and relname = " + w.quote(t->name); 183 | q += " and nspname = " + w.quote(t->schema); 184 | 185 | r = w.exec(q); 186 | for (auto row : r) { 187 | column c(row[0].as(), oid2type[row[1].as()]); 188 | t->columns().push_back(c); 189 | } 190 | 191 | q = "select conname from pg_class t " 192 | "join pg_constraint c on (t.oid = c.conrelid) " 193 | "where contype in ('f', 'u', 'p') "; 194 | q += " and relnamespace = " " (select oid from pg_namespace where nspname = " + w.quote(t->schema) + ")"; 195 | q += " and relname = " + w.quote(t->name); 196 | 197 | for (auto row : w.exec(q)) { 198 | t->constraints.push_back(row[0].as()); 199 | } 200 | 201 | } 202 | cerr << "done." 
<< endl; 203 | 204 | cerr << "Loading operators..."; 205 | 206 | r = w.exec("select oprname, oprleft," 207 | "oprright, oprresult " 208 | "from pg_catalog.pg_operator " 209 | "where 0 not in (oprresult, oprright, oprleft) "); 210 | for (auto row : r) { 211 | op o(row[0].as(), 212 | oid2type[row[1].as()], 213 | oid2type[row[2].as()], 214 | oid2type[row[3].as()]); 215 | register_operator(o); 216 | } 217 | 218 | cerr << "done." << endl; 219 | 220 | cerr << "Loading routines..."; 221 | r = w.exec("select (select nspname from pg_namespace where oid = pronamespace), oid, prorettype, proname " 222 | "from pg_proc " 223 | "where prorettype::regtype::text not in ('event_trigger', 'trigger', 'opaque', 'internal') " 224 | "and proname <> 'pg_event_trigger_table_rewrite_reason' " 225 | "and proname <> 'pg_event_trigger_table_rewrite_oid' " 226 | "and proname !~ '^ri_fkey_' " 227 | "and not (proretset or " + procedure_is_aggregate + " or " + procedure_is_window + ") "); 228 | 229 | for (auto row : r) { 230 | routine proc(row[0].as(), 231 | row[1].as(), 232 | oid2type[row[2].as()], 233 | row[3].as()); 234 | register_routine(proc); 235 | } 236 | 237 | cerr << "done." << endl; 238 | 239 | cerr << "Loading routine parameters..."; 240 | 241 | for (auto &proc : routines) { 242 | string q("select unnest(proargtypes) " 243 | "from pg_proc "); 244 | q += " where oid = " + w.quote(proc.specific_name); 245 | 246 | r = w.exec(q); 247 | for (auto row : r) { 248 | sqltype *t = oid2type[row[0].as()]; 249 | assert(t); 250 | proc.argtypes.push_back(t); 251 | } 252 | } 253 | cerr << "done." 
<< endl; 254 | 255 | cerr << "Loading aggregates..."; 256 | r = w.exec("select (select nspname from pg_namespace where oid = pronamespace), oid, prorettype, proname " 257 | "from pg_proc " 258 | "where prorettype::regtype::text not in ('event_trigger', 'trigger', 'opaque', 'internal') " 259 | "and proname not in ('pg_event_trigger_table_rewrite_reason') " 260 | "and proname not in ('percentile_cont', 'dense_rank', 'cume_dist', " 261 | "'rank', 'test_rank', 'percent_rank', 'percentile_disc', 'mode', 'test_percentile_disc') " 262 | "and proname !~ '^ri_fkey_' " 263 | "and not (proretset or " + procedure_is_window + ") " 264 | "and " + procedure_is_aggregate); 265 | 266 | for (auto row : r) { 267 | routine proc(row[0].as(), 268 | row[1].as(), 269 | oid2type[row[2].as()], 270 | row[3].as()); 271 | register_aggregate(proc); 272 | } 273 | 274 | cerr << "done." << endl; 275 | 276 | cerr << "Loading aggregate parameters..."; 277 | 278 | for (auto &proc : aggregates) { 279 | string q("select unnest(proargtypes) " 280 | "from pg_proc "); 281 | q += " where oid = " + w.quote(proc.specific_name); 282 | 283 | r = w.exec(q); 284 | for (auto row : r) { 285 | sqltype *t = oid2type[row[0].as()]; 286 | assert(t); 287 | proc.argtypes.push_back(t); 288 | } 289 | } 290 | cerr << "done." 
<< endl; 291 | #ifdef HAVE_LIBPQXX7 292 | c.close(); 293 | #else 294 | c.disconnect(); 295 | #endif 296 | generate_indexes(); 297 | } 298 | 299 | extern "C" { 300 | void dut_libpq_notice_rx(void *arg, const PGresult *res); 301 | } 302 | 303 | void dut_libpq_notice_rx(void *arg, const PGresult *res) 304 | { 305 | (void) arg; 306 | (void) res; 307 | } 308 | 309 | void dut_libpq::connect(std::string &conninfo) 310 | { 311 | if (conn) { 312 | PQfinish(conn); 313 | } 314 | conn = PQconnectdb(conninfo.c_str()); 315 | if (PQstatus(conn) != CONNECTION_OK) 316 | { 317 | char *errmsg = PQerrorMessage(conn); 318 | if (strlen(errmsg)) 319 | throw dut::broken(errmsg, "08001"); 320 | } 321 | 322 | command("set statement_timeout to '1s'"); 323 | command("set client_min_messages to 'ERROR';"); 324 | command("set application_name to '" PACKAGE "::dut';"); 325 | 326 | PQsetNoticeReceiver(conn, dut_libpq_notice_rx, (void *) 0); 327 | } 328 | 329 | dut_libpq::dut_libpq(std::string conninfo) 330 | : conninfo_(conninfo) 331 | { 332 | connect(conninfo); 333 | } 334 | 335 | void dut_libpq::command(const std::string &stmt) 336 | { 337 | if (!conn) 338 | connect(conninfo_); 339 | PGresult *res = PQexec(conn, stmt.c_str()); 340 | 341 | switch (PQresultStatus(res)) { 342 | 343 | case PGRES_FATAL_ERROR: 344 | default: 345 | { 346 | const char *errmsg = PQresultErrorMessage(res); 347 | if (!errmsg || !strlen(errmsg)) 348 | errmsg = PQerrorMessage(conn); 349 | 350 | const char *sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE); 351 | if (!sqlstate || !strlen(sqlstate)) 352 | sqlstate = (CONNECTION_OK != PQstatus(conn)) ? 
"08000" : "?????"; 353 | 354 | std::string error_string(errmsg); 355 | std::string sqlstate_string(sqlstate); 356 | PQclear(res); 357 | 358 | if (CONNECTION_OK != PQstatus(conn)) { 359 | PQfinish(conn); 360 | conn = 0; 361 | throw dut::broken(error_string.c_str(), sqlstate_string.c_str()); 362 | } 363 | if (sqlstate_string == "42601") 364 | throw dut::syntax(error_string.c_str(), sqlstate_string.c_str()); 365 | else 366 | throw dut::failure(error_string.c_str(), sqlstate_string.c_str()); 367 | } 368 | 369 | case PGRES_NONFATAL_ERROR: 370 | case PGRES_TUPLES_OK: 371 | case PGRES_SINGLE_TUPLE: 372 | case PGRES_COMMAND_OK: 373 | PQclear(res); 374 | return; 375 | } 376 | } 377 | 378 | void dut_libpq::test(const std::string &stmt) 379 | { 380 | command("ROLLBACK;"); 381 | command("BEGIN;"); 382 | command(stmt.c_str()); 383 | command("ROLLBACK;"); 384 | } 385 | -------------------------------------------------------------------------------- /postgres.hh: -------------------------------------------------------------------------------- 1 | /// @file 2 | /// @brief schema and dut classes for PostgreSQL 3 | 4 | #ifndef POSTGRES_HH 5 | #define POSTGRES_HH 6 | 7 | #include "dut.hh" 8 | #include "relmodel.hh" 9 | #include "schema.hh" 10 | 11 | #include 12 | 13 | extern "C" { 14 | #include 15 | } 16 | 17 | #define OID long 18 | 19 | struct pg_type : sqltype { 20 | OID oid_; 21 | char typdelim_; 22 | OID typrelid_; 23 | OID typelem_; 24 | OID typarray_; 25 | char typtype_; 26 | pg_type(string name, 27 | OID oid, 28 | char typdelim, 29 | OID typrelid, 30 | OID typelem, 31 | OID typarray, 32 | char typtype) 33 | : sqltype(name), oid_(oid), typdelim_(typdelim), typrelid_(typrelid), 34 | typelem_(typelem), typarray_(typarray), typtype_(typtype) { } 35 | 36 | virtual bool consistent(struct sqltype *rvalue); 37 | bool consistent_(sqltype *rvalue); 38 | }; 39 | 40 | 41 | struct schema_pqxx : public schema { 42 | pqxx::connection c; 43 | map oid2type; 44 | map name2type; 45 | 46 | 
virtual std::string quote_name(const std::string &id) { 47 | return c.quote_name(id); 48 | } 49 | schema_pqxx(std::string &conninfo, bool no_catalog); 50 | }; 51 | 52 | struct dut_pqxx : dut_base { 53 | pqxx::connection c; 54 | virtual void test(const std::string &stmt); 55 | dut_pqxx(std::string conninfo); 56 | }; 57 | 58 | struct dut_libpq : dut_base { 59 | PGconn *conn = 0; 60 | std::string conninfo_; 61 | virtual void test(const std::string &stmt); 62 | void command(const std::string &stmt); 63 | void connect(std::string &conninfo); 64 | dut_libpq(std::string conninfo); 65 | }; 66 | 67 | #endif 68 | -------------------------------------------------------------------------------- /prod.cc: -------------------------------------------------------------------------------- 1 | /// @file 2 | /// @brief Base class for grammar productions 3 | #include 4 | #include 5 | #include "prod.hh" 6 | #include "impedance.hh" 7 | 8 | prod::prod(struct prod *parent) 9 | : pprod(parent) 10 | { 11 | if (parent) { 12 | level = parent->level + 1; 13 | scope = parent->scope; 14 | } else { 15 | scope = 0; 16 | level = 0; 17 | } 18 | } 19 | 20 | void prod::indent(std::ostream &out) 21 | { 22 | out << std::endl; 23 | for (int i = 0; i < level; i++) 24 | out << " "; 25 | } 26 | 27 | void prod::retry() 28 | { 29 | impedance::retry(this); 30 | if (retries++ <= retry_limit) 31 | return; 32 | 33 | impedance::limit(this); 34 | throw std::runtime_error(std::string("excessive retries in ") 35 | + typeid(*this).name()); 36 | } 37 | 38 | void prod::match() 39 | { 40 | if (!impedance::matched(this)) 41 | throw std::runtime_error("impedance mismatch"); 42 | } 43 | 44 | void prod::fail(const char *reason) 45 | { 46 | impedance::fail(this); 47 | throw std::runtime_error(reason); 48 | } 49 | -------------------------------------------------------------------------------- /prod.hh: -------------------------------------------------------------------------------- 1 | /// @file 2 | /// @brief Base class for 
grammar productions 3 | 4 | #include 5 | #include 6 | 7 | #ifndef PROD_HH 8 | #define PROD_HH 9 | 10 | /// Base class for walking the AST 11 | struct prod_visitor { 12 | virtual void visit(struct prod *p) = 0; 13 | virtual ~prod_visitor() { } 14 | }; 15 | 16 | /// Base class for AST nodes 17 | struct prod { 18 | /// Parent production that instantiated this one. May be NULL for 19 | /// top-level productions. 20 | struct prod *pprod; 21 | /// Scope object to model column/table reference visibility. 22 | struct scope *scope; 23 | /// Level of this production in the AST. 0 for root node. 24 | int level; 25 | /// Number of retries in this production. Child productions are 26 | /// generated speculatively and may fail. 27 | long retries = 0; 28 | /// Maximum number of retries allowed before reporting a failure to 29 | /// the parent prod. 30 | long retry_limit = 100; 31 | prod(prod *parent); 32 | /// Newline and indent according to tree level. 33 | virtual void indent(std::ostream &out); 34 | /// Emit SQL for this production. 35 | virtual void out(std::ostream &out) = 0; 36 | /// Check with the impedance matching code whether this production 37 | /// has been blacklisted and throw an exception if so. 38 | virtual void match(); 39 | /// Visitor pattern for walking the AST. Make sure you visit all 40 | /// child productions when deriving classes. 41 | virtual void accept(prod_visitor *v) { v->visit(this); } 42 | /// Report a "failed to generate" error. 43 | virtual void fail(const char *reason); 44 | /// Increase the retry count and throw an exception when retry_limit 45 | /// is exceeded. 
46 | void retry(); 47 | }; 48 | 49 | inline std::ostream& operator<<(std::ostream& s, prod& p) 50 | { 51 | p.out(s); return s; 52 | } 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /random.cc: -------------------------------------------------------------------------------- 1 | #include "random.hh" 2 | 3 | namespace smith { 4 | std::mt19937_64 rng; 5 | } 6 | 7 | int d6() { 8 | static std::uniform_int_distribution<> pick(1, 6); 9 | return pick(smith::rng); 10 | } 11 | 12 | int d9() { 13 | static std::uniform_int_distribution<> pick(1, 9); 14 | return pick(smith::rng); 15 | } 16 | 17 | int d12() { 18 | static std::uniform_int_distribution<> pick(1, 12); 19 | return pick(smith::rng); 20 | } 21 | 22 | int d20() { 23 | static std::uniform_int_distribution<> pick(1, 20); 24 | return pick(smith::rng); 25 | } 26 | 27 | int d42() { 28 | static std::uniform_int_distribution<> pick(1, 42); 29 | return pick(smith::rng); 30 | } 31 | 32 | int d100() { 33 | static std::uniform_int_distribution<> pick(1, 100); 34 | return pick(smith::rng); 35 | } 36 | -------------------------------------------------------------------------------- /random.hh: -------------------------------------------------------------------------------- 1 | /// @file 2 | /// @brief randomness 3 | 4 | #ifndef RANDOM_HH 5 | #define RANDOM_HH 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | namespace smith { 13 | extern std::mt19937_64 rng; 14 | } 15 | 16 | template T& random_pick(std::vector& container) { 17 | if (!container.size()) 18 | throw std::runtime_error("No candidates available"); 19 | 20 | std::uniform_int_distribution pick(0, container.size()-1); 21 | return container[pick(smith::rng)]; 22 | } 23 | 24 | template 25 | I random_pick(I beg, I end) { 26 | if (beg == end) 27 | throw std::runtime_error("No candidates available"); 28 | 29 | std::uniform_int_distribution<> pick(0, std::distance(beg, end) - 1); 30 | std::advance(beg, 
pick(smith::rng)); 31 | return beg; 32 | } 33 | 34 | template 35 | I random_pick(std::pair iters) { 36 | return random_pick(iters.first, iters.second); 37 | } 38 | 39 | int d6(), d9(), d12(), d20(), d42(), d100(); 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /relmodel.cc: -------------------------------------------------------------------------------- 1 | #include "relmodel.hh" 2 | 3 | map sqltype::typemap; 4 | 5 | sqltype * sqltype::get(string n) 6 | { 7 | if (typemap.count(n)) 8 | return typemap[n]; 9 | else 10 | return typemap[n] = new sqltype(n); 11 | } 12 | 13 | bool sqltype::consistent(struct sqltype *rvalue) 14 | { 15 | return this == rvalue; 16 | } 17 | -------------------------------------------------------------------------------- /relmodel.hh: -------------------------------------------------------------------------------- 1 | /// @file 2 | /// @brief supporting classes for the grammar 3 | 4 | #ifndef RELMODEL_HH 5 | #define RELMODEL_HH 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | using std::string; 14 | using std::vector; 15 | using std::map; 16 | using std::pair; 17 | using std::make_pair; 18 | using std::shared_ptr; 19 | 20 | struct sqltype { 21 | string name; 22 | static map typemap; 23 | static struct sqltype *get(string s); 24 | sqltype(string n) : name(n) { } 25 | 26 | /** This function is used to model postgres-style pseudotypes. 27 | A generic type is consistent with a more concrete type. 
28 | E.G., anyarray->consistent(intarray) is true 29 | while int4array->consistent(anyarray) is false 30 | 31 | There must not be cycles in the consistency graph, since the 32 | grammar will use fixpoint iteration to resolve type conformance 33 | situations in the direction of more concrete types */ 34 | virtual bool consistent(struct sqltype *rvalue); 35 | }; 36 | 37 | struct column { 38 | string name; 39 | sqltype *type; 40 | column(string name) : name(name) { } 41 | column(string name, sqltype *t) : name(name), type(t) { 42 | assert(t); 43 | } 44 | }; 45 | 46 | struct relation { 47 | vector cols; 48 | virtual vector &columns() { return cols; } 49 | }; 50 | 51 | struct named_relation : relation { 52 | string name; 53 | virtual string ident() { return name; } 54 | virtual ~named_relation() { } 55 | named_relation(string n) : name(n) { } 56 | }; 57 | 58 | struct aliased_relation : named_relation { 59 | relation *rel; 60 | virtual ~aliased_relation() { } 61 | aliased_relation(string n, relation* r) : named_relation(n), rel(r) { } 62 | virtual vector& columns() { return rel->columns(); } 63 | }; 64 | 65 | struct table : named_relation { 66 | string schema; 67 | bool is_insertable; 68 | bool is_base_table; 69 | vector constraints; 70 | table(string name, string schema, bool insertable, bool base_table) 71 | : named_relation(name), 72 | schema(schema), 73 | is_insertable(insertable), 74 | is_base_table(base_table) { } 75 | virtual string ident() { return schema + "." 
+ name; } 76 | virtual ~table() { }; 77 | }; 78 | 79 | struct scope { 80 | struct scope *parent; 81 | /// available to table_ref productions 82 | vector tables; 83 | /// available to column_ref productions 84 | vector refs; 85 | struct schema *schema; 86 | /// Counters for prefixed stmt-unique identifiers 87 | shared_ptr > stmt_seq; 88 | scope(struct scope *parent = 0) : parent(parent) { 89 | if (parent) { 90 | schema = parent->schema; 91 | tables = parent->tables; 92 | refs = parent->refs; 93 | stmt_seq = parent->stmt_seq; 94 | } 95 | } 96 | vector > refs_of_type(sqltype *t) { 97 | vector > result; 98 | for (auto r : refs) 99 | for (auto c : r->columns()) 100 | if (t->consistent(c.type)) 101 | result.push_back(make_pair(r,c)); 102 | return result; 103 | } 104 | /** Generate unique identifier with prefix. */ 105 | string stmt_uid(const char* prefix) { 106 | string result(prefix); 107 | result += "_"; 108 | result += std::to_string((*stmt_seq)[result]++); 109 | return result; 110 | } 111 | /** Reset unique identifier counters. */ 112 | void new_stmt() { 113 | stmt_seq = std::make_shared >(); 114 | } 115 | }; 116 | 117 | struct op { 118 | string name; 119 | sqltype *left; 120 | sqltype *right; 121 | sqltype *result; 122 | op(string n,sqltype *l,sqltype *r, sqltype *res) 123 | : name(n), left(l), right(r), result(res) { } 124 | op() { } 125 | }; 126 | 127 | struct routine { 128 | string specific_name; 129 | string schema; 130 | vector argtypes; 131 | sqltype *restype; 132 | string name; 133 | routine(string schema, string specific_name, sqltype* data_type, string name) 134 | : specific_name(specific_name), schema(schema), restype(data_type), name(name) { 135 | assert(data_type); 136 | } 137 | virtual string ident() { 138 | if (schema.size()) 139 | return schema + "." 
+ name; 140 | else 141 | return name; 142 | } 143 | }; 144 | 145 | #endif 146 | -------------------------------------------------------------------------------- /schema.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "config.h" 3 | #include "schema.hh" 4 | #include "relmodel.hh" 5 | #include 6 | #include "gitrev.h" 7 | 8 | using namespace std; 9 | using namespace pqxx; 10 | 11 | void schema::generate_indexes() { 12 | 13 | cerr << "Generating indexes..."; 14 | 15 | for (auto &type: types) { 16 | assert(type); 17 | for(auto &r: aggregates) { 18 | if (type->consistent(r.restype)) 19 | aggregates_returning_type[type].push_back(&r); 20 | } 21 | 22 | for(auto &r: routines) { 23 | if (!type->consistent(r.restype)) 24 | continue; 25 | routines_returning_type[type].push_back(&r); 26 | if(!r.argtypes.size()) 27 | parameterless_routines_returning_type[type].push_back(&r); 28 | } 29 | 30 | for (auto &t: tables) { 31 | for (auto &c: t.columns()) { 32 | if (type->consistent(c.type)) { 33 | tables_with_columns_of_type[type].push_back(&t); 34 | break; 35 | } 36 | } 37 | } 38 | 39 | for (auto &concrete: types) { 40 | if (type->consistent(concrete)) 41 | concrete_type[type].push_back(concrete); 42 | } 43 | 44 | for (auto &o: operators) { 45 | if (type->consistent(o.result)) 46 | operators_returning_type[type].push_back(&o); 47 | } 48 | } 49 | 50 | for (auto &t: tables) { 51 | if (t.is_base_table) 52 | base_tables.push_back(&t); 53 | } 54 | 55 | cerr << "done." 
<< endl; 56 | 57 | assert(booltype); 58 | assert(inttype); 59 | assert(internaltype); 60 | assert(arraytype); 61 | 62 | } 63 | -------------------------------------------------------------------------------- /schema.hh: -------------------------------------------------------------------------------- 1 | /// @file 2 | /// @brief Base class providing schema information to grammar 3 | 4 | #ifndef SCHEMA_HH 5 | #define SCHEMA_HH 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "relmodel.hh" 14 | #include "random.hh" 15 | 16 | struct schema { 17 | sqltype *booltype; 18 | sqltype *inttype; 19 | sqltype *internaltype; 20 | sqltype *arraytype; 21 | 22 | std::vector types; 23 | 24 | std::vector
tables; 25 | std::vector operators; 26 | std::vector routines; 27 | std::vector aggregates; 28 | 29 | typedef std::tuple typekey; 30 | std::multimap index; 31 | typedef std::multimap::iterator op_iterator; 32 | 33 | std::map> routines_returning_type; 34 | std::map> aggregates_returning_type; 35 | std::map> parameterless_routines_returning_type; 36 | std::map> tables_with_columns_of_type; 37 | std::map> operators_returning_type; 38 | std::map> concrete_type; 39 | std::vector base_tables; 40 | 41 | string version; 42 | int version_num; // comparable version number 43 | 44 | const char *true_literal = "true"; 45 | const char *false_literal = "false"; 46 | 47 | virtual std::string quote_name(const std::string &id) = 0; 48 | 49 | void summary() { 50 | std::cout << "Found " << tables.size() << 51 | " user table(s) in information schema." << std::endl; 52 | } 53 | void fill_scope(struct scope &s) { 54 | for (auto &t : tables) 55 | s.tables.push_back(&t); 56 | s.schema = this; 57 | } 58 | virtual void register_operator(op& o) { 59 | operators.push_back(o); 60 | typekey t(o.left, o.right, o.result); 61 | index.insert(std::pair(t,o)); 62 | } 63 | virtual void register_routine(routine& r) { 64 | routines.push_back(r); 65 | } 66 | virtual void register_aggregate(routine& r) { 67 | aggregates.push_back(r); 68 | } 69 | virtual op_iterator find_operator(sqltype *left, sqltype *right, sqltype *res) { 70 | typekey t(left, right, res); 71 | auto cons = index.equal_range(t); 72 | if (cons.first == cons.second) 73 | return index.end(); 74 | else 75 | return random_pick<>(cons.first, cons.second); 76 | } 77 | schema() { } 78 | void generate_indexes(); 79 | }; 80 | 81 | #endif 82 | 83 | -------------------------------------------------------------------------------- /sqlite.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "sqlite.hh" 5 | #include 6 | 7 | #ifndef HAVE_BOOST_REGEX 8 | #include 9 | 
#else 10 | #include 11 | using boost::regex; 12 | using boost::smatch; 13 | using boost::regex_match; 14 | #endif 15 | 16 | using namespace std; 17 | 18 | static regex e_syntax("near \".*\": syntax error"); 19 | static regex e_user_abort("callback requested query abort"); 20 | 21 | extern "C" { 22 | #include 23 | #include 24 | } 25 | 26 | extern "C" int my_sqlite3_busy_handler(void *, int) 27 | { 28 | throw std::runtime_error("sqlite3 timeout"); 29 | } 30 | 31 | extern "C" int callback(void *arg, int argc, char **argv, char **azColName) 32 | { 33 | (void)arg; 34 | 35 | int i; 36 | for(i=0; i *)arg; 47 | bool view = (string("view") == argv[0]); 48 | table tab(argv[2], "main", !view, !view); 49 | tables->push_back(tab); 50 | return 0; 51 | } 52 | 53 | extern "C" int column_callback(void *arg, int argc, char **argv, char **azColName) 54 | { 55 | (void) argc; (void) azColName; 56 | table *tab = (table *)arg; 57 | column c(argv[1], sqltype::get(argv[2])); 58 | tab->columns().push_back(c); 59 | return 0; 60 | } 61 | 62 | sqlite_connection::sqlite_connection(std::string &conninfo) 63 | { 64 | assert(sqlite3_libversion_number()==SQLITE_VERSION_NUMBER); 65 | assert(strcmp(sqlite3_sourceid(),SQLITE_SOURCE_ID)==0); 66 | assert(strcmp(sqlite3_libversion(),SQLITE_VERSION)==0); 67 | rc = sqlite3_open_v2(conninfo.c_str(), &db, SQLITE_OPEN_READWRITE|SQLITE_OPEN_URI, 0); 68 | if (rc) { 69 | throw std::runtime_error(sqlite3_errmsg(db)); 70 | } 71 | } 72 | 73 | void sqlite_connection::q(const char *query) 74 | { 75 | rc = sqlite3_exec(db, query, callback, 0, &zErrMsg); 76 | if( rc!=SQLITE_OK ){ 77 | auto e = std::runtime_error(zErrMsg); 78 | sqlite3_free(zErrMsg); 79 | throw e; 80 | } 81 | } 82 | 83 | sqlite_connection::~sqlite_connection() 84 | { 85 | if (db) 86 | sqlite3_close(db); 87 | } 88 | 89 | schema_sqlite::schema_sqlite(std::string &conninfo, bool no_catalog) 90 | : sqlite_connection(conninfo) 91 | { 92 | std::string query = "SELECT * FROM main.sqlite_master where type in 
('table', 'view')"; 93 | 94 | if (no_catalog) 95 | query+= " AND name NOT like 'sqlite_%%'"; 96 | 97 | version = "SQLite " SQLITE_VERSION " " SQLITE_SOURCE_ID; 98 | 99 | // sqlite3_busy_handler(db, my_sqlite3_busy_handler, 0); 100 | cerr << "Loading tables..."; 101 | 102 | rc = sqlite3_exec(db, query.c_str(), table_callback, (void *)&tables, &zErrMsg); 103 | if (rc!=SQLITE_OK) { 104 | auto e = std::runtime_error(zErrMsg); 105 | sqlite3_free(zErrMsg); 106 | throw e; 107 | } 108 | 109 | if (!no_catalog) 110 | { 111 | // sqlite_master doesn't list itself, do it manually 112 | table tab("sqlite_master", "main", false, false); 113 | tables.push_back(tab); 114 | } 115 | 116 | cerr << "done." << endl; 117 | 118 | cerr << "Loading columns and constraints..."; 119 | 120 | for (auto t = tables.begin(); t != tables.end(); ++t) { 121 | string q("pragma table_info("); 122 | q += t->name; 123 | q += ");"; 124 | 125 | rc = sqlite3_exec(db, q.c_str(), column_callback, (void *)&*t, &zErrMsg); 126 | if (rc!=SQLITE_OK) { 127 | auto e = std::runtime_error(zErrMsg); 128 | sqlite3_free(zErrMsg); 129 | throw e; 130 | } 131 | } 132 | 133 | cerr << "done." 
<< endl; 134 | 135 | #define BINOP(n,t) do {op o(#n,sqltype::get(#t),sqltype::get(#t),sqltype::get(#t)); register_operator(o); } while(0) 136 | 137 | BINOP(||, TEXT); 138 | BINOP(*, INTEGER); 139 | BINOP(/, INTEGER); 140 | 141 | BINOP(+, INTEGER); 142 | BINOP(-, INTEGER); 143 | 144 | BINOP(>>, INTEGER); 145 | BINOP(<<, INTEGER); 146 | 147 | BINOP(&, INTEGER); 148 | BINOP(|, INTEGER); 149 | 150 | BINOP(<, INTEGER); 151 | BINOP(<=, INTEGER); 152 | BINOP(>, INTEGER); 153 | BINOP(>=, INTEGER); 154 | 155 | BINOP(=, INTEGER); 156 | BINOP(<>, INTEGER); 157 | BINOP(IS, INTEGER); 158 | BINOP(IS NOT, INTEGER); 159 | 160 | BINOP(AND, INTEGER); 161 | BINOP(OR, INTEGER); 162 | 163 | #define FUNC(n,r) do { \ 164 | routine proc("", "", sqltype::get(#r), #n); \ 165 | register_routine(proc); \ 166 | } while(0) 167 | 168 | #define FUNC1(n,r,a) do { \ 169 | routine proc("", "", sqltype::get(#r), #n); \ 170 | proc.argtypes.push_back(sqltype::get(#a)); \ 171 | register_routine(proc); \ 172 | } while(0) 173 | 174 | #define FUNC2(n,r,a,b) do { \ 175 | routine proc("", "", sqltype::get(#r), #n); \ 176 | proc.argtypes.push_back(sqltype::get(#a)); \ 177 | proc.argtypes.push_back(sqltype::get(#b)); \ 178 | register_routine(proc); \ 179 | } while(0) 180 | 181 | #define FUNC3(n,r,a,b,c) do { \ 182 | routine proc("", "", sqltype::get(#r), #n); \ 183 | proc.argtypes.push_back(sqltype::get(#a)); \ 184 | proc.argtypes.push_back(sqltype::get(#b)); \ 185 | proc.argtypes.push_back(sqltype::get(#c)); \ 186 | register_routine(proc); \ 187 | } while(0) 188 | 189 | FUNC(last_insert_rowid, INTEGER); 190 | FUNC(random, INTEGER); 191 | FUNC(sqlite_source_id, TEXT); 192 | FUNC(sqlite_version, TEXT); 193 | FUNC(total_changes, INTEGER); 194 | 195 | FUNC1(abs, INTEGER, REAL); 196 | FUNC1(hex, TEXT, TEXT); 197 | FUNC1(length, INTEGER, TEXT); 198 | FUNC1(lower, TEXT, TEXT); 199 | FUNC1(ltrim, TEXT, TEXT); 200 | FUNC1(quote, TEXT, TEXT); 201 | FUNC1(randomblob, TEXT, INTEGER); 202 | FUNC1(round, INTEGER, REAL); 
203 | FUNC1(rtrim, TEXT, TEXT); 204 | FUNC1(soundex, TEXT, TEXT); 205 | FUNC1(sqlite_compileoption_get, TEXT, INTEGER); 206 | FUNC1(sqlite_compileoption_used, INTEGER, TEXT); 207 | FUNC1(trim, TEXT, TEXT); 208 | FUNC1(typeof, TEXT, INTEGER); 209 | FUNC1(typeof, TEXT, NUMERIC); 210 | FUNC1(typeof, TEXT, REAL); 211 | FUNC1(typeof, TEXT, TEXT); 212 | FUNC1(unicode, INTEGER, TEXT); 213 | FUNC1(upper, TEXT, TEXT); 214 | FUNC1(zeroblob, TEXT, INTEGER); 215 | 216 | FUNC2(glob, INTEGER, TEXT, TEXT); 217 | FUNC2(instr, INTEGER, TEXT, TEXT); 218 | FUNC2(like, INTEGER, TEXT, TEXT); 219 | FUNC2(ltrim, TEXT, TEXT, TEXT); 220 | FUNC2(rtrim, TEXT, TEXT, TEXT); 221 | FUNC2(trim, TEXT, TEXT, TEXT); 222 | FUNC2(round, INTEGER, REAL, INTEGER); 223 | FUNC2(substr, TEXT, TEXT, INTEGER); 224 | 225 | FUNC3(substr, TEXT, TEXT, INTEGER, INTEGER); 226 | FUNC3(replace, TEXT, TEXT, TEXT, TEXT); 227 | 228 | 229 | #define AGG(n,r, a) do { \ 230 | routine proc("", "", sqltype::get(#r), #n); \ 231 | proc.argtypes.push_back(sqltype::get(#a)); \ 232 | register_aggregate(proc); \ 233 | } while(0) 234 | 235 | AGG(avg, INTEGER, INTEGER); 236 | AGG(avg, REAL, REAL); 237 | AGG(count, INTEGER, REAL); 238 | AGG(count, INTEGER, TEXT); 239 | AGG(count, INTEGER, INTEGER); 240 | AGG(group_concat, TEXT, TEXT); 241 | AGG(max, REAL, REAL); 242 | AGG(max, INTEGER, INTEGER); 243 | AGG(sum, REAL, REAL); 244 | AGG(sum, INTEGER, INTEGER); 245 | AGG(total, REAL, INTEGER); 246 | AGG(total, REAL, REAL); 247 | 248 | booltype = sqltype::get("INTEGER"); 249 | inttype = sqltype::get("INTEGER"); 250 | 251 | internaltype = sqltype::get("internal"); 252 | arraytype = sqltype::get("ARRAY"); 253 | 254 | true_literal = "1"; 255 | false_literal = "0"; 256 | 257 | generate_indexes(); 258 | sqlite3_close(db); 259 | db = 0; 260 | } 261 | 262 | dut_sqlite::dut_sqlite(std::string &conninfo) 263 | : sqlite_connection(conninfo) 264 | { 265 | q("PRAGMA main.auto_vacuum = 2"); 266 | } 267 | 268 | extern "C" int dut_callback(void *arg, int 
argc, char **argv, char **azColName) 269 | { 270 | (void) arg; (void) argc; (void) argv; (void) azColName; 271 | return SQLITE_ABORT; 272 | } 273 | 274 | void dut_sqlite::test(const std::string &stmt) 275 | { 276 | alarm(6); 277 | rc = sqlite3_exec(db, stmt.c_str(), dut_callback, 0, &zErrMsg); 278 | if( rc!=SQLITE_OK ){ 279 | try { 280 | if (regex_match(zErrMsg, e_syntax)) 281 | throw dut::syntax(zErrMsg); 282 | else if (regex_match(zErrMsg, e_user_abort)) { 283 | sqlite3_free(zErrMsg); 284 | return; 285 | } else 286 | throw dut::failure(zErrMsg); 287 | } catch (dut::failure &e) { 288 | sqlite3_free(zErrMsg); 289 | throw; 290 | } 291 | } 292 | } 293 | 294 | -------------------------------------------------------------------------------- /sqlite.hh: -------------------------------------------------------------------------------- 1 | /// @file 2 | /// @brief schema and dut classes for SQLite 3 3 | 4 | #ifndef SQLITE_HH 5 | #define SQLITE_HH 6 | 7 | extern "C" { 8 | #include 9 | } 10 | 11 | #include "schema.hh" 12 | #include "relmodel.hh" 13 | #include "dut.hh" 14 | 15 | struct sqlite_connection { 16 | sqlite3 *db; 17 | char *zErrMsg = 0; 18 | int rc; 19 | void q(const char *query); 20 | sqlite_connection(std::string &conninfo); 21 | ~sqlite_connection(); 22 | }; 23 | 24 | struct schema_sqlite : schema, sqlite_connection { 25 | schema_sqlite(std::string &conninfo, bool no_catalog); 26 | virtual std::string quote_name(const std::string &id) { 27 | return id; 28 | } 29 | }; 30 | 31 | struct dut_sqlite : dut_base, sqlite_connection { 32 | virtual void test(const std::string &stmt); 33 | dut_sqlite(std::string &conninfo); 34 | }; 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /sqlsmith.cc: -------------------------------------------------------------------------------- 1 | #include "config.h" 2 | 3 | #include 4 | #include 5 | 6 | #ifndef HAVE_BOOST_REGEX 7 | #include 8 | #else 9 | #include 10 | using boost::regex; 11 
| using boost::smatch; 12 | using boost::regex_match; 13 | #endif 14 | /* NOTE(review): the bare "#include" directives in this file carry no header name in this copy; the angle-bracket part appears to have been lost -- restore it from the original source. */ 15 | #include 16 | #include 17 | 18 | #include "random.hh" 19 | #include "grammar.hh" 20 | #include "relmodel.hh" 21 | #include "schema.hh" 22 | #include "gitrev.h" 23 | 24 | #include "log.hh" 25 | #include "dump.hh" 26 | #include "impedance.hh" 27 | #include "dut.hh" 28 | 29 | #ifdef HAVE_LIBSQLITE3 30 | #include "sqlite.hh" 31 | #endif 32 | 33 | #ifdef HAVE_MONETDB 34 | #include "monetdb.hh" 35 | #endif 36 | 37 | #include "postgres.hh" 38 | 39 | using namespace std; 40 | 41 | using namespace std::chrono; 42 | 43 | extern "C" { 44 | #include 45 | #include 46 | #include 47 | } 48 | 49 | /* make the cerr logger globally accessible so we can emit one last 50 | report on SIGINT */ 51 | cerr_logger *global_cerr_logger; 52 | /* Signal handler: emits a report through global_cerr_logger when one is set, then exits with status 1. main() installs it for SIGINT only when --verbose is given. */ 53 | extern "C" void cerr_log_handler(int) 54 | { 55 | if (global_cerr_logger) 56 | global_cerr_logger->report(); 57 | exit(1); 58 | } 59 | /* Program entry point: parses the --name[=value] arguments, builds the schema for the selected backend, then generates statements and either prints them (--dry-run) or executes them against the device under test. NOTE(review): template arguments such as the ones of map, shared_ptr and make_shared are absent in this copy -- confirm against the original source. */ 60 | int main(int argc, char *argv[]) 61 | { 62 | cerr << PACKAGE_NAME " " GITREV << endl; 63 | 64 | map options; 65 | regex optregex("--(help|log-to|verbose|target|sqlite|monetdb|version|dump-all-graphs|dump-all-queries|seed|dry-run|max-queries|rng-state|exclude-catalog)(?:=((?:.|\n)*))?"); 66 | /* Each argument has to match optregex as a whole: group 1 is the option name, group 2 the text after '=' (empty when absent). Anything else is reported on cerr and switches on --help. */ 67 | for(char **opt = argv+1 ;opt < argv+argc; opt++) { 68 | smatch match; 69 | string s(*opt); 70 | if (regex_match(s, match, optregex)) { 71 | options[string(match[1])] = match[2]; 72 | } else { 73 | cerr << "Cannot parse option: " << *opt << endl; 74 | options["help"] = ""; 75 | } 76 | } 77 | /* Usage text; the SQLite and MonetDB lines are only compiled in when the respective backend is available. */ 78 | if (options.count("help")) { 79 | cerr << 80 | " --target=connstr postgres database to send queries to" << endl << 81 | #ifdef HAVE_LIBSQLITE3 82 | " --sqlite=URI SQLite database to send queries to" << endl << 83 | #endif 84 | #ifdef HAVE_MONETDB 85 | " --monetdb=connstr MonetDB database to send queries to" < /* NOTE(review): the source text between this help line and the schema declaration is missing from this copy (the embedded numbering jumps from 85 to 106) -- restore it from the original source. */ schema; 106 | if (options.count("sqlite")) { 107 | #ifdef HAVE_LIBSQLITE3 108 | schema = make_shared(options["sqlite"],
options.count("exclude-catalog")); 109 | #else 110 | cerr << "Sorry, " PACKAGE_NAME " was compiled without SQLite support." << endl; 111 | return 1; 112 | #endif 113 | } 114 | else if(options.count("monetdb")) { 115 | #ifdef HAVE_MONETDB 116 | schema = make_shared(options["monetdb"]); 117 | #else 118 | cerr << "Sorry, " PACKAGE_NAME " was compiled without MonetDB support." << endl; 119 | return 1; 120 | #endif 121 | } 122 | else 123 | schema = make_shared(options["target"], options.count("exclude-catalog")); 124 | /* Populate the scope from the schema before any statement is generated. */ 125 | scope scope; 126 | long queries_generated = 0; 127 | schema->fill_scope(scope); 128 | /* RNG setup: --rng-state reads the generator state from the option text; otherwise the generator is seeded with --seed, or with the process id when --seed is absent. */ 129 | if (options.count("rng-state")) { 130 | istringstream(options["rng-state"]) >> smith::rng; 131 | } else { 132 | smith::rng.seed(options.count("seed") ? stoi(options["seed"]) : getpid()); 133 | } 134 | /* Loggers are notified in the order in which they are pushed below. */ 135 | vector > loggers; 136 | 137 | loggers.push_back(make_shared()); 138 | /* --log-to: the logger's first argument is the --sqlite value when that option is present, else the --target value; --monetdb is not consulted here. */ 139 | if (options.count("log-to")) 140 | loggers.push_back(make_shared( 141 | options.count("sqlite") ? options["sqlite"] : options["target"], 142 | options["log-to"], *schema)); 143 | /* --verbose: keep a raw pointer to the logger so cerr_log_handler can reach it, and install that handler for SIGINT. */ 144 | if (options.count("verbose")) { 145 | auto l = make_shared(); 146 | global_cerr_logger = &*l; 147 | loggers.push_back(l); 148 | signal(SIGINT, cerr_log_handler); 149 | } 150 | 151 | if (options.count("dump-all-graphs")) 152 | loggers.push_back(make_shared()); 153 | 154 | if (options.count("dump-all-queries")) 155 | loggers.push_back(make_shared()); 156 | /* --dry-run: print generated statements to stdout without creating a device under test; the loop only ends once --max-queries statements were produced, and never ends when that option is absent. */ 157 | if (options.count("dry-run")) { 158 | while (1) { 159 | shared_ptr gen = statement_factory(&scope); 160 | gen->out(cout); 161 | for (auto l : loggers) 162 | l->generated(*gen); 163 | cout << ";" << endl; 164 | queries_generated++; 165 | 166 | if (options.count("max-queries") 167 | && (queries_generated >= stol(options["max-queries"]))) 168 | return 0; 169 | } 170 | } 171 | /* Select the device under test with the same precedence as the schema above: --sqlite, then --monetdb, else --target. */ 172 | shared_ptr dut; 173 | 174 | if (options.count("sqlite")) { 175 | #ifdef HAVE_LIBSQLITE3 176 | dut = make_shared(options["sqlite"]); 177 | #else 178 | cerr << "Sorry, "
PACKAGE_NAME " was compiled without SQLite support." << endl; 179 | return 1; 180 | #endif 181 | } 182 | else if(options.count("monetdb")) { 183 | #ifdef HAVE_MONETDB 184 | dut = make_shared(options["monetdb"]); 185 | #else 186 | cerr << "Sorry, " PACKAGE_NAME " was compiled without MonetDB support." << endl; 187 | return 1; 188 | #endif 189 | } 190 | else 191 | dut = make_shared(options["target"]); 192 | 193 | while (1) /* Loop to recover connection loss */ 194 | { 195 | try { 196 | while (1) { /* Main loop */ 197 | /* Stop after --max-queries statements: the counter is incremented here, before generation, and a final report is emitted when the verbose logger is installed. */ 198 | if (options.count("max-queries") 199 | && (++queries_generated > stol(options["max-queries"]))) { 200 | if (global_cerr_logger) 201 | global_cerr_logger->report(); 202 | return 0; 203 | } 204 | 205 | /* Invoke top-level production to generate AST */ 206 | shared_ptr gen = statement_factory(&scope); 207 | 208 | for (auto l : loggers) 209 | l->generated(*gen); 210 | 211 | /* Generate SQL from AST */ 212 | ostringstream s; 213 | gen->out(s); 214 | 215 | /* Try to execute it */ 216 | try { 217 | dut->test(s.str()); 218 | for (auto l : loggers) 219 | l->executed(*gen); 220 | } catch (const dut::failure &e) { /* Every logger is told about the failure; a logger that throws runtime_error is reported on cerr and the remaining loggers still run. */ 221 | for (auto l : loggers) 222 | try { 223 | l->error(*gen, e); 224 | } catch (runtime_error &e) { 225 | cerr << endl << "log failed: " << typeid(*l).name() << ": " 226 | << e.what() << endl; 227 | } 228 | if ((dynamic_cast(&e))) { 229 | /* re-throw to outer loop to recover session. */ 230 | throw; 231 | } 232 | } 233 | } 234 | } 235 | catch (const dut::broken &e) { 236 | /* Give server some time to recover.
*/ 237 | this_thread::sleep_for(milliseconds(1000)); 238 | } 239 | } 240 | } 241 | catch (const exception &e) { 242 | cerr << e.what() << endl; 243 | return 1; 244 | } 245 | } 246 | -------------------------------------------------------------------------------- /util.hh: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_HH 2 | #define UTIL_HH 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace std; 8 | 9 | /* TODO: The strings are implementation-defined. How do they look in 10 | clang? */ 11 | 12 | inline std::string pretty_type(const char *raw) 13 | { 14 | ostringstream os; 15 | os << raw; 16 | string s = os.str(); 17 | while(s[0] <= '9') 18 | s.erase(s.begin()); 19 | return s; 20 | } 21 | 22 | inline std::string pretty_type(struct prod *p) 23 | { 24 | return pretty_type(typeid(*p).name()); 25 | } 26 | 27 | #endif 28 | --------------------------------------------------------------------------------