├── .github └── workflows │ └── main.yml ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── LICENSE ├── README.md ├── bootstrap ├── docs ├── application │ └── 4001.md ├── logical │ ├── 1001.md │ ├── 1002.md │ ├── 1003.md │ ├── 1004.md │ ├── 1005.md │ ├── 1006.md │ └── 1007.md ├── physical │ ├── 2001.md │ ├── 2002.md │ ├── 2003.md │ ├── 2004.md │ └── 2005.md └── query │ ├── 3001.md │ ├── 3002.md │ ├── 3003.md │ ├── 3004.md │ ├── 3005.md │ ├── 3006.md │ ├── 3007.md │ ├── 3008.md │ ├── 3009.md │ ├── 3010.md │ ├── 3011.md │ ├── 3012.md │ ├── 3013.md │ ├── 3014.md │ ├── 3015.md │ └── 3016.md ├── examples ├── auctionmark-ddl.sql ├── query4.sql ├── si-ddl.sql └── top_mutexes.sql ├── script └── packages.sh ├── src ├── CMakeLists.txt ├── checker.cpp ├── configuration.cpp ├── include │ ├── checker.h │ ├── color.h │ ├── configuration.h │ └── list.h ├── list.cpp └── main.cpp └── test ├── CMakeLists.txt └── test_suite.cpp /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: main 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - master 8 | pull_request: 9 | branches: 10 | - master 11 | 12 | jobs: 13 | build-linux-macos: 14 | runs-on: ${{ matrix.os }} 15 | strategy: 16 | matrix: 17 | os: ['ubuntu-latest', 'macos-latest'] 18 | fail-fast: false 19 | steps: 20 | - uses: actions/checkout@v4 21 | 22 | - name: run build 23 | run: | 24 | git submodule update --init --recursive 25 | mkdir build 26 | cd build && cmake -DCMAKE_BUILD_TYPE=RELEASE .. 27 | make 28 | 29 | - name: run tests 30 | run: ./build/test/test_suite 31 | 32 | - name: create release artifacts/packages 33 | run: | 34 | cd build 35 | make package 36 | 37 | - name: upload artifacts 38 | if: github.event_name == 'workflow_dispatch' 39 | uses: actions/upload-artifact@v4 40 | with: 41 | name: release-packages 42 | path: ./build/sqlcheck-x86_64.* 43 | 44 | build-windows: 45 | runs-on: ${{ matrix.os }} 46 | strategy: 47 | matrix: 48 | os: ['windows-latest'] 49 | fail-fast: false 50 | steps: 51 | - uses: actions/checkout@v4 52 | 53 | - name: Add msbuild to PATH 54 | uses: microsoft/setup-msbuild@v1.1 55 | with: 56 | msbuild-architecture: x64 57 | 58 | - name: run build 59 | run: | 60 | git submodule update --init --recursive 61 | mkdir build 62 | cd build 63 | cmake -DCMAKE_BUILD_TYPE=RELEASE .. 64 | msbuild ALL_BUILD.vcxproj /t:Build /p:Configuration=Release 65 | 66 | - name: run tests 67 | run: ./build/test/Release/test_suite.exe 68 | 69 | - name: upload artifacts 70 | if: github.event_name == 'workflow_dispatch' 71 | uses: actions/upload-artifact@v4 72 | with: 73 | name: release-packages 74 | path: ./build/bin/Release/sqlcheck.* 75 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #==============================================================================# 2 | # This file specifies intentionally untracked files that git should ignore. 3 | #==============================================================================# 4 | 5 | #==============================================================================# 6 | # File extensions to be ignored anywhere in the tree. 7 | #==============================================================================# 8 | # Temp files created by most text editors. 9 | *~ 10 | # Merge files created by git. 11 | *.orig 12 | # Byte compiled python modules. 13 | *.pyc 14 | # vim swap files 15 | .*.sw? 16 | .sw? 17 | #OS X specific files. 18 | .DS_store 19 | # Core files 20 | #core 21 | 22 | #==============================================================================# 23 | # Explicit files to ignore (only matches one). 24 | #==============================================================================# 25 | # Various tag programs 26 | /tags 27 | /TAGS 28 | /GPATH 29 | /GRTAGS 30 | /GSYMS 31 | /GTAGS 32 | .gitusers 33 | autom4te.cache 34 | cscope.files 35 | cscope.out 36 | autoconf/aclocal.m4 37 | autoconf/autom4te.cache 38 | /compile_commands.json 39 | 40 | #==============================================================================# 41 | # Directories to ignore (do not add trailing '/'s, they skip symlinks). 42 | #==============================================================================# 43 | # External projects that are tracked independently. 44 | projects/* 45 | !projects/*.* 46 | !projects/Makefile 47 | 48 | 49 | #==============================================================================# 50 | # Autotools artifacts 51 | #==============================================================================# 52 | config/ 53 | configure 54 | config-h.in 55 | autom4te.cache 56 | *Makefile.in 57 | *Makefile 58 | libtool 59 | aclocal.m4 60 | config.log 61 | config.status 62 | stamp-h1 63 | config.h 64 | m4/libtool.m4 65 | m4/ltoptions.m4 66 | m4/ltsugar.m4 67 | m4/ltversion.m4 68 | m4/lt~obsolete.m4 69 | 70 | #==============================================================================# 71 | # Build artifacts 72 | #==============================================================================# 73 | #m4/ 74 | build/ 75 | #*.m4 76 | *.o 77 | *.lo 78 | *.la 79 | *~ 80 | *.pdf 81 | *.swp 82 | a.out 83 | 84 | #==============================================================================# 85 | # Kate Swap Files 86 | #==============================================================================# 87 | *.kate-swp 88 | .#kate-* 89 | 90 | #==============================================================================# 91 | # Backup artifacts 92 | #==============================================================================# 93 | ~* 94 | *~ 95 | tmp/ 96 | 97 | #==============================================================================# 98 | # Doxygen artifacts 99 | #==============================================================================# 100 | doc/ 101 | 102 | #==============================================================================# 103 | # KDevelop files 104 | #==============================================================================# 105 | .kdev4 106 | *.kdev4 107 | .dirstamp 108 | .deps 109 | .libs 110 | 111 | #==============================================================================# 112 | # Eclipse files 113 | #==============================================================================# 114 | .wtpmodules 115 | .classpath 116 | .project 117 | .cproject 118 | .pydevproject 119 | .settings 120 | .autotools 121 | 122 | /Debug/ 123 | /misc/ 124 | 125 | #==============================================================================# 126 | # Intellij files 127 | #==============================================================================# 128 | .idea 129 | *.iml 130 | 131 | #==============================================================================# 132 | # Code Coverage files 133 | #==============================================================================# 134 | *.gcno 135 | *.gcda 136 | 137 | #==============================================================================# 138 | # Scripts 139 | #==============================================================================# 140 | *.jar 141 | scripts/PelotonTest/out 142 | scripts/PelotonTest/lib 143 | 144 | #==============================================================================# 145 | # Protobuf 146 | #==============================================================================# 147 | *.pb-c.c 148 | *.pb-c.h 149 | *.pb.cc 150 | *.pb.h 151 | *.pb.go 152 | 153 | #==============================================================================# 154 | # Third party 155 | #==============================================================================# 156 | third_party/nanomsg/ 157 | third_party/nvml/ 158 | third_party/logcabin/ 159 | 160 | #==============================================================================# 161 | # Eclipse 162 | #==============================================================================# 163 | 164 | .metadata 165 | bin/ 166 | tmp/ 167 | *.tmp 168 | *.bak 169 | *.swp 170 | *~.nib 171 | local.properties 172 | .settings/ 173 | .loadpath 174 | .recommenders 175 | 176 | # Eclipse Core 177 | .project 178 | 179 | # External tool builders 180 | .externalToolBuilders/ 181 | 182 | # Locally stored "Eclipse launch configurations" 183 | *.launch 184 | 185 | # PyDev specific (Python IDE for Eclipse) 186 | *.pydevproject 187 | 188 | # CDT-specific (C/C++ Development Tooling) 189 | .cproject 190 | 191 | # JDT-specific (Eclipse Java Development Tools) 192 | .classpath 193 | 194 | # Java annotation processor (APT) 195 | .factorypath 196 | 197 | # PDT-specific (PHP Development Tools) 198 | .buildpath 199 | 200 | # sbteclipse plugin 201 | .target 202 | 203 | # Tern plugin 204 | .tern-project 205 | 206 | # TeXlipse plugin 207 | .texlipse 208 | 209 | # STS (Spring Tool Suite) 210 | .springBeans 211 | 212 | # Code Recommenders 213 | .recommenders/ 214 | CMakeLists.txt 215 | io_file 216 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "external/googletest"] 2 | path = external/googletest 3 | url = https://github.com/google/googletest.git 4 | [submodule "external/gflags"] 5 | path = external/gflags 6 | url = https://github.com/gflags/gflags.git 7 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ################################################################################## 2 | 3 | #Preamble 4 | cmake_minimum_required(VERSION 2.8 FATAL_ERROR) 5 | project (sqlcheck) 6 | 7 | enable_language(C) 8 | enable_language(CXX) 9 | 10 | # ---[ C++ Flags 11 | if(CMAKE_CXX_COMPILER_ID MATCHES GNU) 12 | set(CMAKE_CXX_FLAGS "-Wall -Wno-unknown-pragmas -Wno-sign-compare -Woverloaded-virtual -Wno-write-strings -Wno-error=maybe-uninitialized") 13 | set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g3") 14 | set(CMAKE_CXX_FLAGS_RELEASE "-O3") 15 | endif() 16 | 17 | IF(CMAKE_BUILD_TYPE MATCHES COVERAGE) 18 | message("Coverage mode") 19 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage") 20 | ENDIF(CMAKE_BUILD_TYPE MATCHES COVERAGE) 21 | 22 | # ---[ CTest 23 | include(CTest) 24 | 25 | #Environment detection 26 | set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/") 27 | 28 | # Compiler version 29 | # Compiler-specific C++11 activation. 30 | if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU") 31 | execute_process( 32 | COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) 33 | if (NOT (GCC_VERSION VERSION_GREATER 4.9 OR GCC_VERSION VERSION_EQUAL 4.9)) 34 | message(FATAL_ERROR "${PROJECT_NAME} requires g++ 4.9 or greater.") 35 | endif () 36 | elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") 37 | execute_process( 38 | COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE CLANG_VERSION) 39 | else() 40 | message(WARNING "You are using an unsupported compiler! Compilation has only been tested with GCC.") 41 | endif () 42 | 43 | # ---[ C++11 Flags 44 | include(CheckCXXCompilerFlag) 45 | CHECK_CXX_COMPILER_FLAG("-std=c++11" COMPILER_SUPPORTS_CXX11) 46 | if(COMPILER_SUPPORTS_CXX11) 47 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") 48 | else() 49 | message(STATUS "The compiler ${CMAKE_CXX_COMPILER} has no C++11 support. Please use a different C++ compiler.") 50 | endif() 51 | 52 | # -- [ Debug Flags 53 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -ggdb") 54 | 55 | # -- [ GTest 56 | enable_testing() 57 | set(GOOGLETEST_ROOT external/googletest/googletest CACHE STRING "Google Test source root") 58 | include_directories( 59 | ${PROJECT_SOURCE_DIR}/${GOOGLETEST_ROOT} 60 | ${PROJECT_SOURCE_DIR}/${GOOGLETEST_ROOT}/include 61 | ) 62 | set(GOOGLETEST_SOURCES 63 | ${PROJECT_SOURCE_DIR}/${GOOGLETEST_ROOT}/src/gtest-all.cc 64 | ${PROJECT_SOURCE_DIR}/${GOOGLETEST_ROOT}/src/gtest_main.cc 65 | ) 66 | foreach(_source ${GOOGLETEST_SOURCES}) 67 | set_source_files_properties(${_source} PROPERTIES GENERATED 1) 68 | endforeach() 69 | 70 | add_library(googletest ${GOOGLETEST_SOURCES}) 71 | 72 | # -- [ GFlags 73 | 74 | set(GFLAGS_ROOT external/gflags CACHE STRING "Google Flags") 75 | 76 | add_subdirectory(${PROJECT_SOURCE_DIR}/${GFLAGS_ROOT}) 77 | 78 | include_directories( 79 | ${PROJECT_BINARY_DIR}/${GFLAGS_ROOT}/include 80 | ) 81 | 82 | # --[ Threads 83 | 84 | find_package(Threads REQUIRED) 85 | 86 | 87 | # --[ Flags 88 | if(UNIX OR APPLE) 89 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -Wall -Wextra -Werror -Wno-writable-strings") 90 | endif() 91 | 92 | # -- [Info 93 | 94 | # Choose the type of build. Example: SET(CMAKE_BUILD_TYPE Debug) 95 | MESSAGE( STATUS "CMAKE_BUILD_TYPE: " ${CMAKE_BUILD_TYPE} ) 96 | # the compiler used for C++ files 97 | MESSAGE( STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER} ) 98 | # the compiler flags for compiling C++ sources 99 | MESSAGE( STATUS "CMAKE_CXX_FLAGS: " ${CMAKE_CXX_FLAGS} ) 100 | 101 | 102 | # --[ Packaging 103 | 104 | SET(VERSION "1.3") 105 | SET(CPACK_PACKAGE_NAME "sqlcheck") 106 | SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Automatically identify anti-patterns in SQL queries.") 107 | SET(CPACK_PACKAGE_DESCRIPTION "SQLCheck automates the detection of common anti-patterns in SQL queries. 108 | Such anti-patterns often increase the time taken to run the queries on a database management system. 109 | Addressing these anti-patterns will, therefore, help improve the runtime performance of the queries. 110 | This tool targets all SQL dialects.") 111 | SET(CPACK_PACKAGE_VERSION ${VERSION}) 112 | SET(CPACK_DEBIAN_PACKAGE_MAINTAINER "Joy Arulraj") #required 113 | 114 | set(CPACK_PACKAGE_EXECUTABLES "sqlcheck") 115 | set(CPACK_MONOLITHIC_INSTALL TRUE) 116 | if (WIN32) 117 | # NSIS Setup 118 | SET(CPACK_GENERATOR "NSIS") 119 | SET(CPACK_NSIS_MODIFY_PATH TRUE) 120 | SET(CPACK_NSIS_EXECUTABLES_DIRECTORY "${DISPLAZ_BIN_DIR}") 121 | SET(CPACK_NSIS_ENABLE_UNINSTALL_BEFORE_INSTALL TRUE) 122 | elseif(APPLE) 123 | # OSX Bundle generator setup 124 | SET(CPACK_GENERATOR "DragNDrop") 125 | SET(CPACK_BUNDLE_NAME "sqlcheck") 126 | elseif(UNIX) 127 | SET(CPACK_GENERATOR "TGZ;RPM;DEB;ZIP") 128 | endif() 129 | SET(CPACK_STRIP_FILES TRUE) 130 | 131 | SET(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CMAKE_SYSTEM_PROCESSOR}") 132 | 133 | INCLUDE(CPack) 134 | 135 | # ---[ Subdirectories 136 | add_subdirectory(src) 137 | add_subdirectory(test) 138 | 139 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [![GitHub license](https://img.shields.io/badge/license-apache-blue.svg?style=flat)](https://www.apache.org/licenses/LICENSE-2.0) 3 | [![Version](https://img.shields.io/badge/version-1.3-red.svg)](https://github.com/jarulraj/sqlcheck/) 4 | [![main](https://github.com/jarulraj/sqlcheck/actions/workflows/main.yml/badge.svg)](https://github.com/jarulraj/sqlcheck/actions/workflows/main.yml) 5 | [![Coverage Status](https://coveralls.io/repos/github/jarulraj/sqlcheck/badge.svg?branch=master)](https://coveralls.io/github/jarulraj/sqlcheck?branch=master) 6 | 7 | ## What Is SQLCheck? 8 | 9 | `sqlcheck` automatically detects common SQL anti-patterns. Such anti-patterns often slow down queries. Addressing them will, therefore, help accelerate queries. 10 | 11 | `sqlcheck` targets **all major SQL dialects**. 12 | 13 | For development updates on `sqlcheck` and general news on next generation database systems, follow me at @joy_arulraj. 14 | 15 | ## What it can do 16 | 17 | Right now SQLCheck can detect the following anti-patterns: 18 | 19 | ### Logical Database Design Anti-Patterns 20 | 21 | * [Multi-Valued Attribute](https://github.com/jarulraj/sqlcheck/blob/master/docs/logical/1001.md) 22 | * [Recursive Dependency](https://github.com/jarulraj/sqlcheck/blob/master/docs/logical/1002.md) 23 | * [Primary Key Does Not Exist](https://github.com/jarulraj/sqlcheck/blob/master/docs/logical/1003.md) 24 | * [Generic Primary Key](https://github.com/jarulraj/sqlcheck/blob/master/docs/logical/1004.md) 25 | * [Foreign Key Does Not Exist](https://github.com/jarulraj/sqlcheck/blob/master/docs/logical/1005.md) 26 | * [Entity-Attribute-Value Pattern](https://github.com/jarulraj/sqlcheck/blob/master/docs/logical/1006.md) 27 | * [Metadata Tribbles](https://github.com/jarulraj/sqlcheck/blob/master/docs/logical/1007.md) 28 | 29 | ### Physical Database Design Anti-Patterns 30 | 31 | * [Imprecise Data Type](https://github.com/jarulraj/sqlcheck/blob/master/docs/physical/2001.md) 32 | * [Values In Definition](https://github.com/jarulraj/sqlcheck/blob/master/docs/physical/2002.md) 33 | * [Files Are Not SQL Data Types](https://github.com/jarulraj/sqlcheck/blob/master/docs/physical/2003.md) 34 | * [Too Many Indexes](https://github.com/jarulraj/sqlcheck/blob/master/docs/physical/2004.md) 35 | * [Index Attribute Order](https://github.com/jarulraj/sqlcheck/blob/master/docs/physical/2005.md) 36 | 37 | ### Query Anti-Patterns 38 | 39 | * [SELECT *](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3001.md) 40 | * [NULL Usage](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3002.md) 41 | * [NOT NULL Usage](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3003.md) 42 | * [String Concatenation](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3004.md) 43 | * [GROUP BY Usage](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3005.md) 44 | * [ORDER BY RAND Usage](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3006.md) 45 | * [Pattern Matching Usage](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3007.md) 46 | * [Spaghetti Query Alert](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3008.md) 47 | * [Reduce Number of JOINs](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3009.md) 48 | * [Eliminate Unnecessary DISTINCT Conditions](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3010.md) 49 | * [Implicit Column Usage](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3011.md) 50 | * [HAVING Clause Usage](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3012.md) 51 | * [Nested sub queries](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3013.md) 52 | * [OR Usage](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3014.md) 53 | * [UNION Usage](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3015.md) 54 | * [DISTINCT & JOIN Usage](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3016.md) 55 | 56 | ### Application Development Anti-Patterns 57 | 58 | * [Readable Passwords](https://github.com/jarulraj/sqlcheck/blob/master/docs/application/4001.md) 59 | 60 | ## Installation 61 | 62 | ###UBUNTU/MINT (DEBIAN) 63 | 64 | 1. Download the **debian package** from the latest release. 65 | ```shell 66 | wget https://github.com/jarulraj/sqlcheck/releases/download/v1.3/sqlcheck-x86_64.deb 67 | ``` 68 | 69 | 2. Install it by running the following command. 70 | ```shell 71 | dpkg -i sqlcheck-x86_64.deb 72 | ``` 73 | 74 | ### FEDORA/CENTOS (RPM) 75 | 76 | 1. Download the **rpm package** from the latest release. 77 | ```shell 78 | wget https://github.com/jarulraj/sqlcheck/releases/download/v1.3/sqlcheck-x86_64.rpm 79 | ``` 80 | 81 | 2. Install it by running the following command. 82 | ```shell 83 | yum --nogpgcheck localinstall sqlcheck-x86_64.rpm 84 | ``` 85 | 86 | ### MAC (DMG) 87 | 88 | 1. Download the **dmg package** from the latest release. 89 | ```shell 90 | wget https://github.com/jarulraj/sqlcheck/releases/download/v1.3/sqlcheck-x86_64.dmg 91 | ``` 92 | 93 | 2. Click the dmg to mount the package. This will mount it in the `Volumes` directory. 94 | 95 | 3. Open the `Terminal` app. [This page](http://blog.teamtreehouse.com/introduction-to-the-mac-os-x-command-line) contains more details on finding the app. 96 | 97 | 4. Copy over the SQLCheck binary file to desired local directory. 98 | ```shell 99 | cp /Volumes/sqlcheck-x86_64/bin/sqlcheck /usr/local/bin/ 100 | ``` 101 | 102 | 103 | ### WINDOWS (EXE) 104 | 105 | 1. Download the **exe file** from the latest release. 106 | ```shell 107 | wget https://github.com/jarulraj/sqlcheck/releases/download/v1.3/sqlcheck.exe 108 | ``` 109 | 110 | 2. Open a terminal (like `cmd.exe`) and run the executable. 111 | 112 | ### ZIP 113 | 114 | 1. Download the **zip package** from the latest release. 115 | ```shell 116 | wget https://github.com/jarulraj/sqlcheck/releases/download/v1.3/sqlcheck-x86_64.zip 117 | ``` 118 | 119 | 2. Unzip it and find the SQLCheck binary (`bin/sqlcheck`). 120 | ```shell 121 | unzip sqlcheck-x86_64.zip 122 | ``` 123 | 124 | ## SOURCE CODE 125 | 126 | SQLCheck has the following software dependencies: 127 | 128 | - **g++ 4.9+** 129 | - **cmake** ([Cmake installation guide](https://cmake.org/install/)) 130 | 131 | First, clone the repository (with **--recursive** option). 132 | 133 | ```shell 134 | git clone --recursive https://github.com/jarulraj/sqlcheck.git 135 | ``` 136 | 137 | Next, run the following commands to build and install SQLCheck: 138 | 139 | ```shell 140 | ./bootstrap 141 | cd build 142 | cmake -DCMAKE_BUILD_TYPE=RELEASE .. 143 | make 144 | make install 145 | ``` 146 | 147 | 148 | ### TESTS 149 | 150 | To run SQLCheck unit tests during development, run: 151 | 152 | ```shell 153 | ./build/test/test_suite 154 | ``` 155 | 156 | ## Usage 157 | 158 | ``` 159 | $ sqlcheck -h 160 | 161 | Command line options : sqlcheck 162 | -f --file_name : file name 163 | -r --risk_level : set of anti-patterns to check 164 | : 1 (all anti-patterns, default) 165 | : 2 (only medium and high risk anti-patterns) 166 | : 3 (only high risk anti-patterns) 167 | -c --color_mode : color mode 168 | -v --verbose_mode : verbose mode 169 | ``` 170 | 171 | ```sql 172 | $ sqlcheck -f examples/top_mutexes.sql -v 173 | 174 | ------------------------------------------------- 175 | > RISK LEVEL :: ALL ANTI-PATTERNS 176 | > SQL FILE NAME :: examples/top_mutexes.sql 177 | ------------------------------------------------- 178 | ==================== Results =================== 179 | 180 | ------------------------------------------------- 181 | SQL Statement: with top_mutexes as ( select--+ leading(t1 s1 v1 v2 t2 s2) use_hash(s1) 182 | use_nl(v1) use_hash(s2) materialize t1.hsecs ,s1.* ,s2.sleeps as end_sleeps 183 | ,s2.wait_time as end_wait_time ,s2.sleeps-s1.sleeps as delta_sleeps ,t2.hsecs - 184 | t1.hsecs as delta_hsecs --,s2.* from v$timer t1 ,v$mutex_sleep s1 ,(select/*+ 185 | no_merge */ sum(level) a from dual connect by level<=1e6) v1 ,v$timer t2 186 | ,v$mutex_sleep s2 where s1.mutex_type=s2.mutex_type and s1.location=s2.location 187 | ) select * from top_mutexes order by delta_sleeps desc; 188 | ``` 189 | 190 | ``` 191 | [examples/top_mutexes.sql]: (HIGH RISK) (QUERY ANTI-PATTERN) SELECT * 192 | 193 | ● Inefficiency in moving data to the consumer: 194 | 195 | When you SELECT *, you're often retrieving more columns from the database than 196 | your application really needs to function. This causes more data to move from 197 | the database server to the client, slowing access and increasing load on your 198 | machines, as well as taking more time to travel across the network. This is 199 | especially true when someone adds new columns to underlying tables that didn't 200 | exist and weren't needed when the original consumers coded their data access. 201 | 202 | 203 | ● Indexing issues: 204 | 205 | Consider a scenario where you want to tune a query to a high level of 206 | performance. If you were to use *, and it returned more columns than you 207 | actually needed, the server would often have to perform more expensive methods 208 | to retrieve your data than it otherwise might. For example, you wouldn't be able 209 | to create an index which simply covered the columns in your SELECT list, and 210 | even if you did (including all columns [shudder]), the next developer who came around 211 | and added a column to the underlying table would cause the optimizer to ignore 212 | your optimized covering index, and you'd likely find that the performance of 213 | your query would drop substantially for no readily apparent reason. 214 | [Matching Expression: select *] 215 | 216 | [examples/top_mutexes.sql]: (LOW RISK) (QUERY ANTI-PATTERN) Spaghetti Query Alert 217 | 218 | ● Split up a complex spaghetti query into several simpler queries: 219 | 220 | SQL is a very expressive language—you can accomplish a lot in a single query 221 | or statement. But that doesn't mean it's mandatory or even a good idea to 222 | approach every task with the assumption it has to be done in one line of code. 223 | One common unintended consequence of producing all your results in one query is 224 | a Cartesian product. This happens when two of the tables in the query have no 225 | condition restricting their relationship. Without such a restriction, the join 226 | of two tables pairs each row in the first table to every row in the other table. 227 | Each such pairing becomes a row of the result set, and you end up with many more 228 | rows than you expect. 229 | 230 | It's important to consider that these queries are simply 231 | hard to write, hard to modify, and hard to debug. You should expect to get 232 | regular requests for incremental enhancements to your database applications. 233 | Managers want more complex reports and more fields in a user interface. If you 234 | design intricate, monolithic SQL queries, it's more costly and time-consuming to 235 | make enhancements to them. Your time is worth something, both to you and to your 236 | project. Split up a complex spaghetti query into several simpler queries. When 237 | you split up a complex SQL query, the result may be many similar queries, 238 | perhaps varying slightly depending on data values. Writing these queries is a 239 | chore, so it's a good application of SQL code generation. Although SQL makes it 240 | seem possible to solve a complex problem in a single line of code, don't be 241 | tempted to build a house of cards. 242 | 243 | ==================== Summary =================== 244 | All Anti-Patterns :: 2 245 | > High Risk :: 1 246 | > Medium Risk :: 0 247 | > Low Risk :: 1 248 | 249 | ``` 250 | 251 | ## References 252 | 253 | (1) SQL Anti-patterns: Avoiding the Pitfalls of Database Programming, Bill Karwin 254 | (2) [Common SQL Anti-patterns](https://stackoverflow.com/questions/346659/what-are-the-most-common-sql-anti-patterns), StackOverflow 255 | 256 | ## Contributions 257 | 258 | Contributions to SQLCheck are always welcome. You can contribute in different ways: 259 | * Open an issue with suggestions for improvements and errors you're facing; 260 | * Fork this repository and submit a pull request; 261 | * Improve the documentation. 262 | 263 | ## License 264 | 265 | Licensed under the [Apache License](LICENSE). 266 | -------------------------------------------------------------------------------- /bootstrap: -------------------------------------------------------------------------------- 1 | #! /bin/sh -e 2 | # bootstrap -- Use this script to create generated files from the CVS dist 3 | 4 | if [ ! -d "build" ] ; then 5 | mkdir build 6 | fi 7 | 8 | -------------------------------------------------------------------------------- /docs/application/4001.md: -------------------------------------------------------------------------------- 1 | # Readable Passwords 2 | 3 | ## Do not store readable passwords: 4 | It's not secure to store a password in clear text or even to pass it over the 5 | network in the clear. If an attacker can read the SQL statement you use to 6 | insert a password, they can see the password plainly. 7 | Additionally, interpolating the user's input string into the SQL query in plain text 8 | exposes it to discovery by an attacker. 9 | If you can read passwords, so can a hacker. 10 | The solution is to encode the password using a one-way cryptographic hash 11 | function. This function transforms its input string into a new string, 12 | called the hash, that is unrecognizable. 13 | Use a salt to thwart dictionary attacks. Don't put the plain-text password 14 | into the SQL query. Instead, compute the hash in your application code, 15 | and use only the hash in the SQL query. -------------------------------------------------------------------------------- /docs/logical/1001.md: -------------------------------------------------------------------------------- 1 | # Multi-Valued Attribute 2 | 3 | ## **Store each value in its own column and row:** 4 | Storing a list of IDs as a VARCHAR/TEXT column can cause performance and data integrity 5 | problems. Querying against such a column would require using pattern-matching 6 | expressions. It is awkward and costly to join a comma-separated list to matching rows. 7 | This will make it harder to validate IDs. Think about what is the greatest number of 8 | entries this list must support? Instead of using a multi-valued attribute, 9 | consider storing it in a separate table, so that each individual value of that attribute 10 | occupies a separate row. Such an intersection table implements a many-to-many relationship 11 | between the two referenced tables. This will greatly simplify querying and validating 12 | the IDs. -------------------------------------------------------------------------------- /docs/logical/1002.md: -------------------------------------------------------------------------------- 1 | # Recursive Dependency 2 | 3 | ## **Avoid recursive relationships:** 4 | It's common for data to have recursive relationships. Data may be organized in a 5 | treelike or hierarchical way. However, creating a foreign key constraint to enforce 6 | the relationship between two columns in the same table lends to awkward querying. 7 | Each level of the tree corresponds to another join. You will need to issue recursive 8 | queries to get all descendants or all ancestors of a node. 9 | A solution is to construct an additional closure table. It involves storing all paths 10 | through the tree, not just those with a direct parent-child relationship. 11 | You might want to compare different hierarchical data designs -- closure table, 12 | path enumeration, nested sets -- and pick one based on your application's needs. -------------------------------------------------------------------------------- /docs/logical/1003.md: -------------------------------------------------------------------------------- 1 | # Primary Key Does Not Exist 2 | 3 | ## **Consider adding a primary key:** 4 | A primary key constraint is important when you need to do the following: 5 | prevent a table from containing duplicate rows, 6 | reference individual rows in queries, and 7 | support foreign key references 8 | If you don't use primary key constraints, you create a chore for yourself: 9 | checking for duplicate rows. More often than not, you will need to define 10 | a primary key for every table. Use compound keys when they are appropriate. 11 | -------------------------------------------------------------------------------- /docs/logical/1004.md: -------------------------------------------------------------------------------- 1 | # Generic Primary Key 2 | 3 | ## **Skip using a generic primary key (id):** 4 | Adding an id column to every table causes several effects that make its 5 | use seem arbitrary. You might end up creating a redundant key or allow 6 | duplicate rows if you add this column in a compound key. 7 | The name id is so generic that it holds no meaning. This is especially 8 | important when you join two tables and they have the same primary 9 | key column name. -------------------------------------------------------------------------------- /docs/logical/1005.md: -------------------------------------------------------------------------------- 1 | # Foreign Key Does Not Exist 2 | 3 | ## **Consider adding a foreign key:** 4 | Are you leaving out the application constraints? Even though it seems at 5 | first that skipping foreign key constraints makes your database design 6 | simpler, more flexible, or speedier, you pay for this in other ways. 7 | It becomes your responsibility to write code to ensure referential integrity 8 | manually. Use foreign key constraints to enforce referential integrity. 9 | Foreign keys have another feature you can't mimic using application code: 10 | cascading updates to multiple tables. This feature allows you to 11 | update or delete the parent row and lets the database takes care of any child 12 | rows that reference it. The way you declare the ON UPDATE or ON DELETE clauses 13 | in the foreign key constraint allow you to control the result of a cascading 14 | operation. Make your database mistake-proof with constraints. 15 | -------------------------------------------------------------------------------- /docs/logical/1006.md: -------------------------------------------------------------------------------- 1 | # Entity-Attribute-Value Pattern 2 | 3 | 4 | ## **Dynamic schema with variable attributes:** 5 | Are you trying to create a schema where you can define new attributes 6 | at runtime.? This involves storing attributes as rows in an attribute table. 7 | This is referred to as the Entity-Attribute-Value or schemaless pattern. 8 | When you use this pattern, you sacrifice many advantages that a conventional 9 | database design would have given you. You can't make mandatory attributes. 10 | You can't enforce referential integrity. You might find that attributes are 11 | not being named consistently. A solution is to store all related types in one table, 12 | with distinct columns for every attribute that exists in any type 13 | (Single Table Inheritance). Use one attribute to define the subtype of a given row. 14 | Many attributes are subtype-specific, and these columns must 15 | be given a null value on any row storing an object for which the attribute 16 | does not apply the columns with non-null values become sparse. 17 | Another solution is to create a separate table for each subtype 18 | (Concrete Table Inheritance). A third solution mimics inheritance, 19 | as though tables were object-oriented classes (Class Table Inheritance). 20 | Create a single table for the base type, containing attributes common to 21 | all subtypes. Then for each subtype, create another table, with a primary key 22 | that also serves as a foreign key to the base table. 23 | If you have many subtypes or if you must support new attributes frequently, 24 | you can add a BLOB column to store data in a format such as XML or JSON, 25 | which encodes both the attribute names and their values. 26 | This design is best when you can't limit yourself to a finite set of subtypes 27 | and when you need complete flexibility to define new attributes at any time. -------------------------------------------------------------------------------- /docs/logical/1007.md: -------------------------------------------------------------------------------- 1 | # Metadata Tribbles 2 | 3 | ## **Breaking down a table or column by year/user/etc.:** 4 | You might be trying to split a single column into multiple columns, 5 | using column names based on distinct values in another attribute. 6 | For each year or user, you will need to add one more column or table. 7 | You are mixing metadata with data. You will now need to make sure that 8 | the primary key values are unique across all the split columns or tables. 9 | The solution is to use a feature called sharding or horizontal partitioning. 10 | ```(PARTITION BY HASH ( YEAR(...) )```. With this feature, you can gain the 11 | benefits of splitting a large table without the drawbacks. 12 | Partitioning is not defined in the SQL standard, so each brand of database 13 | implements it in their own nonstandard way. 14 | Another remedy for metadata tribbles is to create a dependent table. 15 | Instead of one row per entity with multiple columns for each year, 16 | use multiple rows. Don't let data spawn metadata. 17 | 18 | ## **Store each value with the same meaning in a single column:** 19 | Creating multiple columns in a table with the same prefix 20 | indicates that you are trying to store a multivalued attribute. 21 | This design makes it hard to add or remove values, 22 | to ensure the uniqueness of values, and handling growing sets of values. 23 | The best solution is to create a dependent table with one column for the 24 | multivalued attribute. Store the multiple values in multiple rows instead of 25 | multiple columns and define a foreign key in the dependent table to associate 26 | the values to its parent row. -------------------------------------------------------------------------------- /docs/physical/2001.md: -------------------------------------------------------------------------------- 1 | # Imprecise Data Type 2 | 3 | ## Use precise data types: 4 | Virtually any use of FLOAT, REAL, or DOUBLE PRECISION data types is suspect. 5 | Most applications that use floating-point numbers don't require the range of 6 | values supported by IEEE 754 formats. The cumulative impact of inexact 7 | floating-point numbers is severe when calculating aggregates. 8 | Instead of FLOAT or its siblings, use the NUMERIC or DECIMAL SQL data types 9 | for fixed-precision fractional numbers. These data types store numeric values 10 | exactly, up to the precision you specify in the column definition. 11 | Do not use FLOAT if you can avoid it. -------------------------------------------------------------------------------- /docs/physical/2002.md: -------------------------------------------------------------------------------- 1 | # Values In Definition 2 | 3 | ## Don't specify values in column definition: 4 | With enum, you declare the values as strings, 5 | but internally the column is stored as the ordinal number of the string 6 | in the enumerated list. The storage is therefore compact, but when you 7 | sort a query by this column, the result is ordered by the ordinal value, 8 | not alphabetically by the string value. You may not expect this behavior. 9 | Not all databases allow you to add or remove a value from an ENUM or check 10 | constraint; you can only redefine the column with a new set of values. 11 | Moreover, if you make a value obsolete, you could upset historical data. 12 | As a matter of policy, changing metadata — that is, changing the definition 13 | of tables and columns—should be infrequent and with attention to testing and 14 | quality assurance. There's a better solution to restrict values in a column: 15 | create a lookup table with one row for each value you allow. 16 | Then declare a foreign key constraint on the old table referencing 17 | the new table. 18 | Use metadata when validating against a fixed set of values. 19 | Use data when validating against a fluid set of values. 20 | -------------------------------------------------------------------------------- /docs/physical/2003.md: -------------------------------------------------------------------------------- 1 | # Files Are Not SQL Data Types 2 | 3 | ## Resources outside the database are not managed by the database: 4 | It's common for programmers to be unequivocal that we should always 5 | store files external to the database. 6 | Files don't obey DELETE, transaction isolation, rollback, or work well with 7 | database backup tools. They do not obey SQL access privileges and are not SQL 8 | data types. 9 | Resources outside the database are not managed by the database. 10 | You should consider storing blobs inside the database instead of in 11 | external files. You can save the contents of a BLOB column to a file. -------------------------------------------------------------------------------- /docs/physical/2004.md: -------------------------------------------------------------------------------- 1 | # Too Many Indexes 2 | 3 | ## Don't create too many indexes: 4 | You benefit from an index only if you run queries that use that index. 5 | There's no benefit to creating indexes that you don't use. 6 | If you cover a database table with indexes, you incur a lot of overhead 7 | with no assurance of payoff. 8 | Consider dropping unnecessary indexes. 9 | If an index provides all the columns we need, then we don't need to read 10 | rows of data from the table at all. Consider using such covering indexes. 11 | Know your data, know your queries, and maintain the right set of indexes. 12 | -------------------------------------------------------------------------------- /docs/physical/2005.md: -------------------------------------------------------------------------------- 1 | # Index Attribute Order 2 | 3 | ## Align the index attribute order with queries: 4 | If you create a compound index for the columns, make sure that the query 5 | attributes are in the same order as the index attributes, so that the DBMS 6 | can use the index while processing the query. 7 | If the query and index attribute orders are not aligned, then the DBMS might 8 | be unable to use the index during query processing. 9 | 10 | ### Example 11 | 12 | ``` 13 | CREATE INDEX TelephoneBook ON Accounts(last_name, first_name) 14 | SELECT * FROM Accounts ORDER BY first_name, last_name 15 | ``` 16 | -------------------------------------------------------------------------------- /docs/query/3001.md: -------------------------------------------------------------------------------- 1 | # SELECT * 2 | 3 | ## Inefficiency in moving data to the consumer: 4 | When you SELECT *, you're often retrieving more columns from the database than 5 | your application really needs to function. This causes more data to move from 6 | the database server to the client, slowing access and increasing load on your 7 | machines, as well as taking more time to travel across the network. This is 8 | especially true when someone adds new columns to underlying tables that didn't 9 | exist and weren't needed when the original consumers coded their data access. 10 | 11 | ## Indexing issues: 12 | Consider a scenario where you want to tune a query to a high level of performance. 13 | If you were to use *, and it returned more columns than you actually needed, 14 | the server would often have to perform more expensive methods to retrieve your 15 | data than it otherwise might. For example, you wouldn't be able to create an index 16 | which simply covered the columns in your SELECT list, and even if you did 17 | (including all columns [shudder]), the next developer who came around and added a column 18 | to the underlying table would cause the optimizer to ignore your optimized covering 19 | index, and you'd likely find that the performance of your query would drop 20 | substantially for no readily apparent reason. 21 | 22 | ## Binding Problems: 23 | When you SELECT *, it's possible to retrieve two columns of the same name from two 24 | different tables. This can often crash your data consumer. Imagine a query that joins 25 | two tables, both of which contain a column called \ID\. How would a consumer know 26 | which was which? SELECT * can also confuse views (at least in some versions SQL Server) 27 | when underlying table structures change -- the view is not rebuilt, and the data which 28 | comes back can be nonsense. And the worst part of it is that you can take care to name 29 | your columns whatever you want, but the next developer who comes along might have no way of 30 | knowing that he has to worry about adding a column which will collide with your 31 | already-developed names. 32 | -------------------------------------------------------------------------------- /docs/query/3002.md: -------------------------------------------------------------------------------- 1 | # NULL Usage 2 | 3 | 4 | ## Use NULL as a Unique Value: 5 | NULL is not the same as zero. A number ten greater than an unknown is still an unknown. 6 | NULL is not the same as a string of zero length. 7 | Combining any string with NULL in standard SQL returns NULL. 8 | NULL is not the same as false. Boolean expressions with AND, OR, and NOT also produce 9 | results that some people find confusing. 10 | When you declare a column as NOT NULL, it should be because it would make no sense 11 | for the row to exist without a value in that column. 12 | Use null to signify a missing value for any data type. 13 | -------------------------------------------------------------------------------- /docs/query/3003.md: -------------------------------------------------------------------------------- 1 | # NOT NULL Usage 2 | 3 | ## Use NOT NULL only if the column cannot have a missing value: 4 | When you declare a column as NOT NULL, it should be because it would make no sense 5 | for the row to exist without a value in that column. 6 | Use null to signify a missing value for any data type. 7 | -------------------------------------------------------------------------------- /docs/query/3004.md: -------------------------------------------------------------------------------- 1 | # String Concatenation 2 | 3 | ## Use COALESCE for string concatenation of nullable columns: 4 | You may need to force a column or expression to be non-null for the sake of 5 | simplifying the query logic, but you don't want that value to be stored. 6 | Use COALESCE function to construct the concatenated expression so that a 7 | null-valued column doesn't make the whole expression become null. 8 | 9 | ### Example 10 | 11 | ``` 12 | SELECT first_name || COALESCE(' ' || middle_initial || ' ', ' ') || last_name 13 | AS full_name FROM Accounts 14 | ``` -------------------------------------------------------------------------------- /docs/query/3005.md: -------------------------------------------------------------------------------- 1 | # GROUP BY Usage 2 | 3 | ## Do not reference non-grouped columns: 4 | Every column in the select-list of a query must have a single value row 5 | per row group. This is called the Single-Value Rule. 6 | Columns named in the GROUP BY clause are guaranteed to be exactly one value 7 | per group, no matter how many rows the group matches. 8 | Most DBMSs report an error if you try to run any query that tries to return 9 | a column other than those columns named in the GROUP BY clause or as 10 | arguments to aggregate functions. 11 | Every expression in the select list must be contained in either an 12 | aggregate function or the GROUP BY clause. 13 | Follow the single-value rule to avoid ambiguous query results. 14 | -------------------------------------------------------------------------------- /docs/query/3006.md: -------------------------------------------------------------------------------- 1 | # ORDER BY RAND Usage 2 | 3 | ## Sorting by a nondeterministic expression (RAND()) means the sorting cannot benefit from an index: 4 | There is no index containing the values returned by the random function. 5 | That's the point of them being ran- dom: they are different and 6 | unpredictable each time they're selected. This is a problem for the performance 7 | of the query, because using an index is one of the best ways of speeding up 8 | sorting. The consequence of not using an index is that the query result set 9 | has to be sorted by the database using a slow table scan. 10 | One technique that avoids sorting the table is to choose a random value 11 | between 1 and the greatest primary key value. 12 | Still another technique that avoids problems found in the preceding alternatives 13 | is to count the rows in the data set and return a random number between 0 and 14 | the count. Then use this number as an offset when querying the data set. 15 | Some queries just cannot be optimized consider taking a different approach. -------------------------------------------------------------------------------- /docs/query/3007.md: -------------------------------------------------------------------------------- 1 | # Pattern Matching Usage 2 | 3 | ## Avoid using vanilla pattern matching: 4 | The most important disadvantage of pattern-matching operators is that 5 | they have poor performance. A second problem of simple pattern-matching using LIKE 6 | or regular expressions is that it can find unintended matches. 7 | It's best to use a specialized search engine technology like Apache Lucene, instead of SQL. 8 | Another alternative is to reduce the recurring cost of search by saving the result. 9 | Consider using vendor extensions like FULLTEXT INDEX in MySQL. 10 | More broadly, you don't have to use SQL to solve every problem. -------------------------------------------------------------------------------- /docs/query/3008.md: -------------------------------------------------------------------------------- 1 | # Spaghetti Query Alert 2 | 3 | ## Split up a complex spaghetti query into several simpler queries: 4 | SQL is a very expressive language—you can accomplish a lot in a single query or statement. 5 | But that doesn't mean it's mandatory or even a good idea to approach every task with the 6 | assumption it has to be done in one line of code. 7 | 8 | One common unintended consequence of producing all your results in one query is 9 | a Cartesian product. This happens when two of the tables in the query have no condition 10 | restricting their relationship. Without such a restriction, the join of two tables pairs 11 | each row in the first table to every row in the other table. Each such pairing becomes a 12 | row of the result set, and you end up with many more rows than you expect. 13 | 14 | It's important to consider that these queries are simply hard to write, hard to modify, 15 | and hard to debug. You should expect to get regular requests for incremental enhancements 16 | to your database applications. Managers want more complex reports and more fields in a 17 | user interface. If you design intricate, monolithic SQL queries, it's more costly and 18 | time-consuming to make enhancements to them. Your time is worth something, both to you 19 | and to your project. 20 | 21 | Split up a complex spaghetti query into several simpler queries. 22 | When you split up a complex SQL query, the result may be many similar queries, 23 | perhaps varying slightly depending on data values. Writing these queries is a chore, 24 | so it's a good application of SQL code generation. 25 | Although SQL makes it seem possible to solve a complex problem in a single line of code, 26 | don't be tempted to build a house of cards. 27 | -------------------------------------------------------------------------------- /docs/query/3009.md: -------------------------------------------------------------------------------- 1 | # Reduce Number of JOINs 2 | 3 | ## Reduce Number of JOINs: 4 | Too many JOINs is a symptom of complex spaghetti queries. Consider splitting 5 | up the complex query into many simpler queries, and reduce the number of JOINs 6 | -------------------------------------------------------------------------------- /docs/query/3010.md: -------------------------------------------------------------------------------- 1 | # Eliminate Unnecessary DISTINCT Conditions 2 | 3 | ## Eliminate Unnecessary DISTINCT Conditions: 4 | Too many DISTINCT conditions is a symptom of complex spaghetti queries. 5 | Consider splitting up the complex query into many simpler queries, 6 | and reduce the number of DISTINCT conditions 7 | It is possible that the DISTINCT condition has no effect if a primary key 8 | column is part of the result set of columns -------------------------------------------------------------------------------- /docs/query/3011.md: -------------------------------------------------------------------------------- 1 | # Implicit Column Usage 2 | 3 | ## Explicitly name columns: 4 | Although using wildcards and unnamed columns satisfies the goal 5 | of less typing, this habit creates several hazards. 6 | This can break application refactoring and can harm performance. 7 | Always spell out all the columns you need, instead of relying on 8 | wild-cards or implicit column lists. -------------------------------------------------------------------------------- /docs/query/3012.md: -------------------------------------------------------------------------------- 1 | # HAVING Clause Usage 2 | 3 | ## Consider removing the HAVING clause: 4 | Rewriting the query's HAVING clause into a predicate will enable the 5 | use of indexes during query processing. 6 | 7 | ### Example 8 | 9 | ``` 10 | SELECT s.cust_id,count(s.cust_id) FROM SH.sales s GROUP BY s.cust_id 11 | HAVING s.cust_id != '1660' AND s.cust_id != '2' 12 | ``` 13 | can be rewritten as: 14 | ``` 15 | SELECT s.cust_id,count(cust_id) FROM SH.sales s WHERE s.cust_id != '1660' 16 | AND s.cust_id !='2' GROUP BY s.cust_id 17 | ``` -------------------------------------------------------------------------------- /docs/query/3013.md: -------------------------------------------------------------------------------- 1 | # Nested sub queries 2 | 3 | ## Un-nest sub queries: 4 | Rewriting nested queries as joins often leads to more efficient 5 | execution and more effective optimization. In general, sub-query unnesting 6 | is always done for correlated sub-queries with, at most, one table in 7 | the FROM clause, which are used in ANY, ALL, and EXISTS predicates. 8 | A uncorrelated sub-query, or a sub-query with more than one table in 9 | the FROM clause, is flattened if it can be decided, based on the query 10 | semantics, that the sub-query returns at most one row. 11 | 12 | ### Example 13 | 14 | ``` 15 | SELECT * FROM SH.products p WHERE p.prod_id = (SELECT s.prod_id FROM SH.sales 16 | s WHERE s.cust_id = 100996 AND s.quantity_sold = 1 ) 17 | ``` 18 | can be rewritten as: 19 | ``` 20 | SELECT p.* FROM SH.products p, sales s WHERE p.prod_id = s.prod_id AND 21 | s.cust_id = 100996 AND s.quantity_sold = 1 22 | ``` 23 | -------------------------------------------------------------------------------- /docs/query/3014.md: -------------------------------------------------------------------------------- 1 | # OR Usage 2 | 3 | ## Consider using an IN predicate when querying an indexed column: 4 | The IN-list predicate can be exploited for indexed retrieval and also, 5 | the optimizer can sort the IN-list to match the sort sequence of the index, 6 | leading to more efficient retrieval. Note that the IN-list must contain only 7 | constants, or values that are constant during one execution of the query block, 8 | such as outer references. 9 | 10 | ### Example 11 | 12 | ``` 13 | SELECT s.* FROM SH.sales s WHERE s.prod_id = 14 OR s.prod_id = 17 14 | ``` 15 | can be rewritten as: 16 | ``` 17 | SELECT s.* FROM SH.sales s WHERE s.prod_id IN (14, 17) 18 | ``` -------------------------------------------------------------------------------- /docs/query/3015.md: -------------------------------------------------------------------------------- 1 | # UNION Usage 2 | 3 | ## Consider using UNION ALL if you do not care about duplicates: 4 | Unlike UNION which removes duplicates, UNION ALL allows duplicate tuples. 5 | If you do not care about duplicate tuples, then using UNION ALL would be 6 | a faster option. -------------------------------------------------------------------------------- /docs/query/3016.md: -------------------------------------------------------------------------------- 1 | # DISTINCT & JOIN Usage 2 | 3 | ## Consider using a sub-query with EXISTS instead of DISTINCT: 4 | The DISTINCT keyword removes duplicates after sorting the tuples. 5 | Instead, consider using a sub query with the EXISTS keyword, you can avoid 6 | having to return an entire table. 7 | 8 | ### Example 9 | 10 | ``` 11 | SELECT DISTINCT c.country_id, c.country_name FROM SH.countries c, 12 | SH.customers e WHERE e.country_id = c.country_id 13 | ``` 14 | can be rewritten to: 15 | ``` 16 | SELECT c.country_id, c.country_name FROM SH.countries c WHERE EXISTS 17 | (SELECT 'X' FROM SH.customers e WHERE e.country_id = c.country_id) 18 | ``` 19 | -------------------------------------------------------------------------------- /examples/auctionmark-ddl.sql: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | * Copyright (C) 2010 by H-Store Project * 3 | * Brown University * 4 | * Massachusetts Institute of Technology * 5 | * Yale University * 6 | * * 7 | * Andy Pavlo (pavlo@cs.brown.edu) * 8 | * http://www.cs.brown.edu/~pavlo/ * 9 | * * 10 | * Visawee Angkanawaraphan (visawee@cs.brown.edu) * 11 | * http://www.cs.brown.edu/~visawee/ * 12 | * * 13 | * Permission is hereby granted, free of charge, to any person obtaining * 14 | * a copy of this software and associated documentation files (the * 15 | * "Software"), to deal in the Software without restriction, including * 16 | * without limitation the rights to use, copy, modify, merge, publish, * 17 | * distribute, sublicense, and/or sell copies of the Software, and to * 18 | * permit persons to whom the Software is furnished to do so, subject to * 19 | * the following conditions: * 20 | * * 21 | * The above copyright notice and this permission notice shall be * 22 | * included in all copies or substantial portions of the Software. * 23 | * * 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * 26 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * 27 | * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR * 28 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * 29 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * 30 | * OTHER DEALINGS IN THE SOFTWARE. * 31 | ***************************************************************************/ 32 | 33 | -- ================================================================ 34 | -- CONFIG_PROFILE 35 | -- ================================================================ 36 | CREATE TABLE CONFIG_PROFILE ( 37 | cfp_scale_factor FLOAT NOT NULL, 38 | cfp_loader_start TIMESTAMP NOT NULL, 39 | cfp_loader_stop TIMESTAMP NOT NULL, 40 | cfp_user_item_histogram VARCHAR(12000) NOT NULL 41 | ); 42 | 43 | -- ================================================================ 44 | -- REGION 45 | -- Represents regions of users 46 | -- r_id Region's ID 47 | -- r_name Region's name 48 | -- ================================================================ 49 | CREATE TABLE REGION ( 50 | r_id BIGINT NOT NULL, 51 | r_name VARCHAR(32), 52 | PRIMARY KEY (r_id) 53 | ); 54 | 55 | -- ================================================================ 56 | -- USERACCT 57 | -- Represents user accounts 58 | -- u_id User ID 59 | -- u_firstname User's first name 60 | -- u_lastname User's last name 61 | -- u_password User's password 62 | -- u_email User's email 63 | -- u_rating User's rating as a seller 64 | -- u_balance User's balance 65 | -- u_created User's create date 66 | -- u_r_id User's region ID 67 | -- ================================================================ 68 | CREATE TABLE USERACCT ( 69 | u_id BIGINT NOT NULL, 70 | u_rating BIGINT NOT NULL, 71 | u_balance FLOAT NOT NULL, 72 | u_comments INTEGER DEFAULT 0, 73 | u_r_id BIGINT NOT NULL REFERENCES REGION (r_id), 74 | u_created TIMESTAMP, 75 | u_updated TIMESTAMP, 76 | u_sattr0 VARCHAR(64), 77 | u_sattr1 VARCHAR(64), 78 | u_sattr2 VARCHAR(64), 79 | u_sattr3 VARCHAR(64), 80 | u_sattr4 VARCHAR(64), 81 | u_sattr5 VARCHAR(64), 82 | u_sattr6 VARCHAR(64), 83 | u_sattr7 VARCHAR(64), 84 | u_iattr0 BIGINT DEFAULT NULL, 85 | u_iattr1 BIGINT DEFAULT NULL, 86 | u_iattr2 BIGINT DEFAULT NULL, 87 | u_iattr3 BIGINT DEFAULT NULL, 88 | u_iattr4 BIGINT DEFAULT NULL, 89 | u_iattr5 BIGINT DEFAULT NULL, 90 | u_iattr6 BIGINT DEFAULT NULL, 91 | u_iattr7 BIGINT DEFAULT NULL, 92 | PRIMARY KEY (u_id) 93 | ); 94 | CREATE INDEX IDX_USERACCT_REGION ON USERACCT (u_id, u_r_id); 95 | 96 | -- ================================================================ 97 | -- USERACCT_ATTRIBUTES 98 | -- Represents user's attributes 99 | -- ================================================================ 100 | CREATE TABLE USERACCT_ATTRIBUTES ( 101 | ua_id BIGINT NOT NULL, 102 | ua_u_id BIGINT NOT NULL REFERENCES USERACCT (u_id), 103 | ua_name VARCHAR(32) NOT NULL, 104 | ua_value VARCHAR(32) NOT NULL, 105 | u_created TIMESTAMP, 106 | PRIMARY KEY (ua_id, ua_u_id) 107 | ); 108 | 109 | -- ================================================================ 110 | -- CATEGORY 111 | -- Represents merchandises' categories. Category can be hierarchical aligned using c_parent_id. 112 | -- c_id Category's ID 113 | -- c_name Category's name 114 | -- c_parent_id Parent category's ID 115 | -- ================================================================ 116 | CREATE TABLE CATEGORY ( 117 | c_id BIGINT NOT NULL, 118 | c_name VARCHAR(50), 119 | c_parent_id BIGINT REFERENCES CATEGORY (c_id), 120 | PRIMARY KEY (c_id) 121 | ); 122 | CREATE INDEX IDX_CATEGORY_PARENT ON CATEGORY (c_parent_id); 123 | 124 | -- ================================================================ 125 | -- GLOBAL_ATTRIBUTE_GROUP 126 | -- Represents merchandises' global attribute groups (for example, brand, material, feature etc.). 127 | -- gag_id Global attribute group's ID 128 | -- gag_c_id Associated Category's ID 129 | -- gag_name Global attribute group's name 130 | -- ================================================================ 131 | CREATE TABLE GLOBAL_ATTRIBUTE_GROUP ( 132 | gag_id BIGINT NOT NULL, 133 | gag_c_id BIGINT NOT NULL REFERENCES CATEGORY (c_id), 134 | gag_name VARCHAR(100) NOT NULL, 135 | PRIMARY KEY (gag_id) 136 | ); 137 | 138 | -- ================================================================ 139 | -- GLOBAL_ATTRIBUTE_VALUE 140 | -- Represents merchandises' global attributes within each attribute 141 | -- groups (for example, Rolex, Casio, Seiko within brand) 142 | -- gav_id Global attribute value's ID 143 | -- gav_gag_id Associated Global attribute group's ID 144 | -- gav_name Global attribute value's name 145 | -- ================================================================ 146 | CREATE TABLE GLOBAL_ATTRIBUTE_VALUE ( 147 | gav_id BIGINT NOT NULL, 148 | gav_gag_id BIGINT NOT NULL REFERENCES GLOBAL_ATTRIBUTE_GROUP (gag_id), 149 | gav_name VARCHAR(100) NOT NULL, 150 | PRIMARY KEY (gav_id, gav_gag_id) 151 | ); 152 | 153 | -- ================================================================ 154 | -- ITEM 155 | -- Represents merchandises 156 | -- i_id Item's ID 157 | -- i_u_id Seller's ID 158 | -- i_c_id Category's ID 159 | -- i_name Item's name 160 | -- i_description Item's description 161 | -- i_initial_price Item's initial price 162 | -- i_reserve_price Item's reserve price 163 | -- i_buy_now Item's buy now price 164 | -- i_nb_of_bids Item's number of bids 165 | -- i_max_bid Item's max bid price 166 | -- i_user_attributes Text field for attributes defined just for this item 167 | -- i_start_date Item's bid start date 168 | -- i_end_date Item's bid end date 169 | -- i_status Items' status (0 = open, 1 = wait for purchase, 2 = close) 170 | -- ================================================================ 171 | CREATE TABLE ITEM ( 172 | i_id BIGINT NOT NULL, 173 | i_u_id BIGINT NOT NULL REFERENCES USERACCT (u_id), 174 | i_c_id BIGINT NOT NULL REFERENCES CATEGORY (c_id), 175 | i_name VARCHAR(100), 176 | i_description VARCHAR(1024), 177 | i_user_attributes VARCHAR(255) DEFAULT NULL, 178 | i_initial_price FLOAT NOT NULL, 179 | i_current_price FLOAT NOT NULL, 180 | i_num_bids BIGINT, 181 | i_num_images BIGINT, 182 | i_num_global_attrs BIGINT, 183 | i_num_comments BIGINT, 184 | i_start_date TIMESTAMP, 185 | i_end_date TIMESTAMP, 186 | i_status INT DEFAULT 0, 187 | i_created TIMESTAMP, 188 | i_updated TIMESTAMP, 189 | i_iattr0 BIGINT DEFAULT NULL, 190 | i_iattr1 BIGINT DEFAULT NULL, 191 | i_iattr2 BIGINT DEFAULT NULL, 192 | i_iattr3 BIGINT DEFAULT NULL, 193 | i_iattr4 BIGINT DEFAULT NULL, 194 | i_iattr5 BIGINT DEFAULT NULL, 195 | i_iattr6 BIGINT DEFAULT NULL, 196 | i_iattr7 BIGINT DEFAULT NULL, 197 | PRIMARY KEY (i_id, i_u_id) 198 | ); 199 | CREATE INDEX IDX_ITEM_SELLER ON ITEM (i_u_id); 200 | 201 | -- ================================================================ 202 | -- ITEM_ATTRIBUTE 203 | -- Represents mappings between attribute values and items 204 | -- ia_id Item attribute's ID 205 | -- ia_i_id Item's ID 206 | -- ia_gav_id Global attribute value's ID 207 | -- ================================================================ 208 | CREATE TABLE ITEM_ATTRIBUTE ( 209 | ia_id BIGINT NOT NULL, 210 | ia_i_id BIGINT NOT NULL, 211 | ia_u_id BIGINT NOT NULL, 212 | ia_gav_id BIGINT NOT NULL, 213 | ia_gag_id BIGINT NOT NULL, 214 | ia_sattr0 VARCHAR(64) DEFAULT NULL, 215 | FOREIGN KEY (ia_i_id, ia_u_id) REFERENCES ITEM (i_id, i_u_id) ON DELETE CASCADE, 216 | FOREIGN KEY (ia_gav_id, ia_gag_id) REFERENCES GLOBAL_ATTRIBUTE_VALUE (gav_id, gav_gag_id), 217 | PRIMARY KEY (ia_id, ia_i_id, ia_u_id) 218 | ); 219 | 220 | -- ================================================================ 221 | -- ITEM_IMAGE 222 | -- Represents images of items 223 | -- ii_id Image's ID 224 | -- ii_i_id Item's ID 225 | -- ii_path Image's path 226 | -- ================================================================ 227 | CREATE TABLE ITEM_IMAGE ( 228 | ii_id BIGINT NOT NULL, 229 | ii_i_id BIGINT NOT NULL, 230 | ii_u_id BIGINT NOT NULL, 231 | ii_sattr0 VARCHAR(128) NOT NULL, 232 | FOREIGN KEY (ii_i_id, ii_u_id) REFERENCES ITEM (i_id, i_u_id) ON DELETE CASCADE, 233 | PRIMARY KEY (ii_id, ii_i_id, ii_u_id) 234 | ); 235 | 236 | -- ================================================================ 237 | -- ITEM_COMMENT 238 | -- Represents comments provided by buyers 239 | -- ic_id Comment's ID 240 | -- ic_i_id Item's ID 241 | -- ic_u_id Buyer's ID 242 | -- ic_date Comment's create date 243 | -- ic_question Comment by buyer 244 | -- ic_response Response from seller 245 | -- ================================================================ 246 | CREATE TABLE ITEM_COMMENT ( 247 | ic_id BIGINT NOT NULL, 248 | ic_i_id BIGINT NOT NULL, 249 | ic_u_id BIGINT NOT NULL, 250 | ic_buyer_id BIGINT NOT NULL REFERENCES USERACCT (u_id), 251 | ic_question VARCHAR(128) NOT NULL, 252 | ic_response VARCHAR(128) DEFAULT NULL, 253 | ic_created TIMESTAMP, 254 | ic_updated TIMESTAMP, 255 | FOREIGN KEY (ic_i_id, ic_u_id) REFERENCES ITEM (i_id, i_u_id) ON DELETE CASCADE, 256 | PRIMARY KEY (ic_id, ic_i_id, ic_u_id) 257 | ); 258 | -- CREATE INDEX IDX_ITEM_COMMENT ON ITEM_COMMENT (ic_i_id, ic_u_id); 259 | 260 | -- ================================================================ 261 | -- ITEM_BID 262 | -- Represents merchandises' bids 263 | -- ib_id Bid's ID 264 | -- ib_i_id Item's ID 265 | -- ib_u_id Buyer's ID 266 | -- ib_type Type of transaction (bid or buy_now) 267 | -- ib_bid Bid's price 268 | -- ib_max_bid ??? 269 | -- ib_date Bid's date 270 | -- ================================================================ 271 | CREATE TABLE ITEM_BID ( 272 | ib_id BIGINT NOT NULL, 273 | ib_i_id BIGINT NOT NULL, 274 | ib_u_id BIGINT NOT NULL, 275 | ib_buyer_id BIGINT NOT NULL REFERENCES USERACCT (u_id), 276 | ib_bid FLOAT NOT NULL, 277 | ib_max_bid FLOAT NOT NULL, 278 | ib_created TIMESTAMP, 279 | ib_updated TIMESTAMP, 280 | FOREIGN KEY (ib_i_id, ib_u_id) REFERENCES ITEM (i_id, i_u_id) ON DELETE CASCADE, 281 | PRIMARY KEY (ib_id, ib_i_id, ib_u_id) 282 | ); 283 | 284 | -- ================================================================ 285 | -- ITEM_MAX_BID 286 | -- Cross-reference table to the current max bid for an auction 287 | -- ================================================================ 288 | CREATE TABLE ITEM_MAX_BID ( 289 | imb_i_id BIGINT NOT NULL, 290 | imb_u_id BIGINT NOT NULL, 291 | imb_ib_id BIGINT NOT NULL, 292 | imb_ib_i_id BIGINT NOT NULL, 293 | imb_ib_u_id BIGINT NOT NULL, 294 | imb_created TIMESTAMP, 295 | imb_updated TIMESTAMP, 296 | FOREIGN KEY (imb_i_id, imb_u_id) REFERENCES ITEM (i_id, i_u_id) ON DELETE CASCADE, 297 | FOREIGN KEY (imb_ib_id, imb_ib_i_id, imb_ib_u_id) REFERENCES ITEM_BID (ib_id, ib_i_id, ib_u_id) ON DELETE CASCADE, 298 | PRIMARY KEY (imb_i_id, imb_u_id) 299 | ); 300 | 301 | -- ================================================================ 302 | -- ITEM_PURCHASE 303 | -- Represents purchase transaction (buy_now bid or win bid) 304 | -- ip_id Purchase's ID 305 | -- ip_ib_id Bid's ID 306 | -- ip_date Purchase's date 307 | -- ================================================================ 308 | CREATE TABLE ITEM_PURCHASE ( 309 | ip_id BIGINT NOT NULL, 310 | ip_ib_id BIGINT NOT NULL, 311 | ip_ib_i_id BIGINT NOT NULL, 312 | ip_ib_u_id BIGINT NOT NULL, 313 | ip_date TIMESTAMP, 314 | FOREIGN KEY (ip_ib_id, ip_ib_i_id, ip_ib_u_id) REFERENCES ITEM_BID (ib_id, ib_i_id, ib_u_id) ON DELETE CASCADE, 315 | PRIMARY KEY (ip_id, ip_ib_id, ip_ib_i_id, ip_ib_u_id) 316 | ); 317 | 318 | -- ================================================================ 319 | -- USERACCT_FEEDBACK 320 | -- Represents feedbacks between buyers and sellers for a transaction 321 | -- uf_id Feedback's ID 322 | -- uf_u_id The user receiving the feedback 323 | -- uf_i_id Item's ID 324 | -- uf_i_u_id Item's seller id 325 | -- uf_from_id The other user writing the feedback 326 | -- uf_date Feedback's create date 327 | -- uf_comment Feedback by other user 328 | -- ================================================================ 329 | CREATE TABLE USERACCT_FEEDBACK ( 330 | uf_u_id BIGINT NOT NULL REFERENCES USERACCT (u_id), 331 | uf_i_id BIGINT NOT NULL, 332 | uf_i_u_id BIGINT NOT NULL, 333 | uf_from_id BIGINT NOT NULL REFERENCES USERACCT (u_id), 334 | uf_rating TINYINT NOT NULL, 335 | uf_date TIMESTAMP, 336 | uf_sattr0 VARCHAR(80) NOT NULL, 337 | FOREIGN KEY (uf_i_id, uf_i_u_id) REFERENCES ITEM (i_id, i_u_id) ON DELETE CASCADE, 338 | PRIMARY KEY (uf_u_id, uf_i_id, uf_i_u_id, uf_from_id), 339 | CHECK (uf_u_id <> uf_from_id) 340 | ); 341 | 342 | -- ================================================================ 343 | -- USERACCT_ITEM 344 | -- The items that a user has recently purchased 345 | -- ================================================================ 346 | CREATE TABLE USERACCT_ITEM ( 347 | ui_u_id BIGINT NOT NULL REFERENCES USERACCT (u_id), 348 | ui_i_id BIGINT NOT NULL, 349 | ui_i_u_id BIGINT NOT NULL, 350 | ui_ip_id BIGINT, 351 | ui_ip_ib_id BIGINT, 352 | ui_ip_ib_i_id BIGINT, 353 | ui_ip_ib_u_id BIGINT, 354 | ui_created TIMESTAMP, 355 | FOREIGN KEY (ui_i_id, ui_i_u_id) REFERENCES ITEM (i_id, i_u_id) ON DELETE CASCADE, 356 | FOREIGN KEY (ui_ip_id, ui_ip_ib_id, ui_ip_ib_i_id, ui_ip_ib_u_id) REFERENCES ITEM_PURCHASE (ip_id, ip_ib_id, ip_ib_i_id, ip_ib_u_id) ON DELETE CASCADE, 357 | PRIMARY KEY (ui_u_id, ui_i_id, ui_i_u_id) 358 | ); 359 | -- CREATE INDEX IDX_USERACCT_ITEM_ID ON USERACCT_ITEM (ui_i_id); 360 | 361 | -- ================================================================ 362 | -- USERACCT_WATCH 363 | -- The items that a user is watching 364 | -- ================================================================ 365 | CREATE TABLE USERACCT_WATCH ( 366 | uw_u_id BIGINT NOT NULL REFERENCES USERACCT (u_id), 367 | uw_i_id BIGINT NOT NULL, 368 | uw_i_u_id BIGINT NOT NULL, 369 | uw_created TIMESTAMP, 370 | FOREIGN KEY (uw_i_id, uw_i_u_id) REFERENCES ITEM (i_id, i_u_id) ON DELETE CASCADE, 371 | PRIMARY KEY (uw_u_id, uw_i_id, uw_i_u_id) 372 | ); 373 | -------------------------------------------------------------------------------- /examples/query4.sql: -------------------------------------------------------------------------------- 1 | select 2 | o_orderpriority, 3 | count(*) as order_count 4 | from 5 | orders 6 | where 7 | o_orderdate >= date '1994-08-01' 8 | and o_orderdate < date '1994-08-01' + interval '3' month 9 | and exists ( 10 | select 11 | * 12 | from 13 | lineitem 14 | where 15 | l_orderkey = o_orderkey 16 | and l_commitdate < l_receiptdate 17 | ) 18 | group by 19 | o_orderpriority 20 | order by 21 | o_orderpriority; 22 | -------------------------------------------------------------------------------- /examples/si-ddl.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS SITEST; 2 | CREATE TABLE SITEST ( 3 | id INT PRIMARY KEY, 4 | value INT NOT NULL 5 | ); 6 | -------------------------------------------------------------------------------- /examples/top_mutexes.sql: -------------------------------------------------------------------------------- 1 | with top_mutexes as ( 2 | select--+ leading(t1 s1 v1 v2 t2 s2) use_hash(s1) use_nl(v1) use_hash(s2) materialize 3 | t1.hsecs 4 | ,s1.* 5 | ,s2.sleeps as end_sleeps 6 | ,s2.wait_time as end_wait_time 7 | ,s2.sleeps-s1.sleeps as delta_sleeps 8 | ,t2.HSECS - t1.HSECS as delta_hsecs 9 | --,s2.* 10 | from v$timer t1 11 | ,v$mutex_sleep s1 12 | ,(select/*+ no_merge */ sum(level) a from dual connect by level<=1e6) v1 13 | ,v$timer t2 14 | ,v$mutex_sleep s2 15 | where s1.MUTEX_TYPE=s2.MUTEX_TYPE 16 | and s1.location=s2.location 17 | ) 18 | select * 19 | from top_mutexes 20 | order by delta_sleeps desc; 21 | -------------------------------------------------------------------------------- /script/packages.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | sudo apt-get --ignore-missing -y install \ 4 | git \ 5 | g++ \ 6 | cmake \ 7 | libgflags-dev \ 8 | libgtest-dev 9 | 10 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ################################################################################## 2 | 3 | # --[ SQLCheck 4 | 5 | # Set output directory 6 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) 7 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) 8 | 9 | # Make sure the compiler can find include files for our sqlcheck library 10 | include_directories (${CMAKE_CURRENT_SOURCE_DIR}/include) 11 | 12 | # Create our sqlcheck library 13 | add_library (sqlcheck_library checker.cpp configuration.cpp list.cpp) 14 | 15 | # Create our executable 16 | add_executable(sqlcheck main.cpp) 17 | target_link_libraries(sqlcheck sqlcheck_library 18 | ${GTEST_BOTH_LIBRARIES} 19 | ${GLOG_LIBRARIES} 20 | ${CMAKE_THREAD_LIBS_INIT} 21 | gflags 22 | ) 23 | 24 | # Add installation target 25 | install (TARGETS sqlcheck sqlcheck_library DESTINATION bin) 26 | -------------------------------------------------------------------------------- /src/checker.cpp: -------------------------------------------------------------------------------- 1 | // CHECKER SOURCE 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "include/checker.h" 16 | 17 | #include "include/configuration.h" 18 | #include "include/list.h" 19 | #include "include/color.h" 20 | 21 | namespace sqlcheck { 22 | 23 | bool Check(Configuration& state) { 24 | 25 | bool has_issues = false; 26 | std::unique_ptr input; 27 | 28 | // Set up stream 29 | if(state.testing_mode == true){ 30 | input.reset(state.test_stream.release()); 31 | } 32 | else if (state.file_name.empty()) { 33 | input.reset(&std::cin); 34 | } 35 | else { 36 | //std::cout << "Checking " << state.file_name << "...\n"; 37 | input.reset(new std::ifstream(state.file_name.c_str())); 38 | } 39 | 40 | std::stringstream sql_statement; 41 | state.line_number = 1; 42 | 43 | std::cout << "==================== Results ===================\n"; 44 | 45 | // Go over the input stream 46 | while(!input->eof()){ 47 | 48 | // Get a statement from the input stream 49 | std::string statement_fragment; 50 | std::getline(*input, statement_fragment, state.delimiter[0]); 51 | 52 | // Append fragment to statement 53 | if(statement_fragment.empty() == false){ 54 | sql_statement << statement_fragment << " "; 55 | } 56 | 57 | // Check the statement 58 | CheckStatement(state, sql_statement.str()); 59 | 60 | // Reset statement 61 | sql_statement.str(std::string()); 62 | } 63 | 64 | // Print summary 65 | if(state.checker_stats[RISK_LEVEL_ALL] == 0){ 66 | std::cout << "No issues found.\n"; 67 | } 68 | else { 69 | std::cout << "\n==================== Summary ===================\n"; 70 | std::cout << "All Anti-Patterns and Hints :: " << state.checker_stats[RISK_LEVEL_ALL] << "\n"; 71 | std::cout << "> High Risk :: " << state.checker_stats[RISK_LEVEL_HIGH] << "\n"; 72 | std::cout << "> Medium Risk :: " << state.checker_stats[RISK_LEVEL_MEDIUM] << "\n"; 73 | std::cout << "> Low Risk :: " << state.checker_stats[RISK_LEVEL_LOW] << "\n"; 74 | std::cout << "> Hints :: " << state.checker_stats[RISK_LEVEL_NONE] << "\n"; 75 | has_issues = true; 76 | } 77 | 78 | // Skip destroying std::cin 79 | if (state.file_name.empty()) { 80 | input.release(); 81 | } 82 | 83 | return has_issues; 84 | 85 | } 86 | 87 | // Wrap the text 88 | std::string WrapText(const std::string& text){ 89 | 90 | size_t line_length = 80; 91 | 92 | std::istringstream words(text); 93 | std::ostringstream wrapped; 94 | std::string word; 95 | bool newline = false; 96 | bool newpara = false; 97 | 98 | if (words >> word) { 99 | 100 | wrapped << word; 101 | 102 | size_t space_left = line_length - word.length(); 103 | while (words >> word) { 104 | if(word == "●"){ 105 | wrapped << "\n\n"; 106 | newpara = true; 107 | } 108 | else{ 109 | newpara = false; 110 | } 111 | 112 | if (space_left < word.length() + 1 || newline) { 113 | wrapped << '\n' << word; 114 | space_left = line_length - word.length(); 115 | } 116 | else { 117 | if(newpara == false){ 118 | wrapped << ' ' << word; 119 | } 120 | else{ 121 | wrapped << word; 122 | } 123 | space_left -= word.length() + 1; 124 | } 125 | 126 | if(word.back() == ':'){ 127 | newline = true; 128 | } 129 | else{ 130 | newline = false; 131 | } 132 | } 133 | 134 | } 135 | 136 | return wrapped.str(); 137 | } 138 | 139 | void PrintMessage(Configuration& state, 140 | const std::string sql_statement, 141 | const bool print_statement, 142 | const RiskLevel pattern_risk_level, 143 | const PatternType pattern_type, 144 | const std::string title, 145 | const std::string message){ 146 | 147 | ColorModifier red(ColorCode::FG_RED, state.color_mode, true); 148 | ColorModifier green(ColorCode::FG_GREEN, state.color_mode, true); 149 | ColorModifier blue(ColorCode::FG_BLUE, state.color_mode, true); 150 | ColorModifier regular(ColorCode::FG_DEFAULT, state.color_mode, false); 151 | 152 | if(print_statement == true){ 153 | std::cout << "\n-------------------------------------------------\n"; 154 | ColorModifier regular(ColorCode::FG_DEFAULT, state.color_mode, false); 155 | 156 | if(state.color_mode == true){ 157 | std::cout << "SQL Statement at line " << state.line_number <<": " << red << WrapText(sql_statement) << state.delimiter << regular << "\n"; 158 | } 159 | else { 160 | std::cout << "SQL Statement at line " << state.line_number << ": " << WrapText(sql_statement) << state.delimiter << "\n"; 161 | } 162 | } 163 | 164 | if(state.color_mode == true){ 165 | if(state.file_name.empty() == false){ 166 | std::cout << "[" << state.file_name << "]: "; 167 | } 168 | 169 | std::cout << "(" << green << RiskLevelToString(pattern_risk_level) << regular << ") "; 170 | std::cout << blue << title << regular << "\n"; 171 | } 172 | else { 173 | if(state.file_name.empty() == false){ 174 | std::cout << "[" << state.file_name << "]: "; 175 | } 176 | 177 | std::cout << "(" << RiskLevelToString(pattern_risk_level) << ") "; 178 | std::cout << "(" << PatternTypeToString(pattern_type) << ") "; 179 | std::cout << title << "\n"; 180 | } 181 | 182 | // Print detailed message only in verbose mode 183 | if(state.verbose == true){ 184 | std::cout << WrapText(message) << "\n"; 185 | } 186 | 187 | // Update checker stats 188 | state.checker_stats[pattern_risk_level]++; 189 | state.checker_stats[RISK_LEVEL_ALL]++; 190 | 191 | } 192 | 193 | void CheckPattern(Configuration& state, 194 | const std::string& sql_statement, 195 | bool& print_statement, 196 | const std::regex& anti_pattern, 197 | const RiskLevel pattern_risk_level, 198 | const PatternType pattern_type, 199 | const std::string title, 200 | const std::string message, 201 | const bool exists, 202 | const size_t min_count){ 203 | 204 | //std::cout << "PATTERN LEVEL: " << pattern_risk_level << "\n"; 205 | //std::cout << "CHECKER LEVEL: " << state.log_level << "\n"; 206 | 207 | // Check log level 208 | if(pattern_risk_level < state.risk_level){ 209 | return; 210 | } 211 | 212 | bool found = false; 213 | std::smatch match; 214 | std::size_t count = 0; 215 | 216 | // create an vector for the match positions 217 | std::vector positions; 218 | try { 219 | std::sregex_iterator sqlsearch = std::sregex_iterator(sql_statement.begin(), sql_statement.end(), anti_pattern); 220 | std::sregex_iterator sqlend = std::sregex_iterator(); 221 | count = std::distance(sqlsearch, sqlend); 222 | if (count > 0) { 223 | found = true; 224 | } 225 | 226 | if(found == exists && count > min_count){ 227 | for (std::sregex_iterator next = sqlsearch; next != sqlend; ++next) 228 | { 229 | match = *next; 230 | // add match position to the vector 231 | positions.push_back(match.position(0)); 232 | } 233 | 234 | // update positions from character number to line number 235 | uint32_t position_checker = 0; 236 | uint32_t num_lines = state.line_number; 237 | if (positions.size() > 0) { 238 | for (size_t statement_char = 0; statement_char < sql_statement.length(); statement_char++) { 239 | if (positions[position_checker] == statement_char) { 240 | positions[position_checker] = num_lines; 241 | position_checker++; 242 | } 243 | if (sql_statement[statement_char] == '\n') { 244 | num_lines++; 245 | } 246 | } 247 | } 248 | 249 | std::stringstream linelocations; 250 | // convert line numbers to output string 251 | if (positions.size() > 1) { 252 | linelocations << " at lines "; 253 | } else { 254 | linelocations << " at line "; 255 | } 256 | for (size_t i = 0; i < positions.size(); i++) { 257 | linelocations << positions[i]; 258 | if (i < positions.size() - 1) { 259 | linelocations << ", "; 260 | } 261 | } 262 | PrintMessage(state, 263 | sql_statement, 264 | print_statement, 265 | pattern_risk_level, 266 | pattern_type, 267 | title, 268 | message); 269 | 270 | if(exists == true){ 271 | ColorModifier blue(ColorCode::FG_BLUE, state.color_mode, true); 272 | ColorModifier regular(ColorCode::FG_DEFAULT, state.color_mode, false); 273 | if(state.color_mode == true){ 274 | std::cout << "[Matching Expression: " << blue << WrapText(match.str(0)) << regular << linelocations.str() << "]"; 275 | } 276 | else{ 277 | std::cout << "[Matching Expression: " << WrapText(match.str(0)) << linelocations.str() << "]"; 278 | } 279 | std::cout << "\n\n"; 280 | } 281 | 282 | // TOGGLE PRINT STATEMENT 283 | print_statement = false; 284 | } 285 | } catch (std::regex_error& e) { 286 | // Syntax error in the regular expression 287 | } 288 | } 289 | 290 | void CheckStatement(Configuration& state, 291 | const std::string& sql_statement){ 292 | 293 | // TRANSFORM TO LOWER CASE 294 | auto statement = sql_statement; 295 | 296 | std::transform(statement.begin(), 297 | statement.end(), 298 | statement.begin(), 299 | ::tolower); 300 | 301 | // REMOVE SPACE 302 | statement = std::regex_replace(statement, std::regex("^ +| +$|( ) +"), "$1"); 303 | 304 | // CHECK FOR LEADING NEWLINE 305 | if (statement[0] == '\n') { 306 | statement = statement.erase(0,1); 307 | state.line_number++; 308 | } 309 | 310 | // RESET 311 | bool print_statement = true; 312 | 313 | // LOGICAL DATABASE DESIGN 314 | 315 | CheckMultiValuedAttribute(state, statement, print_statement); 316 | 317 | CheckRecursiveDependency(state, statement, print_statement); 318 | 319 | CheckPrimaryKeyExists(state, statement, print_statement); 320 | 321 | CheckGenericPrimaryKey(state, statement, print_statement); 322 | 323 | CheckForeignKeyExists(state, statement, print_statement); 324 | 325 | CheckVariableAttribute(state, statement, print_statement); 326 | 327 | CheckMetadataTribbles(state, statement, print_statement); 328 | 329 | // PHYSICAL DATABASE DESIGN 330 | 331 | CheckFloat(state, statement, print_statement); 332 | 333 | CheckValuesInDefinition(state, statement, print_statement); 334 | 335 | CheckExternalFiles(state, statement, print_statement); 336 | 337 | CheckIndexCount(state, statement, print_statement); 338 | 339 | CheckIndexAttributeOrder(state, statement, print_statement); 340 | 341 | // QUERY 342 | 343 | CheckSelectStar(state, statement, print_statement); 344 | 345 | CheckJoinWithoutEquality(state, statement, print_statement); 346 | 347 | CheckNullUsage(state, statement, print_statement); 348 | 349 | CheckNotNullUsage(state, statement, print_statement); 350 | 351 | CheckConcatenation(state, statement, print_statement); 352 | 353 | CheckGroupByUsage(state, statement, print_statement); 354 | 355 | CheckOrderByRand(state, statement, print_statement); 356 | 357 | CheckPatternMatching(state, statement, print_statement); 358 | 359 | CheckSpaghettiQuery(state, statement, print_statement); 360 | 361 | CheckJoinCount(state, statement, print_statement); 362 | 363 | CheckDistinctCount(state, statement, print_statement); 364 | 365 | CheckImplicitColumns(state, statement, print_statement); 366 | 367 | CheckHaving(state, statement, print_statement); 368 | 369 | CheckNesting(state, statement, print_statement); 370 | 371 | CheckOr(state, statement, print_statement); 372 | 373 | CheckUnion(state, statement, print_statement); 374 | 375 | CheckDistinctJoin(state, statement, print_statement); 376 | 377 | // APPLICATION 378 | 379 | CheckReadablePasswords(state, statement, print_statement); 380 | 381 | 382 | 383 | 384 | // update state.line_number with number of line breaks in the statement that was just checked 385 | for (size_t i = 0; i < statement.length(); i++) 386 | { 387 | if (statement[i] == '\n') 388 | { 389 | state.line_number++; 390 | } 391 | } 392 | } 393 | 394 | } // namespace machine 395 | 396 | -------------------------------------------------------------------------------- /src/configuration.cpp: -------------------------------------------------------------------------------- 1 | // CONFIGURATION SOURCE 2 | 3 | #include "include/configuration.h" 4 | 5 | #include "gflags/gflags.h" 6 | 7 | namespace sqlcheck { 8 | 9 | std::string RiskLevelToString(const RiskLevel& risk_level){ 10 | 11 | switch (risk_level) { 12 | case RISK_LEVEL_HIGH: 13 | return "HIGH RISK"; 14 | case RISK_LEVEL_MEDIUM: 15 | return "MEDIUM RISK"; 16 | case RISK_LEVEL_LOW: 17 | return "LOW RISK"; 18 | case RISK_LEVEL_NONE: 19 | return "HINTS"; 20 | case RISK_LEVEL_ALL: 21 | return "ALL ANTI-PATTERNS"; 22 | 23 | case RISK_LEVEL_INVALID: 24 | default: 25 | return "INVALID"; 26 | } 27 | 28 | } 29 | 30 | std::string RiskLevelToDetailedString(const RiskLevel& risk_level){ 31 | 32 | switch (risk_level) { 33 | case RISK_LEVEL_HIGH: 34 | return "ONLY HIGH RISK ANTI-PATTERNS"; 35 | case RISK_LEVEL_MEDIUM: 36 | return "ONLY MEDIUM AND HIGH RISK ANTI-PATTERNS"; 37 | case RISK_LEVEL_LOW: 38 | return "ONLY ANTI-PATTERNS"; 39 | case RISK_LEVEL_NONE: 40 | return "ALL ANTI-PATTERNS AND HINTS"; 41 | case RISK_LEVEL_ALL: 42 | return "ALL ANTI-PATTERNS"; 43 | 44 | case RISK_LEVEL_INVALID: 45 | default: 46 | return "INVALID"; 47 | } 48 | 49 | } 50 | 51 | 52 | std::string PatternTypeToString(const PatternType& pattern_type){ 53 | 54 | switch (pattern_type) { 55 | case PATTERN_TYPE_PHYSICAL_DATABASE_DESIGN: 56 | return "PHYSICAL_DATABASE_DESIGN ANTI-PATTERN"; 57 | case PATTERN_TYPE_LOGICAL_DATABASE_DESIGN: 58 | return "LOGICAL_DATABASE_DESIGN ANTI-PATTERN"; 59 | case PATTERN_TYPE_QUERY: 60 | return "QUERY ANTI-PATTERN"; 61 | case PATTERN_TYPE_APPLICATION: 62 | return "APPLICATION ANTI-PATTERN"; 63 | 64 | case PATTERN_TYPE_INVALID: 65 | default: 66 | return "INVALID"; 67 | } 68 | 69 | } 70 | 71 | std::string GetBooleanString(const bool& status){ 72 | if(status == true){ 73 | return "ENABLED"; 74 | } 75 | else { 76 | return "DISABLED"; 77 | } 78 | } 79 | 80 | void ValidateRiskLevel(const Configuration &state) { 81 | if (state.risk_level < RISK_LEVEL_ALL || state.risk_level > RISK_LEVEL_HIGH) { 82 | printf("INVALID RISK LEVEL :: %d\n", state.risk_level); 83 | exit(EXIT_FAILURE); 84 | } 85 | else { 86 | printf("> %s :: %s\n", "RISK LEVEL ", 87 | RiskLevelToDetailedString(state.risk_level).c_str()); 88 | } 89 | } 90 | 91 | void ValidateFileName(const Configuration &state) { 92 | if (state.file_name.empty() == false) { 93 | printf("> %s :: %s\n", "SQL FILE NAME", 94 | state.file_name.c_str()); 95 | } 96 | } 97 | 98 | 99 | void ValidateColorMode(const Configuration &state) { 100 | printf("> %s :: %s\n", "COLOR MODE ", 101 | GetBooleanString(state.color_mode).c_str()); 102 | } 103 | 104 | void ValidateVerbose(const Configuration &state) { 105 | printf("> %s :: %s\n", "VERBOSE MODE ", 106 | GetBooleanString(state.verbose).c_str()); 107 | } 108 | 109 | void ValidateDelimiter(const Configuration &state) { 110 | printf("> %s :: %s\n", "DELIMITER ", 111 | state.delimiter.c_str()); 112 | } 113 | 114 | } // namespace sqlcheck 115 | -------------------------------------------------------------------------------- /src/include/checker.h: -------------------------------------------------------------------------------- 1 | // CHECKER HEADER 2 | 3 | #pragma once 4 | 5 | #include 6 | 7 | #include "configuration.h" 8 | 9 | namespace sqlcheck { 10 | 11 | // Check a set of SQL statements 12 | bool Check(Configuration& state); 13 | 14 | // Check a SQL statement 15 | void CheckStatement(Configuration& state, 16 | const std::string& sql_statement); 17 | 18 | // Check a pattern 19 | void CheckPattern(Configuration& state, 20 | const std::string& sql_statement, 21 | bool& print_statement, 22 | const std::regex& anti_pattern, 23 | const RiskLevel pattern_level, 24 | const PatternType pattern_type, 25 | const std::string title, 26 | const std::string message, 27 | const bool exists, 28 | const size_t min_count = 0); 29 | 30 | } // namespace machine 31 | -------------------------------------------------------------------------------- /src/include/color.h: -------------------------------------------------------------------------------- 1 | // COLOR HEADER 2 | 3 | #pragma once 4 | 5 | #include 6 | 7 | namespace sqlcheck { 8 | 9 | enum ColorCode { 10 | 11 | FG_DEFAULT = 39, 12 | FG_BLACK = 30, 13 | FG_RED = 31, 14 | FG_GREEN = 32, 15 | FG_YELLOW = 33, 16 | FG_BLUE = 34, 17 | FG_MAGENTA = 35, 18 | FG_CYAN = 36, 19 | FG_LIGHT_GRAY = 37, 20 | 21 | FG_DARK_GRAY = 90, 22 | FG_LIGHT_RED = 91, 23 | FG_LIGHT_GREEN = 92, 24 | FG_LIGHT_YELLOW = 93, 25 | FG_LIGHT_BLUE = 94, 26 | FG_LIGHT_MAGENTA = 95, 27 | FG_LIGHT_CYAN = 96, 28 | FG_WHITE = 97, 29 | 30 | BG_RED = 41, 31 | BG_GREEN = 42, 32 | BG_BLUE = 44, 33 | BG_DEFAULT = 49 34 | 35 | }; 36 | 37 | class ColorModifier { 38 | 39 | public: 40 | ColorModifier(ColorCode color_code, bool enable_color, bool enable_bold) 41 | : color_code_(color_code), 42 | enable_color_(enable_color), 43 | enable_bold_(enable_bold){ 44 | } 45 | 46 | friend std::ostream& operator<<(std::ostream& os, const ColorModifier& color_modifier) { 47 | if(color_modifier.enable_bold_ == true){ 48 | if(color_modifier.enable_color_ == true) { 49 | return os << "\e[1" << "m" << "\033[" << color_modifier.color_code_ << "m"; 50 | } 51 | else { 52 | return os; 53 | } 54 | } 55 | else { 56 | if(color_modifier.enable_color_ == true) { 57 | return os << "\e[0" << "m" << "\033[" << color_modifier.color_code_ << "m"; 58 | } 59 | else { 60 | return os; 61 | } 62 | } 63 | } 64 | 65 | private: 66 | 67 | // color code 68 | ColorCode color_code_; 69 | 70 | // enable color 71 | bool enable_color_; 72 | 73 | // enable bold 74 | bool enable_bold_; 75 | 76 | }; 77 | 78 | } // namespace sqlcheck 79 | -------------------------------------------------------------------------------- /src/include/configuration.h: -------------------------------------------------------------------------------- 1 | // CONFIGURATION HEADER 2 | 3 | #pragma once 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | namespace sqlcheck { 14 | 15 | #define UNUSED_ATTRIBUTE __attribute__((unused)) 16 | 17 | enum RiskLevel { 18 | RISK_LEVEL_INVALID = 10, 19 | 20 | RISK_LEVEL_HIGH = 4, 21 | RISK_LEVEL_MEDIUM = 3, 22 | RISK_LEVEL_LOW = 2, 23 | RISK_LEVEL_NONE = 1, 24 | RISK_LEVEL_ALL = 0 25 | 26 | }; 27 | 28 | enum PatternType { 29 | PATTERN_TYPE_INVALID = 0, 30 | 31 | PATTERN_TYPE_PHYSICAL_DATABASE_DESIGN = 1, 32 | PATTERN_TYPE_LOGICAL_DATABASE_DESIGN = 2, 33 | PATTERN_TYPE_QUERY = 3, 34 | PATTERN_TYPE_APPLICATION = 4, 35 | 36 | }; 37 | 38 | // Checker stats 39 | struct CheckerStats { 40 | 41 | unsigned int error_level_counter = 0; 42 | unsigned int warn_level_counter = 0; 43 | unsigned int info_level_counter = 0; 44 | unsigned int debug_level_counter = 0; 45 | unsigned int trace_level_counter = 0; 46 | unsigned int all_level_counter = 0; 47 | 48 | }; 49 | 50 | class Configuration { 51 | public: 52 | 53 | // Constructor 54 | Configuration() 55 | : 56 | color_mode(true), 57 | file_name(""), 58 | delimiter(";"), 59 | risk_level(RiskLevel::RISK_LEVEL_ALL), 60 | verbose(false), 61 | testing_mode(false) { 62 | } 63 | 64 | // color mode 65 | bool color_mode; 66 | 67 | // filename 68 | std::string file_name; 69 | 70 | // query delimiter 71 | std::string delimiter; 72 | 73 | // risk level 74 | RiskLevel risk_level; 75 | 76 | // verbose mode 77 | bool verbose; 78 | 79 | // test stream 80 | std::unique_ptr test_stream; 81 | 82 | // testing mode 83 | bool testing_mode; 84 | 85 | /// checker stats 86 | std::map checker_stats; 87 | 88 | // line number 89 | std::uint32_t line_number; 90 | 91 | }; 92 | 93 | std::string RiskLevelToString(const RiskLevel& risk_level); 94 | 95 | std::string RiskLevelToDetailedString(const RiskLevel& risk_level); 96 | 97 | std::string PatternTypeToString(const PatternType& pattern_type); 98 | 99 | void ValidateRiskLevel(const Configuration &state); 100 | 101 | void ValidateFileName(const Configuration &state); 102 | 103 | void ValidateColorMode(const Configuration &state); 104 | 105 | void ValidateVerbose(const Configuration &state); 106 | 107 | void ValidateDelimiter(const Configuration &state); 108 | 109 | 110 | } // namespace sqlcheck 111 | -------------------------------------------------------------------------------- /src/include/list.h: -------------------------------------------------------------------------------- 1 | // LIST HEADER 2 | 3 | #pragma once 4 | 5 | #include "configuration.h" 6 | 7 | namespace sqlcheck { 8 | 9 | // LOGICAL DATABASE DESIGN 10 | 11 | void CheckMultiValuedAttribute(Configuration& state, 12 | const std::string& sql_statement, 13 | bool& print_statement); 14 | 15 | void CheckRecursiveDependency(Configuration& state, 16 | const std::string& sql_statement, 17 | bool& print_statement); 18 | 19 | void CheckPrimaryKeyExists(Configuration& state, 20 | const std::string& sql_statement, 21 | bool& print_statement); 22 | 23 | void CheckGenericPrimaryKey(Configuration& state, 24 | const std::string& sql_statement, 25 | bool& print_statement); 26 | 27 | void CheckForeignKeyExists(Configuration& state, 28 | const std::string& sql_statement, 29 | bool& print_statement); 30 | 31 | void CheckVariableAttribute(Configuration& state, 32 | const std::string& sql_statement, 33 | bool& print_statement); 34 | 35 | void CheckMetadataTribbles(Configuration& state, 36 | const std::string& sql_statement, 37 | bool& print_statement); 38 | 39 | // PHYSICAL DATABASE DESIGN 40 | 41 | void CheckFloat(Configuration& state, 42 | const std::string& sql_statement, 43 | bool& print_statement); 44 | 45 | void CheckValuesInDefinition(Configuration& state, 46 | const std::string& sql_statement, 47 | bool& print_statement); 48 | 49 | void CheckExternalFiles(Configuration& state, 50 | const std::string& sql_statement, 51 | bool& print_statement); 52 | 53 | void CheckIndexCount(Configuration& state, 54 | const std::string& sql_statement, 55 | bool& print_statement); 56 | 57 | void CheckIndexAttributeOrder(Configuration& state, 58 | const std::string& sql_statement, 59 | bool& print_statement); 60 | 61 | // QUERY 62 | 63 | void CheckSelectStar(Configuration& state, 64 | const std::string& sql_statement, 65 | bool& print_statement); 66 | 67 | void CheckJoinWithoutEquality(Configuration& state, 68 | const std::string& sql_statement, 69 | bool& print_statement); 70 | 71 | void CheckNullUsage(Configuration& state, 72 | const std::string& sql_statement, 73 | bool& print_statement); 74 | 75 | void CheckNotNullUsage(Configuration& state, 76 | const std::string& sql_statement, 77 | bool& print_statement); 78 | 79 | void CheckConcatenation(Configuration& state, 80 | const std::string& sql_statement, 81 | bool& print_statement); 82 | 83 | void CheckGroupByUsage(Configuration& state, 84 | const std::string& sql_statement, 85 | bool& print_statement); 86 | 87 | void CheckOrderByRand(Configuration& state, 88 | const std::string& sql_statement, 89 | bool& print_statement); 90 | 91 | void CheckPatternMatching(Configuration& state, 92 | const std::string& sql_statement, 93 | bool& print_statement); 94 | 95 | void CheckSpaghettiQuery(Configuration& state, 96 | const std::string& sql_statement, 97 | bool& print_statement); 98 | 99 | void CheckJoinCount(Configuration& state, 100 | const std::string& sql_statement, 101 | bool& print_statement); 102 | 103 | void CheckDistinctCount(Configuration& state, 104 | const std::string& sql_statement, 105 | bool& print_statement); 106 | 107 | void CheckImplicitColumns(Configuration& state, 108 | const std::string& sql_statement, 109 | bool& print_statement); 110 | 111 | void CheckHaving(Configuration& state, 112 | const std::string& sql_statement, 113 | bool& print_statement); 114 | 115 | void CheckNesting(Configuration& state, 116 | const std::string& sql_statement, 117 | bool& print_statement); 118 | 119 | void CheckOr(Configuration& state, 120 | const std::string& sql_statement, 121 | bool& print_statement); 122 | 123 | void CheckUnion(Configuration& state, 124 | const std::string& sql_statement, 125 | bool& print_statement); 126 | 127 | void CheckDistinctJoin(Configuration& state, 128 | const std::string& sql_statement, 129 | bool& print_statement); 130 | 131 | // APPLICATION 132 | 133 | void CheckReadablePasswords(Configuration& state, 134 | const std::string& sql_statement, 135 | bool& print_statement); 136 | 137 | 138 | } // namespace machine 139 | -------------------------------------------------------------------------------- /src/list.cpp: -------------------------------------------------------------------------------- 1 | // LIST SOURCE 2 | 3 | #include 4 | 5 | #include "include/list.h" 6 | #include "include/checker.h" 7 | 8 | namespace sqlcheck { 9 | 10 | // UTILITY 11 | 12 | std::string GetTableName(const std::string& sql_statement){ 13 | std::string table_template = "create table"; 14 | std::size_t found = sql_statement.find(table_template); 15 | if (found == std::string::npos) { 16 | return ""; 17 | } 18 | 19 | // Locate table name 20 | auto rest = sql_statement.substr(found + table_template.size()); 21 | // Strip space at beginning 22 | rest = std::regex_replace(rest, std::regex("^ +| +$|( ) +"), "$1"); 23 | // check if space or ( comes first in remaining string 24 | if (rest.find(' ') < rest.find('(')) { 25 | // space comes first 26 | rest = rest.substr(0, rest.find(' ')); 27 | } else { 28 | // ( comes first 29 | rest = rest.substr(0, rest.find('(')); 30 | } 31 | auto table_name = rest; 32 | 33 | return table_name; 34 | } 35 | 36 | bool IsDDLStatement(const std::string& sql_statement){ 37 | std::string create_table_template = "create table"; 38 | std::size_t found = sql_statement.find(create_table_template); 39 | if (found != std::string::npos) { 40 | return true; 41 | } 42 | 43 | std::string alter_table_template = "alter table"; 44 | found = sql_statement.find(alter_table_template); 45 | if (found != std::string::npos) { 46 | return true; 47 | } 48 | 49 | return false; 50 | } 51 | 52 | bool IsCreateStatement(const std::string& sql_statement){ 53 | std::string create_table_template = "create table"; 54 | std::size_t found = sql_statement.find(create_table_template); 55 | if (found != std::string::npos) { 56 | return true; 57 | } 58 | 59 | return false; 60 | } 61 | 62 | // LOGICAL DATABASE DESIGN 63 | 64 | 65 | void CheckMultiValuedAttribute(Configuration& state, 66 | const std::string& sql_statement, 67 | bool& print_statement){ 68 | 69 | std::regex pattern("(id\\s+varchar)|(id\\s+text)|(id\\s+regexp)"); 70 | std::string title = "Multi-Valued Attribute"; 71 | PatternType pattern_type = PatternType::PATTERN_TYPE_LOGICAL_DATABASE_DESIGN; 72 | 73 | auto message = 74 | "● Store each value in its own column and row: " 75 | "Storing a list of IDs as a VARCHAR/TEXT column can cause performance and data integrity " 76 | "problems. Querying against such a column would require using pattern-matching " 77 | "expressions. It is awkward and costly to join a comma-separated list to matching rows. " 78 | "This will make it harder to validate IDs. Think about what is the greatest number of " 79 | "entries this list must support? Instead of using a multi-valued attribute, " 80 | "consider storing it in a separate table, so that each individual value of that attribute " 81 | "occupies a separate row. Such an intersection table implements a many-to-many relationship " 82 | "between the two referenced tables. This will greatly simplify querying and validating " 83 | "the IDs."; 84 | 85 | CheckPattern(state, 86 | sql_statement, 87 | print_statement, 88 | pattern, 89 | RISK_LEVEL_HIGH, 90 | pattern_type, 91 | title, 92 | message, 93 | true); 94 | 95 | } 96 | 97 | void CheckRecursiveDependency(Configuration& state, 98 | const std::string& sql_statement, 99 | bool& print_statement){ 100 | 101 | std::string table_name = GetTableName(sql_statement); 102 | if(table_name.empty()){ 103 | return; 104 | } 105 | 106 | std::regex pattern("(references\\s+" + table_name+ ")"); 107 | std::string title = "Recursive Dependency"; 108 | PatternType pattern_type = PatternType::PATTERN_TYPE_LOGICAL_DATABASE_DESIGN; 109 | 110 | auto message = 111 | "● Avoid recursive relationships: " 112 | "It’s common for data to have recursive relationships. Data may be organized in a " 113 | "treelike or hierarchical way. However, creating a foreign key constraint to enforce " 114 | "the relationship between two columns in the same table lends to awkward querying. " 115 | "Each level of the tree corresponds to another join. You will need to issue recursive " 116 | "queries to get all descendants or all ancestors of a node. " 117 | "A solution is to construct an additional closure table. It involves storing all paths " 118 | "through the tree, not just those with a direct parent-child relationship. " 119 | "You might want to compare different hierarchical data designs -- closure table, " 120 | "path enumeration, nested sets -- and pick one based on your application's needs."; 121 | 122 | CheckPattern(state, 123 | sql_statement, 124 | print_statement, 125 | pattern, 126 | RISK_LEVEL_HIGH, 127 | pattern_type, 128 | title, 129 | message, 130 | true); 131 | 132 | } 133 | 134 | void CheckPrimaryKeyExists(Configuration& state, 135 | const std::string& sql_statement, 136 | bool& print_statement){ 137 | 138 | auto create_statement = IsCreateStatement(sql_statement); 139 | if(create_statement == false){ 140 | return; 141 | } 142 | 143 | std::regex pattern("(primary key)"); 144 | std::string title = "Primary Key Does Not Exist"; 145 | PatternType pattern_type = PatternType::PATTERN_TYPE_LOGICAL_DATABASE_DESIGN; 146 | 147 | auto message = 148 | "● Consider adding a primary key: " 149 | "A primary key constraint is important when you need to do the following: " 150 | "prevent a table from containing duplicate rows, " 151 | "reference individual rows in queries, and " 152 | "support foreign key references " 153 | "If you don’t use primary key constraints, you create a chore for yourself: " 154 | "checking for duplicate rows. More often than not, you will need to define " 155 | "a primary key for every table. Use compound keys when they are appropriate."; 156 | 157 | CheckPattern(state, 158 | sql_statement, 159 | print_statement, 160 | pattern, 161 | RISK_LEVEL_MEDIUM, 162 | pattern_type, 163 | title, 164 | message, 165 | false); 166 | 167 | } 168 | 169 | void CheckGenericPrimaryKey(Configuration& state, 170 | const std::string& sql_statement, 171 | bool& print_statement){ 172 | 173 | auto ddl_statement = IsDDLStatement(sql_statement); 174 | if(ddl_statement == false){ 175 | return; 176 | } 177 | 178 | std::regex pattern("(\\s+[\\(]?id\\s+)|(,id\\s+)|(\\s+id\\s+serial)"); 179 | std::string title = "Generic Primary Key"; 180 | PatternType pattern_type = PatternType::PATTERN_TYPE_LOGICAL_DATABASE_DESIGN; 181 | 182 | auto message = 183 | "● Skip using a generic primary key (id): " 184 | "Adding an id column to every table causes several effects that make its " 185 | "use seem arbitrary. You might end up creating a redundant key or allow " 186 | "duplicate rows if you add this column in a compound key. " 187 | "The name id is so generic that it holds no meaning. This is especially " 188 | "important when you join two tables and they have the same primary " 189 | "key column name."; 190 | 191 | CheckPattern(state, 192 | sql_statement, 193 | print_statement, 194 | pattern, 195 | RISK_LEVEL_HIGH, 196 | pattern_type, 197 | title, 198 | message, 199 | true); 200 | 201 | } 202 | 203 | void CheckForeignKeyExists(Configuration& state, 204 | const std::string& sql_statement, 205 | bool& print_statement){ 206 | 207 | auto create_statement = IsCreateStatement(sql_statement); 208 | if(create_statement == false){ 209 | return; 210 | } 211 | 212 | std::regex pattern("(foreign key)"); 213 | std::string title = "Foreign Key Does Not Exist"; 214 | PatternType pattern_type = PatternType::PATTERN_TYPE_LOGICAL_DATABASE_DESIGN; 215 | 216 | auto message = 217 | "● Consider adding a foreign key: " 218 | "Are you leaving out the application constraints? Even though it seems at " 219 | "first that skipping foreign key constraints makes your database design " 220 | "simpler, more flexible, or speedier, you pay for this in other ways. " 221 | "It becomes your responsibility to write code to ensure referential integrity " 222 | "manually. Use foreign key constraints to enforce referential integrity. " 223 | "Foreign keys have another feature you can’t mimic using application code: " 224 | "cascading updates to multiple tables. This feature allows you to " 225 | "update or delete the parent row and lets the database takes care of any child " 226 | "rows that reference it. The way you declare the ON UPDATE or ON DELETE clauses " 227 | "in the foreign key constraint allow you to control the result of a cascading " 228 | "operation. Make your database mistake-proof with constraints."; 229 | 230 | CheckPattern(state, 231 | sql_statement, 232 | print_statement, 233 | pattern, 234 | RISK_LEVEL_MEDIUM, 235 | pattern_type, 236 | title, 237 | message, 238 | false); 239 | 240 | } 241 | 242 | void CheckVariableAttribute(Configuration& state, 243 | const std::string& sql_statement, 244 | bool& print_statement){ 245 | 246 | std::string table_name = GetTableName(sql_statement); 247 | if(table_name.empty()){ 248 | return; 249 | } 250 | 251 | auto found = table_name.find("attribute"); 252 | if (found == std::string::npos) { 253 | return; 254 | } 255 | 256 | std::regex pattern("(attribute)"); 257 | std::string title = "Entity-Attribute-Value Pattern"; 258 | PatternType pattern_type = PatternType::PATTERN_TYPE_LOGICAL_DATABASE_DESIGN; 259 | 260 | auto message = 261 | "● Dynamic schema with variable attributes: " 262 | "Are you trying to create a schema where you can define new attributes " 263 | "at runtime.? This involves storing attributes as rows in an attribute table. " 264 | "This is referred to as the Entity-Attribute-Value or schemaless pattern. " 265 | "When you use this pattern, you sacrifice many advantages that a conventional " 266 | "database design would have given you. You can't make mandatory attributes. " 267 | "You can't enforce referential integrity. You might find that attributes are " 268 | "not being named consistently. A solution is to store all related types in one table, " 269 | "with distinct columns for every attribute that exists in any type " 270 | "(Single Table Inheritance). Use one attribute to define the subtype of a given row. " 271 | "Many attributes are subtype-specific, and these columns must " 272 | "be given a null value on any row storing an object for which the attribute " 273 | "does not apply; the columns with non-null values become sparse. " 274 | "Another solution is to create a separate table for each subtype " 275 | "(Concrete Table Inheritance). A third solution mimics inheritance, " 276 | "as though tables were object-oriented classes (Class Table Inheritance). " 277 | "Create a single table for the base type, containing attributes common to " 278 | "all subtypes. Then for each subtype, create another table, with a primary key " 279 | "that also serves as a foreign key to the base table. " 280 | "If you have many subtypes or if you must support new attributes frequently, " 281 | "you can add a BLOB column to store data in a format such as XML or JSON, " 282 | "which encodes both the attribute names and their values. " 283 | "This design is best when you can’t limit yourself to a finite set of subtypes " 284 | "and when you need complete flexibility to define new attributes at any time."; 285 | 286 | CheckPattern(state, 287 | sql_statement, 288 | print_statement, 289 | pattern, 290 | RISK_LEVEL_MEDIUM, 291 | pattern_type, 292 | title, 293 | message, 294 | true); 295 | 296 | } 297 | 298 | void CheckMetadataTribbles(Configuration& state, 299 | const std::string& sql_statement, 300 | bool& print_statement){ 301 | 302 | auto ddl_statement = IsDDLStatement(sql_statement); 303 | if(ddl_statement == false){ 304 | return; 305 | } 306 | 307 | std::regex pattern("[A-za-z\\-_@]+[0-9]+ "); 308 | std::string title = "Metadata Tribbles"; 309 | PatternType pattern_type = PatternType::PATTERN_TYPE_LOGICAL_DATABASE_DESIGN; 310 | 311 | std::string message1 = 312 | "● Breaking down a table or column by year/user/etc.: " 313 | "You might be trying to split a single column into multiple columns, " 314 | "using column names based on distinct values in another attribute. " 315 | "For each year or user, you will need to add one more column or table. " 316 | "You are mixing metadata with data. You will now need to make sure that " 317 | "the primary key values are unique across all the split columns or tables. " 318 | "The solution is to use a feature called sharding or horizontal partitioning. " 319 | "(PARTITION BY HASH ( YEAR(...) ). With this feature, you can gain the " 320 | "benefits of splitting a large table without the drawbacks. " 321 | "Partitioning is not defined in the SQL standard, so each brand of database " 322 | "implements it in their own nonstandard way. " 323 | "Another remedy for metadata tribbles is to create a dependent table. " 324 | "Instead of one row per entity with multiple columns for each year, " 325 | "use multiple rows. Don't let data spawn metadata."; 326 | 327 | std::string message2 = 328 | "● Store each value with the same meaning in a single column: " 329 | "Creating multiple columns in a table with the same prefix " 330 | "indicates that you are trying to store a multivalued attribute. " 331 | "This design makes it hard to add or remove values, " 332 | "to ensure the uniqueness of values, and handling growing sets of values. " 333 | "The best solution is to create a dependent table with one column for the " 334 | "multivalued attribute. Store the multiple values in multiple rows instead of " 335 | "multiple columns and define a foreign key in the dependent table to associate " 336 | "the values to its parent row."; 337 | 338 | auto message = message1 + "\n" + message2; 339 | 340 | CheckPattern(state, 341 | sql_statement, 342 | print_statement, 343 | pattern, 344 | RISK_LEVEL_MEDIUM, 345 | pattern_type, 346 | title, 347 | message, 348 | true); 349 | 350 | } 351 | 352 | // PHYSICAL DATABASE DESIGN 353 | 354 | void CheckFloat(Configuration& state, 355 | const std::string& sql_statement, 356 | bool& print_statement){ 357 | 358 | std::regex pattern("(float)|(real)|(double precision)|(0\\.000[0-9]*)"); 359 | std::string title = "Imprecise Data Type"; 360 | PatternType pattern_type = PatternType::PATTERN_TYPE_PHYSICAL_DATABASE_DESIGN; 361 | 362 | auto message = 363 | "● Use precise data types: " 364 | "Virtually any use of FLOAT, REAL, or DOUBLE PRECISION data types is suspect. " 365 | "Most applications that use floating-point numbers don't require the range of " 366 | "values supported by IEEE 754 formats. The cumulative impact of inexact " 367 | "floating-point numbers is severe when calculating aggregates. " 368 | "Instead of FLOAT or its siblings, use the NUMERIC or DECIMAL SQL data types " 369 | "for fixed-precision fractional numbers. These data types store numeric values " 370 | "exactly, up to the precision you specify in the column definition. " 371 | "Do not use FLOAT if you can avoid it."; 372 | 373 | CheckPattern(state, 374 | sql_statement, 375 | print_statement, 376 | pattern, 377 | RISK_LEVEL_MEDIUM, 378 | pattern_type, 379 | title, 380 | message, 381 | true); 382 | 383 | } 384 | 385 | void CheckValuesInDefinition(Configuration& state, 386 | const std::string& sql_statement, 387 | bool& print_statement){ 388 | 389 | auto ddl_statement = IsDDLStatement(sql_statement); 390 | if(ddl_statement == false){ 391 | return; 392 | } 393 | 394 | std::regex pattern("( enum)|( in \\()"); 395 | std::string title = "Values In Definition"; 396 | PatternType pattern_type = PatternType::PATTERN_TYPE_PHYSICAL_DATABASE_DESIGN; 397 | 398 | auto message = 399 | "● Don't specify values in column definition: " 400 | "With enum, you declare the values as strings, " 401 | "but internally the column is stored as the ordinal number of the string " 402 | "in the enumerated list. The storage is therefore compact, but when you " 403 | "sort a query by this column, the result is ordered by the ordinal value, " 404 | "not alphabetically by the string value. You may not expect this behavior. " 405 | "There's no syntax to add or remove a value from an ENUM or check constraint; " 406 | "you can only redefine the column with a new set of values. " 407 | "Moreover, if you make a value obsolete, you could upset historical data. " 408 | "As a matter of policy, changing metadata — that is, changing the definition " 409 | "of tables and columns—should be infrequent and with attention to testing and " 410 | "quality assurance. There's a better solution to restrict values in a column: " 411 | "create a lookup table with one row for each value you allow. " 412 | "Then declare a foreign key constraint on the old table referencing " 413 | "the new table. " 414 | "Use metadata when validating against a fixed set of values. " 415 | "Use data when validating against a fluid set of values."; 416 | 417 | CheckPattern(state, 418 | sql_statement, 419 | print_statement, 420 | pattern, 421 | RISK_LEVEL_MEDIUM, 422 | pattern_type, 423 | title, 424 | message, 425 | true); 426 | 427 | } 428 | 429 | void CheckExternalFiles(Configuration& state, 430 | const std::string& sql_statement, 431 | bool& print_statement){ 432 | 433 | std::regex pattern("(path varchar)|(unlink\\s?\\()"); 434 | std::string title = "Files Are Not SQL Data Types"; 435 | PatternType pattern_type = PatternType::PATTERN_TYPE_PHYSICAL_DATABASE_DESIGN; 436 | 437 | auto message = 438 | "● Resources outside the database are not managed by the database: " 439 | "It's common for programmers to be unequivocal that we should always " 440 | "store files external to the database. " 441 | "Files don't obey DELETE, transaction isolation, rollback, or work well with " 442 | "database backup tools. They do not obey SQL access privileges and are not SQL " 443 | "data types. " 444 | "Resources outside the database are not managed by the database. " 445 | "You should consider storing blobs inside the database instead of in " 446 | "external files. You can save the contents of a BLOB column to a file."; 447 | 448 | CheckPattern(state, 449 | sql_statement, 450 | print_statement, 451 | pattern, 452 | RISK_LEVEL_MEDIUM, 453 | pattern_type, 454 | title, 455 | message, 456 | true); 457 | 458 | } 459 | 460 | void CheckIndexCount(Configuration& state, 461 | const std::string& sql_statement, 462 | bool& print_statement){ 463 | 464 | auto create_statement = IsCreateStatement(sql_statement); 465 | if(create_statement == false){ 466 | return; 467 | } 468 | 469 | std::size_t min_count = 3; 470 | std::regex pattern("(index)"); 471 | std::string title = "Too Many Indexes"; 472 | PatternType pattern_type = PatternType::PATTERN_TYPE_PHYSICAL_DATABASE_DESIGN; 473 | 474 | auto message = 475 | "● Don't create too many indexes: " 476 | "You benefit from an index only if you run queries that use that index. " 477 | "There's no benefit to creating indexes that you don't use. " 478 | "If you cover a database table with indexes, you incur a lot of overhead " 479 | "with no assurance of payoff. " 480 | "Consider dropping unnecessary indexes. " 481 | "If an index provides all the columns we need, then we don't need to read " 482 | "rows of data from the table at all. Consider using such covering indexes. " 483 | "Know your data, know your queries, and maintain the right set of indexes."; 484 | 485 | CheckPattern(state, 486 | sql_statement, 487 | print_statement, 488 | pattern, 489 | RISK_LEVEL_MEDIUM, 490 | pattern_type, 491 | title, 492 | message, 493 | true, 494 | min_count); 495 | 496 | } 497 | 498 | void CheckIndexAttributeOrder(Configuration& state, 499 | const std::string& sql_statement, 500 | bool& print_statement){ 501 | 502 | 503 | std::regex pattern("(create index)"); 504 | std::string title = "Index Attribute Order"; 505 | PatternType pattern_type = PatternType::PATTERN_TYPE_PHYSICAL_DATABASE_DESIGN; 506 | 507 | auto message = 508 | "● Align the index attribute order with queries: " 509 | "If you create a compound index for the columns, make sure that the query " 510 | "attributes are in the same order as the index attributes, so that the DBMS " 511 | "can use the index while processing the query. " 512 | "If the query and index attribute orders are not aligned, then the DBMS might " 513 | "be unable to use the index during query processing. " 514 | "EX: CREATE INDEX TelephoneBook ON Accounts(last_name, first_name); " 515 | "SELECT * FROM Accounts ORDER BY first_name, last_name;"; 516 | 517 | CheckPattern(state, 518 | sql_statement, 519 | print_statement, 520 | pattern, 521 | RISK_LEVEL_LOW, 522 | pattern_type, 523 | title, 524 | message, 525 | true); 526 | 527 | } 528 | 529 | 530 | // QUERY 531 | 532 | void CheckSelectStar(Configuration& state, 533 | const std::string& sql_statement, 534 | bool& print_statement){ 535 | 536 | std::regex pattern("(select\\s+\\*)"); 537 | std::string title = "SELECT *"; 538 | PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY; 539 | 540 | std::string message1 = 541 | "● Inefficiency in moving data to the consumer: " 542 | "When you SELECT *, you're often retrieving more columns from the database than " 543 | "your application really needs to function. This causes more data to move from " 544 | "the database server to the client, slowing access and increasing load on your " 545 | "machines, as well as taking more time to travel across the network. This is " 546 | "especially true when someone adds new columns to underlying tables that didn't " 547 | "exist and weren't needed when the original consumers coded their data access."; 548 | 549 | std::string message2 = 550 | "● Indexing issues: " 551 | "Consider a scenario where you want to tune a query to a high level of performance. " 552 | "If you were to use *, and it returned more columns than you actually needed, " 553 | "the server would often have to perform more expensive methods to retrieve your " 554 | "data than it otherwise might. For example, you wouldn't be able to create an index " 555 | "which simply covered the columns in your SELECT list, and even if you did " 556 | "(including all columns [shudder]), the next guy who came around and added a column " 557 | "to the underlying table would cause the optimizer to ignore your optimized covering " 558 | "index, and you'd likely find that the performance of your query would drop " 559 | "substantially for no readily apparent reason."; 560 | 561 | std::string message3 = 562 | "● Binding Problems: " 563 | "When you SELECT *, it's possible to retrieve two columns of the same name from two " 564 | "different tables. This can often crash your data consumer. Imagine a query that joins " 565 | "two tables, both of which contain a column called \"ID\". How would a consumer know " 566 | "which was which? SELECT * can also confuse views (at least in some versions SQL Server) " 567 | "when underlying table structures change -- the view is not rebuilt, and the data which " 568 | "comes back can be nonsense. And the worst part of it is that you can take care to name " 569 | "your columns whatever you want, but the next guy who comes along might have no way of " 570 | "knowing that he has to worry about adding a column which will collide with your " 571 | "already-developed names."; 572 | 573 | auto message = message1 + "\n" + message2 + "\n" + message3; 574 | 575 | CheckPattern(state, 576 | sql_statement, 577 | print_statement, 578 | pattern, 579 | RISK_LEVEL_HIGH, 580 | pattern_type, 581 | title, 582 | message, 583 | true); 584 | 585 | } 586 | 587 | void CheckJoinWithoutEquality(Configuration& state, 588 | const std::string& sql_statement, 589 | bool& print_statement) { 590 | std::regex pattern("join[\\s\\._]?[^=]+?(left|right|join|where|case)"); 591 | std::string title = "JOIN Without Equality Check"; 592 | PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY; 593 | 594 | auto message = 595 | "● Use = with JOIN: " 596 | "JOIN should always have an equality check to ensure proper scope of records. "; 597 | 598 | CheckPattern(state, 599 | sql_statement, 600 | print_statement, 601 | pattern, 602 | RISK_LEVEL_HIGH, 603 | pattern_type, 604 | title, 605 | message, 606 | true); 607 | } 608 | 609 | void CheckNullUsage(Configuration& state, 610 | const std::string& sql_statement, 611 | bool& print_statement) { 612 | 613 | std::regex pattern("(null)"); 614 | std::string title = "NULL Usage"; 615 | PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY; 616 | 617 | auto message = 618 | "● Use NULL as a Unique Value: " 619 | "NULL is not the same as zero. A number ten greater than an unknown is still an unknown. " 620 | "NULL is not the same as a string of zero length. " 621 | "Combining any string with NULL in standard SQL returns NULL. " 622 | "NULL is not the same as false. Boolean expressions with AND, OR, and NOT also produce " 623 | "results that some people find confusing. " 624 | "When you declare a column as NOT NULL, it should be because it would make no sense " 625 | "for the row to exist without a value in that column. " 626 | "Use null to signify a missing value for any data type."; 627 | 628 | CheckPattern(state, 629 | sql_statement, 630 | print_statement, 631 | pattern, 632 | RISK_LEVEL_NONE, 633 | pattern_type, 634 | title, 635 | message, 636 | true); 637 | 638 | } 639 | 640 | void CheckNotNullUsage(Configuration& state, 641 | const std::string& sql_statement, 642 | bool& print_statement) { 643 | 644 | auto create_statement = IsCreateStatement(sql_statement); 645 | if(create_statement == false){ 646 | return; 647 | } 648 | 649 | std::regex pattern("(not null)"); 650 | std::string title = "NOT NULL Usage"; 651 | PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY; 652 | 653 | auto message = 654 | "● Use NOT NULL only if the column cannot have a missing value: " 655 | "When you declare a column as NOT NULL, it should be because it would make no sense " 656 | "for the row to exist without a value in that column. " 657 | "Use null to signify a missing value for any data type."; 658 | 659 | CheckPattern(state, 660 | sql_statement, 661 | print_statement, 662 | pattern, 663 | RISK_LEVEL_NONE, 664 | pattern_type, 665 | title, 666 | message, 667 | true); 668 | 669 | } 670 | 671 | void CheckConcatenation(Configuration& state, 672 | const std::string& sql_statement, 673 | bool& print_statement) { 674 | 675 | 676 | std::regex pattern("\\|\\|"); 677 | std::string title = "String Concatenation"; 678 | PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY; 679 | 680 | auto message = 681 | "● Use COALESCE for string concatenation of nullable columns: " 682 | "You may need to force a column or expression to be non-null for the sake of " 683 | "simplifying the query logic, but you don't want that value to be stored. " 684 | "Use COALESCE function to construct the concatenated expression so that a " 685 | "null-valued column doesn't make the whole expression become null. " 686 | "EX: SELECT first_name || COALESCE(' ' || middle_initial || ' ', ' ') || last_name " 687 | "AS full_name FROM Accounts;"; 688 | 689 | CheckPattern(state, 690 | sql_statement, 691 | print_statement, 692 | pattern, 693 | RISK_LEVEL_LOW, 694 | pattern_type, 695 | title, 696 | message, 697 | true); 698 | 699 | } 700 | 701 | void CheckGroupByUsage(Configuration& state, 702 | const std::string& sql_statement, 703 | bool& print_statement){ 704 | 705 | std::regex pattern("(group by)"); 706 | std::string title = "GROUP BY Usage"; 707 | PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY; 708 | 709 | auto message = 710 | "● Do not reference non-grouped columns: " 711 | "Every column in the select-list of a query must have a single value row " 712 | "per row group. This is called the Single-Value Rule. " 713 | "Columns named in the GROUP BY clause are guaranteed to be exactly one value " 714 | "per group, no matter how many rows the group matches. " 715 | "Most DBMSs report an error if you try to run any query that tries to return " 716 | "a column other than those columns named in the GROUP BY clause or as " 717 | "arguments to aggregate functions. " 718 | "Every expression in the select list must be contained in either an " 719 | "aggregate function or the GROUP BY clause. " 720 | "Follow the single-value rule to avoid ambiguous query results."; 721 | 722 | CheckPattern(state, 723 | sql_statement, 724 | print_statement, 725 | pattern, 726 | RISK_LEVEL_LOW, 727 | pattern_type, 728 | title, 729 | message, 730 | true); 731 | 732 | 733 | } 734 | 735 | void CheckOrderByRand(Configuration& state, 736 | const std::string& sql_statement, 737 | bool& print_statement){ 738 | 739 | std::regex pattern("(order by rand\\()"); 740 | std::string title = "ORDER BY RAND Usage"; 741 | PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY; 742 | 743 | auto message = 744 | "● Sorting by a nondeterministic expression (RAND()) means the sorting cannot benefit from an index: " 745 | "There is no index containing the values returned by the random function. " 746 | "That’s the point of them being ran- dom: they are different and " 747 | "unpredictable each time they're selected. This is a problem for the performance " 748 | "of the query, because using an index is one of the best ways of speeding up " 749 | "sorting. The consequence of not using an index is that the query result set " 750 | "has to be sorted by the database using a slow table scan. " 751 | "One technique that avoids sorting the table is to choose a random value " 752 | "between 1 and the greatest primary key value. " 753 | "Still another technique that avoids problems found in the preceding alternatives " 754 | "is to count the rows in the data set and return a random number between 0 and " 755 | "the count. Then use this number as an offset when querying the data set. " 756 | "Some queries just cannot be optimized; consider taking a different approach."; 757 | 758 | CheckPattern(state, 759 | sql_statement, 760 | print_statement, 761 | pattern, 762 | RISK_LEVEL_MEDIUM, 763 | pattern_type, 764 | title, 765 | message, 766 | true); 767 | 768 | } 769 | 770 | void CheckPatternMatching(Configuration& state, 771 | const std::string& sql_statement, 772 | bool& print_statement){ 773 | 774 | std::regex pattern("(\blike\b)|(\bregexp\b)|(\bsimilar to\b)"); 775 | std::string title = "Pattern Matching Usage"; 776 | PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY; 777 | 778 | auto message = 779 | "● Avoid using vanilla pattern matching: " 780 | "The most important disadvantage of pattern-matching operators is that " 781 | "they have poor performance. A second problem of simple pattern-matching using LIKE " 782 | "or regular expressions is that it can find unintended matches. " 783 | "It's best to use a specialized search engine technology like Apache Lucene, instead of SQL. " 784 | "Another alternative is to reduce the recurring cost of search by saving the result. " 785 | "Consider using vendor extensions like FULLTEXT INDEX in MySQL. " 786 | "More broadly, you don't have to use SQL to solve every problem."; 787 | 788 | CheckPattern(state, 789 | sql_statement, 790 | print_statement, 791 | pattern, 792 | RISK_LEVEL_MEDIUM, 793 | pattern_type, 794 | title, 795 | message, 796 | true); 797 | 798 | } 799 | 800 | void CheckSpaghettiQuery(Configuration& state, 801 | const std::string& sql_statement, 802 | bool& print_statement){ 803 | 804 | std::regex true_pattern(".+?"); 805 | std::regex false_pattern("pattern must not exist"); 806 | std::regex pattern; 807 | 808 | std::string title = "Spaghetti Query Alert"; 809 | PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY; 810 | std::size_t spaghetti_query_char_count = 500; 811 | 812 | if(sql_statement.size() >= spaghetti_query_char_count){ 813 | pattern = true_pattern; 814 | } 815 | else { 816 | pattern = false_pattern; 817 | } 818 | 819 | auto message = 820 | "● Split up a complex spaghetti query into several simpler queries: " 821 | "SQL is a very expressive language—you can accomplish a lot in a single query or statement. " 822 | "But that doesn't mean it's mandatory or even a good idea to approach every task with the " 823 | "assumption it has to be done in one line of code. " 824 | "One common unintended consequence of producing all your results in one query is " 825 | "a Cartesian product. This happens when two of the tables in the query have no condition " 826 | "restricting their relationship. Without such a restriction, the join of two tables pairs " 827 | "each row in the first table to every row in the other table. Each such pairing becomes a " 828 | "row of the result set, and you end up with many more rows than you expect. " 829 | "It's important to consider that these queries are simply hard to write, hard to modify, " 830 | "and hard to debug. You should expect to get regular requests for incremental enhancements " 831 | "to your database applications. Managers want more complex reports and more fields in a " 832 | "user interface. If you design intricate, monolithic SQL queries, it's more costly and " 833 | "time-consuming to make enhancements to them. Your time is worth something, both to you " 834 | "and to your project. " 835 | "Split up a complex spaghetti query into several simpler queries. " 836 | "When you split up a complex SQL query, the result may be many similar queries, " 837 | "perhaps varying slightly depending on data values. Writing these queries is a chore, " 838 | "so it's a good application of SQL code generation. " 839 | "Although SQL makes it seem possible to solve a complex problem in a single line of code, " 840 | "don't be tempted to build a house of cards."; 841 | 842 | CheckPattern(state, 843 | sql_statement, 844 | print_statement, 845 | pattern, 846 | RISK_LEVEL_LOW, 847 | pattern_type, 848 | title, 849 | message, 850 | true); 851 | 852 | } 853 | 854 | void CheckJoinCount(Configuration& state, 855 | const std::string& sql_statement, 856 | bool& print_statement){ 857 | 858 | std::regex pattern("(\bjoin\b)"); 859 | std::string title = "Reduce Number of JOINs"; 860 | PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY; 861 | std::size_t min_count = 5; 862 | 863 | auto message = 864 | "● Reduce Number of JOINs: " 865 | "Too many JOINs is a symptom of complex spaghetti queries. Consider splitting " 866 | "up the complex query into many simpler queries, and reduce the number of JOINs"; 867 | 868 | CheckPattern(state, 869 | sql_statement, 870 | print_statement, 871 | pattern, 872 | RISK_LEVEL_LOW, 873 | pattern_type, 874 | title, 875 | message, 876 | true, 877 | min_count); 878 | 879 | } 880 | 881 | void CheckDistinctCount(Configuration& state, 882 | const std::string& sql_statement, 883 | bool& print_statement){ 884 | 885 | std::regex pattern("(\bdistinct\b)"); 886 | std::string title = "Eliminate Unnecessary DISTINCT Conditions"; 887 | PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY; 888 | std::size_t min_count = 5; 889 | 890 | auto message = 891 | "● Eliminate Unnecessary DISTINCT Conditions: " 892 | "Too many DISTINCT conditions is a symptom of complex spaghetti queries. " 893 | "Consider splitting up the complex query into many simpler queries, " 894 | "and reduce the number of DISTINCT conditions " 895 | "It is possible that the DISTINCT condition has no effect if a primary key " 896 | "column is part of the result set of columns"; 897 | 898 | CheckPattern(state, 899 | sql_statement, 900 | print_statement, 901 | pattern, 902 | RISK_LEVEL_LOW, 903 | pattern_type, 904 | title, 905 | message, 906 | true, 907 | min_count); 908 | 909 | } 910 | 911 | void CheckImplicitColumns(Configuration& state, 912 | const std::string& sql_statement, 913 | bool& print_statement){ 914 | 915 | std::regex pattern("(insert into \\S+ values)"); 916 | std::string title = "Implicit Column Usage"; 917 | PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY; 918 | 919 | auto message = 920 | "● Explicitly name columns: " 921 | "Although using wildcards and unnamed columns satisfies the goal " 922 | "of less typing, this habit creates several hazards. " 923 | "This can break application refactoring and can harm performance. " 924 | "Always spell out all the columns you need, instead of relying on " 925 | "wild-cards or implicit column lists."; 926 | 927 | CheckPattern(state, 928 | sql_statement, 929 | print_statement, 930 | pattern, 931 | RISK_LEVEL_LOW, 932 | pattern_type, 933 | title, 934 | message, 935 | true); 936 | 937 | } 938 | 939 | void CheckHaving(Configuration& state, 940 | const std::string& sql_statement, 941 | bool& print_statement){ 942 | 943 | std::regex pattern("(\bhaving\b)"); 944 | std::string title = "HAVING Clause Usage"; 945 | PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY; 946 | 947 | auto message = 948 | "● Consider removing the HAVING clause: " 949 | "Rewriting the query's HAVING clause into a predicate will enable the " 950 | "use of indexes during query processing. " 951 | "EX: SELECT s.cust_id,count(s.cust_id) FROM SH.sales s GROUP BY s.cust_id " 952 | "HAVING s.cust_id != '1660' AND s.cust_id != '2'; can be rewritten as: " 953 | "SELECT s.cust_id,count(cust_id) FROM SH.sales s WHERE s.cust_id != '1660' " 954 | "AND s.cust_id !='2' GROUP BY s.cust_id;"; 955 | 956 | CheckPattern(state, 957 | sql_statement, 958 | print_statement, 959 | pattern, 960 | RISK_LEVEL_LOW, 961 | pattern_type, 962 | title, 963 | message, 964 | true); 965 | 966 | } 967 | 968 | void CheckNesting(Configuration& state, 969 | const std::string& sql_statement, 970 | bool& print_statement){ 971 | 972 | std::regex pattern("(\bselect\b)"); 973 | std::string title = "Nested sub queries"; 974 | PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY; 975 | std::size_t min_count = 2; 976 | 977 | auto message = 978 | "● Un-nest sub queries: " 979 | " Rewriting nested queries as joins often leads to more efficient " 980 | "execution and more effective optimization. In general, sub-query unnesting " 981 | "is always done for correlated sub-queries with, at most, one table in " 982 | "the FROM clause, which are used in ANY, ALL, and EXISTS predicates. " 983 | "A uncorrelated sub-query, or a sub-query with more than one table in " 984 | "the FROM clause, is flattened if it can be decided, based on the query " 985 | "semantics, that the sub-query returns at most one row. " 986 | "EX: SELECT * FROM SH.products p WHERE p.prod_id = (SELECT s.prod_id FROM SH.sales " 987 | "s WHERE s.cust_id = 100996 AND s.quantity_sold = 1 ); can be rewritten as: " 988 | "SELECT p.* FROM SH.products p, sales s WHERE p.prod_id = s.prod_id AND " 989 | "s.cust_id = 100996 AND s.quantity_sold = 1;"; 990 | 991 | CheckPattern(state, 992 | sql_statement, 993 | print_statement, 994 | pattern, 995 | RISK_LEVEL_LOW, 996 | pattern_type, 997 | title, 998 | message, 999 | true, 1000 | min_count); 1001 | 1002 | 1003 | } 1004 | 1005 | void CheckOr(Configuration& state, 1006 | const std::string& sql_statement, 1007 | bool& print_statement){ 1008 | 1009 | std::regex pattern("(\bor\b)"); 1010 | std::string title = "OR Usage"; 1011 | PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY; 1012 | 1013 | auto message = 1014 | "● Consider using an IN predicate when querying an indexed column: " 1015 | "The IN-list predicate can be exploited for indexed retrieval and also, " 1016 | "the optimizer can sort the IN-list to match the sort sequence of the index, " 1017 | "leading to more efficient retrieval. Note that the IN-list must contain only " 1018 | "constants, or values that are constant during one execution of the query block, " 1019 | "such as outer references. " 1020 | "EX: SELECT s.* FROM SH.sales s WHERE s.prod_id = 14 OR s.prod_id = 17; " 1021 | "can be rewritten as: " 1022 | "SELECT s.* FROM SH.sales s WHERE s.prod_id IN (14, 17);"; 1023 | 1024 | CheckPattern(state, 1025 | sql_statement, 1026 | print_statement, 1027 | pattern, 1028 | RISK_LEVEL_LOW, 1029 | pattern_type, 1030 | title, 1031 | message, 1032 | true); 1033 | 1034 | } 1035 | 1036 | void CheckUnion(Configuration& state, 1037 | const std::string& sql_statement, 1038 | bool& print_statement){ 1039 | 1040 | std::regex pattern("(union)"); 1041 | std::string title = "UNION Usage"; 1042 | PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY; 1043 | 1044 | auto message = 1045 | "● Consider using UNION ALL if you do not care about duplicates: " 1046 | "Unlike UNION which removes duplicates, UNION ALL allows duplicate tuples. " 1047 | "If you do not care about duplicate tuples, then using UNION ALL would be " 1048 | "a faster option."; 1049 | 1050 | CheckPattern(state, 1051 | sql_statement, 1052 | print_statement, 1053 | pattern, 1054 | RISK_LEVEL_LOW, 1055 | pattern_type, 1056 | title, 1057 | message, 1058 | true); 1059 | 1060 | } 1061 | 1062 | void CheckDistinctJoin(Configuration& state, 1063 | const std::string& sql_statement, 1064 | bool& print_statement){ 1065 | 1066 | std::regex pattern("(distinct.*join)"); 1067 | std::string title = "DISTINCT & JOIN Usage"; 1068 | PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY; 1069 | 1070 | auto message = 1071 | "● Consider using a sub-query with EXISTS instead of DISTINCT: " 1072 | "The DISTINCT keyword removes duplicates after sorting the tuples. " 1073 | "Instead, consider using a sub query with the EXISTS keyword, you can avoid " 1074 | "having to return an entire table. " 1075 | "EX: SELECT DISTINCT c.country_id, c.country_name FROM SH.countries c, " 1076 | "SH.customers e WHERE e.country_id = c.country_id; " 1077 | "can be rewritten to: " 1078 | "SELECT c.country_id, c.country_name FROM SH.countries c WHERE EXISTS " 1079 | "(SELECT 'X' FROM SH.customers e WHERE e.country_id = c.country_id);"; 1080 | 1081 | CheckPattern(state, 1082 | sql_statement, 1083 | print_statement, 1084 | pattern, 1085 | RISK_LEVEL_LOW, 1086 | pattern_type, 1087 | title, 1088 | message, 1089 | true); 1090 | 1091 | } 1092 | 1093 | 1094 | 1095 | // APPLICATION 1096 | 1097 | void CheckReadablePasswords(Configuration& state, 1098 | const std::string& sql_statement, 1099 | bool& print_statement){ 1100 | 1101 | std::regex pattern("(password varchar)|(password text)|(password =)| " 1102 | "(pwd varchar)|(pwd text)|(pwd =)"); 1103 | std::string title = "Readable Passwords"; 1104 | PatternType pattern_type = PatternType::PATTERN_TYPE_APPLICATION; 1105 | 1106 | auto message = 1107 | "● Do not store readable passwords: " 1108 | "It’s not secure to store a password in clear text or even to pass it over the " 1109 | "network in the clear. If an attacker can read the SQL statement you use to " 1110 | "insert a password, they can see the password plainly. " 1111 | "Additionally, interpolating the user's input string into the SQL query in plain text " 1112 | "exposes it to discovery by an attacker. " 1113 | "If you can read passwords, so can a hacker. " 1114 | "The solution is to encode the password using a one-way cryptographic hash " 1115 | "function. This function transforms its input string into a new string, " 1116 | "called the hash, that is unrecognizable. " 1117 | "Use a salt to thwart dictionary attacks. Don't put the plain-text password " 1118 | "into the SQL query. Instead, compute the hash in your application code, " 1119 | "and use only the hash in the SQL query."; 1120 | 1121 | CheckPattern(state, 1122 | sql_statement, 1123 | print_statement, 1124 | pattern, 1125 | RISK_LEVEL_LOW, 1126 | pattern_type, 1127 | title, 1128 | message, 1129 | true); 1130 | 1131 | } 1132 | 1133 | } // namespace machine 1134 | 1135 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | // MAIN SOURCE 2 | 3 | #include 4 | #include 5 | 6 | #include "checker.h" 7 | #include "include/configuration.h" 8 | 9 | #include "gflags/gflags.h" 10 | 11 | namespace sqlcheck { 12 | 13 | Configuration state; 14 | 15 | } // namespace sqlcheck 16 | 17 | DEFINE_bool(c, false, "Display warnings in color mode"); 18 | DEFINE_bool(color_mode, false, "Display warnings in color mode"); 19 | DEFINE_bool(v, false, "Display verbose warnings"); 20 | DEFINE_bool(verbose, false, "Display verbose warnings"); 21 | DEFINE_string(d, "", "Query delimiter string (default -- ;)"); 22 | DEFINE_string(delimiter, "", "Query delimiter string (default -- ;)"); 23 | DEFINE_bool(h, false, "Print help message"); 24 | DEFINE_uint64(r, sqlcheck::RISK_LEVEL_ALL, 25 | "Set of anti-patterns to check \n" 26 | "1 (all anti-patterns, default) \n" 27 | "2 (only medium and high risk anti-patterns) \n" 28 | "3 (only high risk anti-patterns) \n"); 29 | DEFINE_uint64(risk_level, sqlcheck::RISK_LEVEL_ALL, 30 | "Set of anti-patterns to check \n" 31 | "1 (all anti-patterns, default) \n" 32 | "2 (only medium and high risk anti-patterns) \n" 33 | "3 (only high risk anti-patterns) \n"); 34 | DEFINE_string(f, "", "SQL file name"); // standard input 35 | DEFINE_string(file_name, "", "SQL file name"); // standard input 36 | 37 | void ConfigureChecker(sqlcheck::Configuration &state) { 38 | 39 | // Default Values 40 | state.risk_level = sqlcheck::RISK_LEVEL_ALL; 41 | state.file_name = ""; 42 | state.delimiter = ";"; 43 | state.testing_mode = false; 44 | state.verbose = false; 45 | state.color_mode = false; 46 | state.line_number = 1; 47 | 48 | // Configure checker 49 | state.color_mode = FLAGS_c || FLAGS_color_mode; 50 | state.verbose = FLAGS_v || FLAGS_verbose; 51 | if(FLAGS_f.empty() == false){ 52 | state.file_name = FLAGS_f; 53 | } 54 | if(FLAGS_file_name.empty() == false){ 55 | state.file_name = FLAGS_file_name; 56 | } 57 | if(FLAGS_d.empty() == false){ 58 | state.delimiter = FLAGS_f; 59 | } 60 | if(FLAGS_delimiter.empty() == false){ 61 | state.delimiter = FLAGS_delimiter; 62 | } 63 | if(FLAGS_r != 0){ 64 | state.risk_level = (sqlcheck::RiskLevel) FLAGS_r; 65 | } 66 | if(FLAGS_risk_level != 0){ 67 | state.risk_level = (sqlcheck::RiskLevel) FLAGS_risk_level; 68 | } 69 | 70 | // Run validators 71 | std::cout << "+-------------------------------------------------+\n" 72 | << "| SQLCHECK |\n" 73 | << "+-------------------------------------------------+\n"; 74 | 75 | ValidateRiskLevel(state); 76 | ValidateFileName(state); 77 | ValidateColorMode(state); 78 | ValidateVerbose(state); 79 | ValidateDelimiter(state); 80 | 81 | std::cout << "-------------------------------------------------\n"; 82 | 83 | } 84 | 85 | void Usage() { 86 | std::cout << 87 | "Command line options : sqlcheck \n" 88 | " -f -file_name : SQL file name\n" 89 | " -r -risk_level : Set of anti-patterns to check\n" 90 | " : 1 (all anti-patterns, default) \n" 91 | " : 2 (only medium and high risk anti-patterns) \n" 92 | " : 3 (only high risk anti-patterns) \n" 93 | " -c -color_mode : Display warnings in color mode \n" 94 | " -v -verbose : Display verbose warnings \n" 95 | " -d -delimiter : Query delimiter string (; by default) \n" 96 | " -h -help : Print help message \n"; 97 | } 98 | 99 | int main(int argc, char **argv) { 100 | 101 | bool has_issues = false; 102 | 103 | try { 104 | 105 | // Parse the input arguments from the user 106 | gflags::SetUsageMessage(""); 107 | gflags::SetVersionString("1.2.1"); 108 | 109 | gflags::ParseCommandLineFlags(&argc, &argv, true); 110 | 111 | // Print help message 112 | if(FLAGS_h == true){ 113 | FLAGS_h = false; 114 | Usage(); 115 | 116 | gflags::ShutDownCommandLineFlags(); 117 | return (EXIT_SUCCESS); 118 | } 119 | 120 | // Customize the checker configuration 121 | ConfigureChecker(sqlcheck::state); 122 | 123 | // Invoke the checker 124 | has_issues = sqlcheck::Check(sqlcheck::state); 125 | 126 | } 127 | // Catching at the top level ensures that 128 | // destructors are always called 129 | catch (std::exception& exc) { 130 | std::cerr << exc.what() << std::endl; 131 | gflags::ShutDownCommandLineFlags(); 132 | exit(EXIT_FAILURE); 133 | } 134 | 135 | gflags::ShutDownCommandLineFlags(); 136 | 137 | (has_issues) ? exit(EXIT_FAILURE) : exit(EXIT_SUCCESS); 138 | } 139 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ################################################################################## 2 | 3 | ## TESTS 4 | 5 | # Make sure the compiler can find include files for our sqlcheck library 6 | include_directories (${CMAKE_SOURCE_DIR}/src/include) 7 | 8 | # ---[ TEST SUITE 9 | add_executable(test_suite test_suite.cpp) 10 | target_link_libraries(test_suite sqlcheck_library 11 | ${GTEST_BOTH_LIBRARIES} 12 | googletest 13 | ${GLOG_LIBRARIES} 14 | ${CMAKE_THREAD_LIBS_INIT} 15 | ) 16 | add_test(NAME TestSuite COMMAND test_suite) 17 | 18 | # --[ Add "make check" target 19 | 20 | set(CTEST_FLAGS "") 21 | add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} ${CTEST_FLAGS} --verbose) 22 | 23 | -------------------------------------------------------------------------------- /test/test_suite.cpp: -------------------------------------------------------------------------------- 1 | // TEST SUITE 2 | 3 | #include 4 | 5 | #include "checker.h" 6 | 7 | #include 8 | 9 | namespace sqlcheck { 10 | 11 | TEST(TestSuite, SelectStarTest) { 12 | 13 | Configuration default_conf; 14 | default_conf.testing_mode = true; 15 | default_conf.verbose = false; 16 | 17 | std::unique_ptr stream(new std::istringstream()); 18 | stream->str( 19 | "SELECT * FROM FOO;\n" 20 | "\n" 21 | 22 | "SELECT *\n" 23 | "FROM BAR;\n" 24 | ); 25 | 26 | default_conf.test_stream.reset(stream.release()); 27 | 28 | Check(default_conf); 29 | 30 | } 31 | 32 | TEST(TestSuite, MultiValuedAttributeTest) { 33 | 34 | Configuration default_conf; 35 | default_conf.testing_mode = true; 36 | default_conf.verbose = false; 37 | 38 | std::unique_ptr stream(new std::istringstream()); 39 | stream->str( 40 | "SELECT product_id FROM Products WHERE account_id REGEXP '[[:<:]]12[[:>:]]';\n" 41 | 42 | "CREATE TABLE Products (" 43 | "product_id SERIAL PRIMARY KEY," 44 | "product_name VARCHAR(1000)," 45 | "account_id VARCHAR(100));\n" 46 | 47 | ); 48 | 49 | default_conf.test_stream.reset(stream.release()); 50 | 51 | Check(default_conf); 52 | 53 | } 54 | 55 | TEST(TestSuite, RecursiveDependencyTest) { 56 | 57 | Configuration default_conf; 58 | default_conf.testing_mode = true; 59 | default_conf.verbose = false; 60 | 61 | std::unique_ptr stream(new std::istringstream()); 62 | stream->str( 63 | 64 | "CREATE TABLE Comments (" 65 | "comment_id SERIAL PRIMARY KEY," 66 | "parent_id BIGINT UNSIGNED," 67 | "FOREIGN KEY (parent_id) REFERENCES Comments(comment_id));\n" 68 | 69 | ); 70 | 71 | default_conf.test_stream.reset(stream.release()); 72 | 73 | Check(default_conf); 74 | 75 | } 76 | 77 | TEST(TestSuite, PrimaryKeyExistsTest) { 78 | 79 | Configuration default_conf; 80 | default_conf.testing_mode = true; 81 | default_conf.verbose = false; 82 | 83 | std::unique_ptr stream(new std::istringstream()); 84 | stream->str( 85 | 86 | "CREATE TABLE ArticleTags (" 87 | "article_id BIGINT UNSIGNED NOT NULL," 88 | "tag_id BIGINT UNSIGNED NOT NULL,);\n" 89 | 90 | "CREATE TABLE BugsProducts (" 91 | "id SERIAL PRIMARY KEY," 92 | "bug_id BIGINT UNSIGNED NOT NULL," 93 | "product_id BIGINT UNSIGNED NOT NULL," 94 | "UNIQUE KEY (bug_id, product_id)," 95 | "FOREIGN KEY (bug_id) REFERENCES Bugs(bug_id)," 96 | "FOREIGN KEY (product_id) REFERENCES Products(product_id));\n" 97 | 98 | "CREATE TABLE BugsProducts (" 99 | "bug_id BIGINT UNSIGNED NOT NULL," 100 | "id SERIAL PRIMARY KEY," 101 | "product_id BIGINT UNSIGNED NOT NULL," 102 | "UNIQUE KEY (bug_id, product_id)," 103 | "FOREIGN KEY (bug_id) REFERENCES Bugs(bug_id)," 104 | "FOREIGN KEY (product_id) REFERENCES Products(product_id));\n" 105 | ); 106 | 107 | default_conf.test_stream.reset(stream.release()); 108 | 109 | Check(default_conf); 110 | 111 | } 112 | 113 | TEST(TestSuite, GenericPrimaryKeyTest) { 114 | 115 | Configuration default_conf; 116 | default_conf.testing_mode = true; 117 | default_conf.verbose = false; 118 | 119 | std::unique_ptr stream(new std::istringstream()); 120 | stream->str( 121 | 122 | "CREATE TABLE BugsProducts (" 123 | "id SERIAL PRIMARY KEY," 124 | "bug_id BIGINT UNSIGNED NOT NULL," 125 | "product_id BIGINT UNSIGNED NOT NULL," 126 | "UNIQUE KEY (bug_id, product_id)," 127 | "FOREIGN KEY (bug_id) REFERENCES Bugs(bug_id)," 128 | "FOREIGN KEY (product_id) REFERENCES Products(product_id));\n" 129 | 130 | "CREATE TABLE BugsProducts (" 131 | "bug_id BIGINT UNSIGNED NOT NULL," 132 | "id SERIAL PRIMARY KEY," 133 | "product_id BIGINT UNSIGNED NOT NULL," 134 | "UNIQUE KEY (bug_id, product_id)," 135 | "FOREIGN KEY (bug_id) REFERENCES Bugs(bug_id)," 136 | "FOREIGN KEY (product_id) REFERENCES Products(product_id));\n" 137 | ); 138 | 139 | default_conf.test_stream.reset(stream.release()); 140 | 141 | Check(default_conf); 142 | 143 | } 144 | 145 | TEST(TestSuite, ForeignKeyExistsTest) { 146 | 147 | Configuration default_conf; 148 | default_conf.testing_mode = true; 149 | default_conf.verbose = false; 150 | 151 | std::unique_ptr stream(new std::istringstream()); 152 | stream->str( 153 | 154 | "CREATE TABLE BugsProducts (" 155 | "id SERIAL PRIMARY KEY," 156 | "bug_id BIGINT UNSIGNED NOT NULL," 157 | "product_id BIGINT UNSIGNED NOT NULL," 158 | "UNIQUE KEY (bug_id, product_id));\n" 159 | 160 | ); 161 | 162 | default_conf.test_stream.reset(stream.release()); 163 | 164 | Check(default_conf); 165 | 166 | } 167 | 168 | TEST(TestSuite, VariableAttributeTest) { 169 | 170 | Configuration default_conf; 171 | default_conf.testing_mode = true; 172 | default_conf.verbose = false; 173 | 174 | std::unique_ptr stream(new std::istringstream()); 175 | stream->str( 176 | 177 | "CREATE TABLE IssueAttributes (" 178 | "issue_id BIGINT UNSIGNED NOT NULL," 179 | "attr_name VARCHAR(100) NOT NULL," 180 | "attr_value VARCHAR(100)," 181 | "PRIMARY KEY (issue_id, attr_name)," 182 | "FOREIGN KEY (issue_id) REFERENCES Issues(issue_id)" 183 | ");" 184 | 185 | ); 186 | 187 | default_conf.test_stream.reset(stream.release()); 188 | 189 | Check(default_conf); 190 | 191 | } 192 | 193 | TEST(TestSuite, MultiColumnAttributeTest) { 194 | 195 | Configuration default_conf; 196 | default_conf.testing_mode = true; 197 | default_conf.verbose = false; 198 | 199 | std::unique_ptr stream(new std::istringstream()); 200 | stream->str( 201 | 202 | "CREATE TABLE Bugs (" 203 | "bug_id SERIAL PRIMARY KEY," 204 | "description VARCHAR(1000)," 205 | "tag1 VARCHAR(20)," 206 | "tag2 VARCHAR(20)," 207 | "tag3 VARCHAR(20)" 208 | ");" 209 | 210 | ); 211 | 212 | default_conf.test_stream.reset(stream.release()); 213 | 214 | Check(default_conf); 215 | 216 | } 217 | 218 | TEST(TestSuite, MetadataTribblesTest) { 219 | 220 | Configuration default_conf; 221 | default_conf.testing_mode = true; 222 | default_conf.verbose = false; 223 | 224 | std::unique_ptr stream(new std::istringstream()); 225 | stream->str( 226 | 227 | "CREATE TABLE ProjectHistory (\n" // 1 228 | "bugs_fixed_2008 INT,\n" // 2 229 | "bugs_fixed_2009 INT,\n" // 3 230 | "bugs_fixed_2010 INT,\n" // 4 231 | "2010 INT" // 5 232 | ");\n" // 5 233 | 234 | "CREATE TABLE Bugs_2008 ( . . . );\n" // 6 235 | 236 | "ALTER TABLE Customers ADD (revenue2002 NUMBER(9,2));\n" // 7 237 | 238 | "CREATE TABLE Bugs_2009 (-- other columns" // 8 239 | "date_reported DATE CHECK (EXTRACT(YEAR FROM date_reported) = 2009));\n" // 8 240 | 241 | ); 242 | 243 | default_conf.test_stream.reset(stream.release()); 244 | 245 | Check(default_conf); 246 | 247 | } 248 | 249 | TEST(TestSuite, FloatTest) { 250 | 251 | Configuration default_conf; 252 | default_conf.testing_mode = true; 253 | default_conf.verbose = false; 254 | 255 | std::unique_ptr stream(new std::istringstream()); 256 | stream->str( 257 | 258 | "ALTER TABLE Bugs ADD COLUMN hours FLOAT;\n" 259 | 260 | "SELECT * FROM Accounts WHERE ABS(hourly_rate - 59.95) < 0.0000001;\n" 261 | 262 | "ALTER TABLE Bugs ADD COLUMN hours NUMERIC(9,2);\n" 263 | 264 | ); 265 | 266 | default_conf.test_stream.reset(stream.release()); 267 | 268 | Check(default_conf); 269 | 270 | } 271 | 272 | void PrintMatches(const std::string& in, const std::string& re){ 273 | 274 | std::smatch m; 275 | std::regex_search(in, m, std::regex(re)); 276 | 277 | if(m.empty()) { 278 | std::cout << "input=[" << in << "], regex=[" << re << "]: NO MATCH\n"; 279 | } else { 280 | std::cout << "input=[" << in << "], regex=[" << re << "]: "; 281 | std::cout << "prefix=[" << m.prefix() << "] "; 282 | for(std::size_t n = 0; n < m.size(); ++n) 283 | std::cout << " m[" << n << "]=[" << m[n] << "] "; 284 | std::cout << "suffix=[" << m.suffix() << "]\n"; 285 | } 286 | 287 | } 288 | 289 | TEST(TestSuite, RegexTests) { 290 | 291 | // greedy match, repeats [a-z] 4 times 292 | PrintMatches("abcdefghi", "a[a-z]{2,4}"); 293 | 294 | // non-greedy match, repeats [a-z] 2 times 295 | PrintMatches("abcdefghi", "a[a-z]{2,4}?"); 296 | 297 | // Choice point ordering for quantifiers results in a match 298 | // with two repetitions, first matching the substring "aa", 299 | // second matching the substring "ba", leaving "ac" not matched 300 | // ("ba" appears in the capture clause m[1]) 301 | PrintMatches("aabaac", "(aa|aabaac|ba|b|c)*"); 302 | 303 | // Choice point ordering for quantifiers makes this regex 304 | // calculate the greatest common divisor between 10 and 15 305 | // (the answer is 5, and it populates m[1] with "aaaaa") 306 | PrintMatches("aaaaaaaaaa,aaaaaaaaaaaaaaa", "^(a+)\\1*,\\1+$"); 307 | 308 | // the substring "bbb" does not appear in the capture clause m[4] 309 | // because it is cleared when the second repetition of the atom 310 | // (a+)?(b+)?(c) is matching the substring "ac" 311 | PrintMatches("zaacbbbcac", "(z)((a+)?(b+)?(c))*"); 312 | 313 | } 314 | 315 | TEST(TestSuite, MetadataRegexTests) { 316 | 317 | std::string regex = "[A-za-z\\-_@]+[0-9]+ "; 318 | 319 | // metadata tribbles 320 | PrintMatches("bugs_2004", regex); 321 | PrintMatches("bugs8_foo", regex); 322 | 323 | // metadata tribbles 324 | PrintMatches("bugs_2004", regex); 325 | PrintMatches("CREATE TABLE Bugs_2008 ( . . . )", regex); 326 | PrintMatches("CREATE TABLE Bugs_200843214 ( . . . )", regex); 327 | PrintMatches("CREATE TABLE tag8 ( . . . )", regex); 328 | 329 | } 330 | 331 | TEST(TestSuite, PhysicalDesignTests) { 332 | 333 | Configuration default_conf; 334 | default_conf.testing_mode = true; 335 | default_conf.verbose = false; 336 | 337 | std::unique_ptr stream(new std::istringstream()); 338 | stream->str( 339 | 340 | "CREATE TABLE Bugs (" 341 | "-- other columns" 342 | "status ENUM('NEW', 'IN PROGRESS', 'FIXED'), );\n" 343 | 344 | "CREATE TABLE Bugs (" 345 | "-- other columns" 346 | "status VARCHAR(20) CHECK (status IN ('NEW', 'IN PROGRESS', 'FIXED')) );\n" 347 | 348 | "ALTER TABLE Bugs MODIFY COLUMN status ENUM('NEW', 'IN PROGRESS', 'CODE COMPLETE', 'VERIFIED');\n" 349 | 350 | "CREATE TABLE Screenshots (" 351 | "bug_id BIGINT UNSIGNED NOT NULL," 352 | "image_id BIGINT UNSIGNED NOT NULL," 353 | "screenshot_path VARCHAR(100)," 354 | "caption VARCHAR(100)," 355 | "PRIMARY KEY (bug_id, image_id)," 356 | "FOREIGN KEY (bug_id) REFERENCES Bugs(bug_id)" 357 | ");\n" 358 | 359 | "unlink('images/screenshot1234-1.jpg' );\n" 360 | 361 | "CREATE TABLE Bugs (" 362 | "bug_id SERIAL PRIMARY KEY," 363 | "date_reported DATE NOT NULL," 364 | "INDEX (bug_id)," 365 | "INDEX (summary)," 366 | "INDEX (hours)," 367 | "INDEX (bug_id, date_reported, status)" 368 | ");\n" 369 | 370 | "CREATE INDEX TelephoneBook ON Accounts(last_name, first_name);\n" 371 | 372 | ); 373 | 374 | default_conf.test_stream.reset(stream.release()); 375 | 376 | Check(default_conf); 377 | 378 | } 379 | 380 | TEST(TestSuite, EnumFalsePositive) { 381 | Configuration default_conf; 382 | default_conf.testing_mode = true; 383 | default_conf.verbose = false; 384 | 385 | std::unique_ptr stream(new std::istringstream()); 386 | stream->str( 387 | "CREATE TABLE Bugsin (" 388 | "bug_id SERIAL PRIMARY KEY," 389 | "phonenum varchar(20)," 390 | "date_reported DATE NOT NULL" 391 | ");\n" 392 | 393 | "CREATE TABLE Screenshots (" 394 | "bug_id BIGINT UNSIGNED NOT NULL REFERENCES Bugsin (bug_id)," 395 | "image_id BIGINT UNSIGNED NOT NULL," 396 | "screenshot_path VARCHAR(100)," 397 | "caption VARCHAR(100)," 398 | "PRIMARY KEY (bug_id, image_id)," 399 | "FOREIGN KEY (bug_id) REFERENCES Bugsin(bug_id)" 400 | ");\n" 401 | ); 402 | default_conf.test_stream.reset(stream.release()); 403 | Check(default_conf); 404 | } 405 | 406 | TEST(TestSuite, QueryTests) { 407 | 408 | Configuration default_conf; 409 | default_conf.testing_mode = true; 410 | default_conf.verbose = false; 411 | 412 | std::unique_ptr stream(new std::istringstream()); 413 | stream->str( 414 | 415 | "SELECT * FROM Bugs WHERE assigned_to IS NULL OR assigned_to <> 1;\n" 416 | 417 | "SELECT first_name || ' ' || last_name AS full_name FROM Accounts;\n" 418 | 419 | "SELECT product_id, MAX(date_reported) AS latest," 420 | "MIN(date_reported) AS earliest, bug_id" 421 | "FROM Bugs JOIN BugsProducts USING (bug_id)" 422 | "GROUP BY product_id;\n" 423 | 424 | "SELECT product_id, MAX(date_reported) AS latest" 425 | "FROM Bugs JOIN BugsProducts USING (bug_id)" 426 | "GROUP BY product_id;\n" 427 | 428 | "SELECT * FROM Bugs ORDER BY RAND() LIMIT 1;\n" 429 | 430 | "SELECT b1.*" 431 | "FROM Bugs AS b1" 432 | "JOIN (SELECT CEIL(RAND() * (SELECT MAX(bug_id) FROM Bugs)) AS rand_id) AS b2" 433 | "ON (b1.bug_id = b2.rand_id);\n" 434 | 435 | "SELECT * FROM Bugs WHERE description LIKE '%crash%';\n" 436 | 437 | "SELECT * FROM Bugs WHERE description REGEXP 'crash';\n" 438 | 439 | "SELECT COUNT(bp.product_id) AS how_many_products," 440 | "COUNT(dev.account_id) AS how_many_developers," 441 | "COUNT(b.bug_id)/COUNT(dev.account_id) AS avg_bugs_per_developer," 442 | "COUNT(cust.account_id) AS how_many_customers" 443 | "FROM Bugs b JOIN BugsProducts bp ON (b.bug_id = bp.bug_id)" 444 | "JOIN Accounts dev ON (b.assigned_to = dev.account_id)" 445 | "JOIN Accounts cust ON (b.reported_by = cust.account_id) WHERE cust.email NOT LIKE '%@example.com'" 446 | "JOIN Accounts cust ON (b.reported_by = cust.account_id) WHERE cust.email NOT LIKE '%@example.com'" 447 | "JOIN Accounts cust ON (b.reported_by = cust.account_id) WHERE cust.email NOT LIKE '%@example.com'" 448 | "JOIN Accounts cust ON (b.reported_by = cust.account_id) WHERE cust.email NOT LIKE '%@example.com'" 449 | "JOIN Accounts cust ON (b.reported_by = cust.account_id) WHERE cust.email NOT LIKE '%@example.com'" 450 | "GROUP BY bp.product_id;" 451 | 452 | "SELECT AVG(bugs_per_developer) AS average_bugs_per_developer" 453 | "FROM (SELECT dev.account_id, COUNT(*) AS bugs_per_developer" 454 | "FROM Bugs b JOIN Accounts dev" 455 | "ON (b.assigned_to = dev.account_id)" 456 | "WHERE b.status = 'FIXED' GROUP BY dev.account_id) t;\n" 457 | 458 | "INSERT INTO Bugs VALUES (DEFAULT, CURDATE(), 'New bug', 'Test T987 fails...'," 459 | "NULL, 123, NULL, NULL, DEFAULT, 'Medium', NULL);\n" 460 | 461 | "INSERT INTO Accounts (account_name, first_name, last_name, email," 462 | "password_hash, portrait_image, hourly_rate)" 463 | "VALUES ('bkarwin', 'Bill', 'Karwin', 'bill@example.com', SHA2('xyzzy'), NULL, 49.95);\n" 464 | 465 | "CREATE TABLE Accounts (" 466 | "account_id SERIAL PRIMARY KEY," 467 | "account_name VARCHAR(20) NOT NULL," 468 | "email VARCHAR(100) NOT NULL," 469 | "password VARCHAR(30) NOT NULL" 470 | ");\n" 471 | 472 | "SELECT * FROM Accounts" 473 | "WHERE account_name = 'bill' AND password = 'opensesame';\n" 474 | 475 | "SELECT s.cust_id,count(s.cust_id) FROM SH.sales s GROUP BY s.cust_id\n" 476 | "HAVING s.cust_id != '1660' AND s.cust_id != '2';\n" 477 | 478 | "SELECT * FROM SH.products p WHERE p.prod_id = (SELECT s.prod_id FROM SH.sales s" 479 | "WHERE s.cust_id = 100996 AND s.quantity_sold = 1 );\n" 480 | 481 | "SELECT s.* FROM SH.sales s WHERE s.prod_id = 14 OR s.prod_id = 17;\n" 482 | 483 | "SELECT cust_id FROM SH.sales UNION SELECT cust_id FROM customers;\n" 484 | 485 | "SELECT DISTINCT c.country_id, c.country_name FROM SH.countries c JOIN SH.customers e" 486 | "WHERE e.country_id = c.country_id;\n" 487 | 488 | ); 489 | 490 | default_conf.test_stream.reset(stream.release()); 491 | 492 | Check(default_conf); 493 | 494 | } 495 | 496 | TEST(TestSuite, JoinEqualityTest) { 497 | Configuration default_conf; 498 | default_conf.testing_mode = true; 499 | default_conf.verbose = false; 500 | 501 | std::unique_ptr stream(new std::istringstream()); 502 | stream->str( 503 | "SELECT baz.id\n" 504 | "FROM foo\n" 505 | "LEFT JOIN bar ON bar\n" 506 | "JOIN baz ON baz = foo.baz;" 507 | ); 508 | 509 | default_conf.test_stream.reset(stream.release()); 510 | 511 | Check(default_conf); 512 | 513 | } 514 | 515 | TEST(TestSuite, SingleColumnTable) { 516 | Configuration default_conf; 517 | default_conf.testing_mode = true; 518 | default_conf.verbose = true; 519 | 520 | std::unique_ptr stream(new std::istringstream()); 521 | stream->str( 522 | "CREATE TABLE sometable(singlecolumn int);" 523 | ); 524 | 525 | default_conf.test_stream.reset(stream.release()); 526 | 527 | Check(default_conf); 528 | } 529 | 530 | } // End machine sqlcheck 531 | --------------------------------------------------------------------------------