├── .github
    └── workflows
    │   └── main.yml
├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── LICENSE
├── README.md
├── bootstrap
├── docs
    ├── application
    │   └── 4001.md
    ├── logical
    │   ├── 1001.md
    │   ├── 1002.md
    │   ├── 1003.md
    │   ├── 1004.md
    │   ├── 1005.md
    │   ├── 1006.md
    │   └── 1007.md
    ├── physical
    │   ├── 2001.md
    │   ├── 2002.md
    │   ├── 2003.md
    │   ├── 2004.md
    │   └── 2005.md
    └── query
    │   ├── 3001.md
    │   ├── 3002.md
    │   ├── 3003.md
    │   ├── 3004.md
    │   ├── 3005.md
    │   ├── 3006.md
    │   ├── 3007.md
    │   ├── 3008.md
    │   ├── 3009.md
    │   ├── 3010.md
    │   ├── 3011.md
    │   ├── 3012.md
    │   ├── 3013.md
    │   ├── 3014.md
    │   ├── 3015.md
    │   └── 3016.md
├── examples
    ├── auctionmark-ddl.sql
    ├── query4.sql
    ├── si-ddl.sql
    └── top_mutexes.sql
├── script
    └── packages.sh
├── src
    ├── CMakeLists.txt
    ├── checker.cpp
    ├── configuration.cpp
    ├── include
    │   ├── checker.h
    │   ├── color.h
    │   ├── configuration.h
    │   └── list.h
    ├── list.cpp
    └── main.cpp
└── test
    ├── CMakeLists.txt
    └── test_suite.cpp


/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | name: main
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   push:
 6 |     branches:
 7 |       - master
 8 |   pull_request:
 9 |     branches:
10 |       - master
11 | 
12 | jobs:
13 |   build-linux-macos:
14 |     runs-on: ${{ matrix.os }}
15 |     strategy:
16 |       matrix:
17 |         os: ['ubuntu-latest', 'macos-latest']
18 |       fail-fast: false
19 |     steps:
20 |       - uses: actions/checkout@v4
21 | 
22 |       - name: run build
23 |         run: |
24 |           git submodule update --init --recursive
25 |           mkdir build
26 |           cd build && cmake -DCMAKE_BUILD_TYPE=RELEASE ..
27 |           make
28 | 
29 |       - name: run tests
30 |         run: ./build/test/test_suite
31 | 
32 |       - name: create release artifacts/packages
33 |         run: |
34 |           cd build
35 |           make package
36 | 
37 |       - name: upload artifacts
38 |         if: github.event_name == 'workflow_dispatch'
39 |         uses: actions/upload-artifact@v4
40 |         with:
41 |           name: release-packages
42 |           path: ./build/sqlcheck-x86_64.*
43 | 
44 |   build-windows:
45 |     runs-on: ${{ matrix.os }}
46 |     strategy:
47 |       matrix:
48 |         os: ['windows-latest']
49 |       fail-fast: false
50 |     steps:
51 |       - uses: actions/checkout@v4
52 | 
53 |       - name: Add msbuild to PATH
54 |         uses: microsoft/setup-msbuild@v1.1
55 |         with:
56 |           msbuild-architecture: x64
57 | 
58 |       - name: run build
59 |         run: |
60 |           git submodule update --init --recursive
61 |           mkdir build
62 |           cd build
63 |           cmake -DCMAKE_BUILD_TYPE=RELEASE ..
64 |           msbuild ALL_BUILD.vcxproj /t:Build /p:Configuration=Release
65 | 
66 |       - name: run tests
67 |         run: ./build/test/Release/test_suite.exe
68 | 
69 |       - name: upload artifacts
70 |         if: github.event_name == 'workflow_dispatch'
71 |         uses: actions/upload-artifact@v4
72 |         with:
73 |           name: release-packages
74 |           path: ./build/bin/Release/sqlcheck.*
75 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | #==============================================================================#
  2 | # This file specifies intentionally untracked files that git should ignore.
  3 | #==============================================================================#
  4 | 
  5 | #==============================================================================#
  6 | # File extensions to be ignored anywhere in the tree.
  7 | #==============================================================================#
  8 | # Temp files created by most text editors.
  9 | *~
 10 | # Merge files created by git.
 11 | *.orig
 12 | # Byte compiled python modules.
 13 | *.pyc
 14 | # vim swap files
 15 | .*.sw?
 16 | .sw?
 17 | #OS X specific files.
 18 | .DS_store
 19 | # Core files
 20 | #core
 21 | 
 22 | #==============================================================================#
 23 | # Explicit files to ignore (only matches one).
 24 | #==============================================================================#
 25 | # Various tag programs
 26 | /tags
 27 | /TAGS
 28 | /GPATH
 29 | /GRTAGS
 30 | /GSYMS
 31 | /GTAGS
 32 | .gitusers
 33 | autom4te.cache
 34 | cscope.files
 35 | cscope.out
 36 | autoconf/aclocal.m4
 37 | autoconf/autom4te.cache
 38 | /compile_commands.json
 39 | 
 40 | #==============================================================================#
 41 | # Directories to ignore (do not add trailing '/'s, they skip symlinks).
 42 | #==============================================================================#
 43 | # External projects that are tracked independently.
 44 | projects/*
 45 | !projects/*.*
 46 | !projects/Makefile
 47 | 
 48 | 
 49 | #==============================================================================#
 50 | # Autotools artifacts
 51 | #==============================================================================#
 52 | config/
 53 | configure
 54 | config-h.in
 55 | autom4te.cache
 56 | *Makefile.in
 57 | *Makefile
 58 | libtool
 59 | aclocal.m4
 60 | config.log
 61 | config.status
 62 | stamp-h1
 63 | config.h
 64 | m4/libtool.m4
 65 | m4/ltoptions.m4
 66 | m4/ltsugar.m4
 67 | m4/ltversion.m4
 68 | m4/lt~obsolete.m4
 69 | 
 70 | #==============================================================================#
 71 | # Build artifacts
 72 | #==============================================================================#
 73 | #m4/
 74 | build/
 75 | #*.m4
 76 | *.o
 77 | *.lo
 78 | *.la
 79 | *~
 80 | *.pdf
 81 | *.swp
 82 | a.out
 83 | 
 84 | #==============================================================================#
 85 | # Kate Swap Files
 86 | #==============================================================================#
 87 | *.kate-swp
 88 | .#kate-*
 89 | 
 90 | #==============================================================================#
 91 | # Backup artifacts
 92 | #==============================================================================#
 93 | ~*
 94 | *~
 95 | tmp/
 96 | 
 97 | #==============================================================================#
 98 | # Doxygen artifacts
 99 | #==============================================================================#
100 | doc/
101 | 
102 | #==============================================================================#
103 | # KDevelop files
104 | #==============================================================================#
105 | .kdev4
106 | *.kdev4
107 | .dirstamp
108 | .deps
109 | .libs
110 | 
111 | #==============================================================================#
112 | # Eclipse files
113 | #==============================================================================#
114 | .wtpmodules
115 | .classpath
116 | .project
117 | .cproject
118 | .pydevproject
119 | .settings
120 | .autotools
121 | 
122 | /Debug/
123 | /misc/
124 | 
125 | #==============================================================================#
126 | # Intellij files
127 | #==============================================================================#
128 | .idea
129 | *.iml
130 | 
131 | #==============================================================================#
132 | # Code Coverage files
133 | #==============================================================================#
134 | *.gcno
135 | *.gcda
136 | 
137 | #==============================================================================#
138 | # Scripts
139 | #==============================================================================#
140 | *.jar
141 | scripts/PelotonTest/out
142 | scripts/PelotonTest/lib
143 | 
144 | #==============================================================================#
145 | # Protobuf
146 | #==============================================================================#
147 | *.pb-c.c
148 | *.pb-c.h
149 | *.pb.cc
150 | *.pb.h
151 | *.pb.go
152 | 
153 | #==============================================================================#
154 | # Third party
155 | #==============================================================================#
156 | third_party/nanomsg/
157 | third_party/nvml/
158 | third_party/logcabin/
159 | 
160 | #==============================================================================#
161 | # Eclipse
162 | #==============================================================================#
163 | 
164 | .metadata
165 | bin/
166 | tmp/
167 | *.tmp
168 | *.bak
169 | *.swp
170 | *~.nib
171 | local.properties
172 | .settings/
173 | .loadpath
174 | .recommenders
175 | 
176 | # Eclipse Core
177 | .project
178 | 
179 | # External tool builders
180 | .externalToolBuilders/
181 | 
182 | # Locally stored "Eclipse launch configurations"
183 | *.launch
184 | 
185 | # PyDev specific (Python IDE for Eclipse)
186 | *.pydevproject
187 | 
188 | # CDT-specific (C/C++ Development Tooling)
189 | .cproject
190 | 
191 | # JDT-specific (Eclipse Java Development Tools)
192 | .classpath
193 | 
194 | # Java annotation processor (APT)
195 | .factorypath
196 | 
197 | # PDT-specific (PHP Development Tools)
198 | .buildpath
199 | 
200 | # sbteclipse plugin
201 | .target
202 | 
203 | # Tern plugin
204 | .tern-project
205 | 
206 | # TeXlipse plugin
207 | .texlipse
208 | 
209 | # STS (Spring Tool Suite)
210 | .springBeans
211 | 
212 | # Code Recommenders
213 | .recommenders/
214 | CMakeLists.txt
215 | io_file
216 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "external/googletest"]
2 | 	path = external/googletest
3 |     url = https://github.com/google/googletest.git
4 | [submodule "external/gflags"]
5 | 	path = external/gflags
6 | 	url = https://github.com/gflags/gflags.git
7 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | ##################################################################################
  2 | 
  3 | #Preamble
  4 | cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
  5 | project (sqlcheck)
  6 | 
  7 | enable_language(C)
  8 | enable_language(CXX)
  9 | 
 10 | # ---[ C++ Flags
 11 | if(CMAKE_CXX_COMPILER_ID MATCHES GNU)
 12 |     set(CMAKE_CXX_FLAGS         "-Wall -Wno-unknown-pragmas -Wno-sign-compare -Woverloaded-virtual -Wno-write-strings -Wno-error=maybe-uninitialized")
 13 |     set(CMAKE_CXX_FLAGS_DEBUG   "-O0 -g3")
 14 |     set(CMAKE_CXX_FLAGS_RELEASE "-O3")
 15 | endif()
 16 | 
 17 | IF(CMAKE_BUILD_TYPE MATCHES COVERAGE)
 18 |     message("Coverage mode")
 19 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage")
 20 | ENDIF(CMAKE_BUILD_TYPE MATCHES COVERAGE)
 21 | 
 22 | # ---[ CTest
 23 | include(CTest)
 24 | 
 25 | #Environment detection
 26 | set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/")
 27 | 
 28 | # Compiler version
 29 | # Compiler-specific C++11 activation.
 30 | if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
 31 |     execute_process(
 32 |         COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
 33 |     if (NOT (GCC_VERSION VERSION_GREATER 4.9 OR GCC_VERSION VERSION_EQUAL 4.9))
 34 |         message(FATAL_ERROR "${PROJECT_NAME} requires g++ 4.9 or greater.")
 35 |     endif ()
 36 | elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
 37 |     execute_process(
 38 |         COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE CLANG_VERSION)
 39 | else()
 40 |     message(WARNING "You are using an unsupported compiler! Compilation has only been tested with GCC.")
 41 | endif ()
 42 | 
 43 | # ---[ C++11 Flags
 44 | include(CheckCXXCompilerFlag)
 45 | CHECK_CXX_COMPILER_FLAG("-std=c++11" COMPILER_SUPPORTS_CXX11)
 46 | if(COMPILER_SUPPORTS_CXX11)
 47 | 	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
 48 | else()
 49 |     message(STATUS "The compiler ${CMAKE_CXX_COMPILER} has no C++11 support. Please use a different C++ compiler.")
 50 | endif()
 51 | 
 52 | # -- [ Debug Flags
 53 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -ggdb")
 54 | 
 55 | # -- [ GTest
 56 | enable_testing()
 57 | set(GOOGLETEST_ROOT external/googletest/googletest CACHE STRING "Google Test source root")
 58 | include_directories(
 59 |     ${PROJECT_SOURCE_DIR}/${GOOGLETEST_ROOT}
 60 |     ${PROJECT_SOURCE_DIR}/${GOOGLETEST_ROOT}/include
 61 |     )
 62 | set(GOOGLETEST_SOURCES
 63 |     ${PROJECT_SOURCE_DIR}/${GOOGLETEST_ROOT}/src/gtest-all.cc
 64 |     ${PROJECT_SOURCE_DIR}/${GOOGLETEST_ROOT}/src/gtest_main.cc
 65 |     )
 66 | foreach(_source ${GOOGLETEST_SOURCES})
 67 |     set_source_files_properties(${_source} PROPERTIES GENERATED 1)
 68 | endforeach()
 69 | 
 70 | add_library(googletest ${GOOGLETEST_SOURCES})
 71 | 
 72 | # -- [ GFlags
 73 | 
 74 | set(GFLAGS_ROOT external/gflags CACHE STRING "Google Flags")
 75 | 
 76 | add_subdirectory(${PROJECT_SOURCE_DIR}/${GFLAGS_ROOT})
 77 | 
 78 | include_directories(
 79 |     ${PROJECT_BINARY_DIR}/${GFLAGS_ROOT}/include
 80 | )
 81 | 
 82 | # --[ Threads
 83 | 
 84 | find_package(Threads REQUIRED)
 85 | 
 86 | 
 87 | # --[ Flags
 88 | if(UNIX OR APPLE)
 89 |     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -Wall -Wextra -Werror -Wno-writable-strings")
 90 | endif()
 91 | 
 92 | # -- [Info
 93 | 
 94 | # Choose the type of build.  Example: SET(CMAKE_BUILD_TYPE Debug) 
 95 | MESSAGE( STATUS "CMAKE_BUILD_TYPE: " ${CMAKE_BUILD_TYPE} )
 96 | # the compiler used for C++ files 
 97 | MESSAGE( STATUS "CMAKE_CXX_COMPILER: " ${CMAKE_CXX_COMPILER} )
 98 | # the compiler flags for compiling C++ sources 
 99 | MESSAGE( STATUS "CMAKE_CXX_FLAGS: " ${CMAKE_CXX_FLAGS} )
100 | 
101 | 
102 | # --[ Packaging
103 | 
104 | SET(VERSION "1.3")
105 | SET(CPACK_PACKAGE_NAME "sqlcheck")
106 | SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Automatically identify anti-patterns in SQL queries.")
107 | SET(CPACK_PACKAGE_DESCRIPTION "SQLCheck automates the detection of common anti-patterns in SQL queries. 
108 | Such anti-patterns often increase the time taken to run the queries on a database management system. 
109 | Addressing these anti-patterns will, therefore, help improve the runtime performance of the queries. 
110 | This tool targets all SQL dialects.")
111 | SET(CPACK_PACKAGE_VERSION ${VERSION})
112 | SET(CPACK_DEBIAN_PACKAGE_MAINTAINER "Joy Arulraj") #required
113 | 
114 | set(CPACK_PACKAGE_EXECUTABLES "sqlcheck")
115 | set(CPACK_MONOLITHIC_INSTALL TRUE)
116 | if (WIN32)
117 |     # NSIS Setup
118 |     SET(CPACK_GENERATOR "NSIS")
119 |     SET(CPACK_NSIS_MODIFY_PATH TRUE)
120 |     SET(CPACK_NSIS_EXECUTABLES_DIRECTORY "${DISPLAZ_BIN_DIR}")
121 |     SET(CPACK_NSIS_ENABLE_UNINSTALL_BEFORE_INSTALL TRUE)
122 | elseif(APPLE)
123 |     # OSX Bundle generator setup
124 |     SET(CPACK_GENERATOR "DragNDrop")
125 |     SET(CPACK_BUNDLE_NAME "sqlcheck")
126 | elseif(UNIX)
127 |     SET(CPACK_GENERATOR "TGZ;RPM;DEB;ZIP")
128 | endif()
129 | SET(CPACK_STRIP_FILES TRUE)
130 | 
131 | SET(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CMAKE_SYSTEM_PROCESSOR}")
132 | 
133 | INCLUDE(CPack)
134 | 
135 | # ---[ Subdirectories
136 | add_subdirectory(src)
137 | add_subdirectory(test)
138 | 
139 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright {yyyy} {name of copyright owner}
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | [![GitHub license](https://img.shields.io/badge/license-apache-blue.svg?style=flat)](https://www.apache.org/licenses/LICENSE-2.0)
  3 | [![Version](https://img.shields.io/badge/version-1.3-red.svg)](https://github.com/jarulraj/sqlcheck/)
  4 | [![main](https://github.com/jarulraj/sqlcheck/actions/workflows/main.yml/badge.svg)](https://github.com/jarulraj/sqlcheck/actions/workflows/main.yml)
  5 | [![Coverage Status](https://coveralls.io/repos/github/jarulraj/sqlcheck/badge.svg?branch=master)](https://coveralls.io/github/jarulraj/sqlcheck?branch=master)
  6 | 
  7 | ## What Is SQLCheck?
  8 | 
  9 | `sqlcheck` automatically detects common SQL anti-patterns. Such anti-patterns often slow down queries. Addressing them will, therefore, help accelerate queries.
 10 | 
 11 | `sqlcheck` targets **all major SQL dialects**.
 12 | 
 13 | For development updates on `sqlcheck` and general news on <a href="https://faculty.cc.gatech.edu/~jarulraj/">next generation  database systems</a>, follow me at <a class="twitter-follow-button" href="https://twitter.com/joy_arulraj">@joy_arulraj</a>.
 14 | 
 15 | ## What it can do
 16 | 
 17 | Right now SQLCheck can detect the following anti-patterns:
 18 | 
 19 | ### Logical Database Design Anti-Patterns
 20 | 
 21 |   * [Multi-Valued Attribute](https://github.com/jarulraj/sqlcheck/blob/master/docs/logical/1001.md)
 22 |   * [Recursive Dependency](https://github.com/jarulraj/sqlcheck/blob/master/docs/logical/1002.md)
 23 |   * [Primary Key Does Not Exist](https://github.com/jarulraj/sqlcheck/blob/master/docs/logical/1003.md)
 24 |   * [Generic Primary Key](https://github.com/jarulraj/sqlcheck/blob/master/docs/logical/1004.md)
 25 |   * [Foreign Key Does Not Exist](https://github.com/jarulraj/sqlcheck/blob/master/docs/logical/1005.md)
 26 |   * [Entity-Attribute-Value Pattern](https://github.com/jarulraj/sqlcheck/blob/master/docs/logical/1006.md)
 27 |   * [Metadata Tribbles](https://github.com/jarulraj/sqlcheck/blob/master/docs/logical/1007.md)
 28 | 
 29 | ### Physical Database Design Anti-Patterns
 30 | 
 31 |   * [Imprecise Data Type](https://github.com/jarulraj/sqlcheck/blob/master/docs/physical/2001.md)
 32 |   * [Values In Definition](https://github.com/jarulraj/sqlcheck/blob/master/docs/physical/2002.md)
 33 |   * [Files Are Not SQL Data Types](https://github.com/jarulraj/sqlcheck/blob/master/docs/physical/2003.md)
 34 |   * [Too Many Indexes](https://github.com/jarulraj/sqlcheck/blob/master/docs/physical/2004.md)
 35 |   * [Index Attribute Order](https://github.com/jarulraj/sqlcheck/blob/master/docs/physical/2005.md)
 36 | 
 37 | ### Query Anti-Patterns
 38 | 
 39 |   * [SELECT *](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3001.md)
 40 |   * [NULL Usage](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3002.md)
 41 |   * [NOT NULL Usage](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3003.md)
 42 |   * [String Concatenation](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3004.md)
 43 |   * [GROUP BY Usage](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3005.md)
 44 |   * [ORDER BY RAND Usage](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3006.md)
 45 |   * [Pattern Matching Usage](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3007.md)
 46 |   * [Spaghetti Query Alert](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3008.md)
 47 |   * [Reduce Number of JOINs](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3009.md)
 48 |   * [Eliminate Unnecessary DISTINCT Conditions](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3010.md)
 49 |   * [Implicit Column Usage](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3011.md)
 50 |   * [HAVING Clause Usage](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3012.md)
 51 |   * [Nested sub queries](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3013.md)
 52 |   * [OR Usage](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3014.md)
 53 |   * [UNION Usage](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3015.md)
 54 |   * [DISTINCT & JOIN Usage](https://github.com/jarulraj/sqlcheck/blob/master/docs/query/3016.md)
 55 | 
 56 | ### Application Development Anti-Patterns
 57 | 
 58 |   * [Readable Passwords](https://github.com/jarulraj/sqlcheck/blob/master/docs/application/4001.md)
 59 | 
 60 | ## Installation
 61 | 
 62 | ###UBUNTU/MINT (DEBIAN)
 63 | 
 64 | 1. Download the **debian package** from the latest release.
 65 | ```shell
 66 | wget https://github.com/jarulraj/sqlcheck/releases/download/v1.3/sqlcheck-x86_64.deb
 67 | ```
 68 | 
 69 | 2. Install it by running the following command.
 70 | ```shell
 71 | dpkg -i sqlcheck-x86_64.deb
 72 | ```
 73 | 
 74 | ### FEDORA/CENTOS (RPM)
 75 | 
 76 | 1. Download the **rpm package** from the latest release.
 77 | ```shell
 78 | wget https://github.com/jarulraj/sqlcheck/releases/download/v1.3/sqlcheck-x86_64.rpm
 79 | ```
 80 | 
 81 | 2. Install it by running the following command.
 82 | ```shell
 83 | yum --nogpgcheck localinstall sqlcheck-x86_64.rpm 
 84 | ```
 85 | 
 86 | ### MAC (DMG)
 87 | 
 88 | 1. Download the **dmg package** from the latest release.
 89 | ```shell
 90 | wget https://github.com/jarulraj/sqlcheck/releases/download/v1.3/sqlcheck-x86_64.dmg
 91 | ```
 92 | 
 93 | 2. Click the dmg to mount the package. This will mount it in the `Volumes` directory.
 94 | 
 95 | 3. Open the `Terminal` app. [This page](http://blog.teamtreehouse.com/introduction-to-the-mac-os-x-command-line) contains more details on finding the app.
 96 | 
 97 | 4. Copy over the SQLCheck binary file to desired local directory.
 98 | ```shell
 99 | cp /Volumes/sqlcheck-x86_64/bin/sqlcheck /usr/local/bin/
100 | ```
101 | 
102 | 
103 | ### WINDOWS (EXE)
104 | 
105 | 1. Download the **exe file** from the latest release.
106 | ```shell
107 | wget https://github.com/jarulraj/sqlcheck/releases/download/v1.3/sqlcheck.exe
108 | ```
109 | 
110 | 2. Open a terminal (like `cmd.exe`) and run the executable.
111 | 
112 | ### ZIP
113 | 
114 | 1. Download the **zip package** from the latest release.
115 | ```shell
116 | wget https://github.com/jarulraj/sqlcheck/releases/download/v1.3/sqlcheck-x86_64.zip
117 | ```
118 | 
119 | 2. Unzip it and find the SQLCheck binary (`bin/sqlcheck`).
120 | ```shell
121 | unzip sqlcheck-x86_64.zip
122 | ```
123 | 
124 | ## SOURCE CODE
125 | 
126 | SQLCheck has the following software dependencies:
127 | 
128 | - **g++ 4.9+** 
129 | - **cmake** ([Cmake installation guide](https://cmake.org/install/))
130 | 
131 | First, clone the repository (with **--recursive** option).
132 | 
133 | ```shell
134 | git clone --recursive https://github.com/jarulraj/sqlcheck.git
135 | ```
136 | 
137 | Next, run the following commands to build and install SQLCheck:
138 | 
139 | ```shell
140 | ./bootstrap
141 | cd build
142 | cmake -DCMAKE_BUILD_TYPE=RELEASE ..
143 | make
144 | make install
145 | ```
146 | 
147 | 
148 | ### TESTS
149 | 
150 | To run SQLCheck unit tests during development, run:
151 | 
152 | ```shell
153 | ./build/test/test_suite
154 | ```
155 | 
156 | ## Usage
157 | 
158 | ```
159 | $ sqlcheck -h
160 | 
161 | Command line options : sqlcheck <options>
162 |    -f --file_name          :  file name
163 |    -r --risk_level         :  set of anti-patterns to check
164 |                            :  1 (all anti-patterns, default) 
165 |                            :  2 (only medium and high risk anti-patterns) 
166 |                            :  3 (only high risk anti-patterns) 
167 |    -c --color_mode         :  color mode 
168 |    -v --verbose_mode       :  verbose mode
169 | ```   
170 | 
171 | ```sql
172 | $ sqlcheck -f examples/top_mutexes.sql -v 
173 | 
174 | -------------------------------------------------
175 | > RISK LEVEL    :: ALL ANTI-PATTERNS
176 | > SQL FILE NAME :: examples/top_mutexes.sql
177 | -------------------------------------------------
178 | ==================== Results ===================
179 | 
180 | -------------------------------------------------
181 | SQL Statement: with top_mutexes as ( select--+ leading(t1 s1 v1 v2 t2 s2) use_hash(s1)
182 | use_nl(v1) use_hash(s2) materialize t1.hsecs ,s1.* ,s2.sleeps as end_sleeps
183 | ,s2.wait_time as end_wait_time ,s2.sleeps-s1.sleeps as delta_sleeps ,t2.hsecs -
184 | t1.hsecs as delta_hsecs --,s2.* from v$timer t1 ,v$mutex_sleep s1 ,(select/*+
185 | no_merge */ sum(level) a from dual connect by level<=1e6) v1 ,v$timer t2
186 | ,v$mutex_sleep s2 where s1.mutex_type=s2.mutex_type and s1.location=s2.location
187 | ) select * from top_mutexes order by delta_sleeps desc;
188 | ```
189 | 
190 | ```
191 | [examples/top_mutexes.sql]: (HIGH RISK) (QUERY ANTI-PATTERN) SELECT *
192 | 
193 | ● Inefficiency in moving data to the consumer:
194 | 
195 | When you SELECT *, you're often retrieving more columns from the database than
196 | your application really needs to function. This causes more data to move from
197 | the database server to the client, slowing access and increasing load on your
198 | machines, as well as taking more time to travel across the network. This is
199 | especially true when someone adds new columns to underlying tables that didn't
200 | exist and weren't needed when the original consumers coded their data access.   
201 | 
202 | 
203 | ● Indexing issues:
204 | 
205 | Consider a scenario where you want to tune a query to a high level of
206 | performance. If you were to use *, and it returned more columns than you
207 | actually needed, the server would often have to perform more expensive methods
208 | to retrieve your data than it otherwise might. For example, you wouldn't be able
209 | to create an index which simply covered the columns in your SELECT list, and
210 | even if you did (including all columns [shudder]), the next developer who came around
211 | and added a column to the underlying table would cause the optimizer to ignore
212 | your optimized covering index, and you'd likely find that the performance of
213 | your query would drop substantially for no readily apparent reason.    
214 | [Matching Expression: select *]
215 | 
216 | [examples/top_mutexes.sql]: (LOW RISK) (QUERY ANTI-PATTERN) Spaghetti Query Alert   
217 | 
218 | ● Split up a complex spaghetti query into several simpler queries:
219 | 
220 | SQL is a very expressive language—you can accomplish a lot in a single query
221 | or statement. But that doesn't mean it's mandatory or even a good idea to
222 | approach every task with the assumption it has to be done in one line of code.
223 | One common unintended consequence of producing all your results in one query is
224 | a Cartesian product. This happens when two of the tables in the query have no
225 | condition restricting their relationship. Without such a restriction, the join
226 | of two tables pairs each row in the first table to every row in the other table.
227 | Each such pairing becomes a row of the result set, and you end up with many more
228 | rows than you expect. 
229 | 
230 | It's important to consider that these queries are simply
231 | hard to write, hard to modify, and hard to debug. You should expect to get
232 | regular requests for incremental enhancements to your database applications.
233 | Managers want more complex reports and more fields in a user interface. If you
234 | design intricate, monolithic SQL queries, it's more costly and time-consuming to
235 | make enhancements to them. Your time is worth something, both to you and to your
236 | project. Split up a complex spaghetti query into several simpler queries. When
237 | you split up a complex SQL query, the result may be many similar queries,
238 | perhaps varying slightly depending on data values. Writing these queries is a
239 | chore, so it's a good application of SQL code generation. Although SQL makes it
240 | seem possible to solve a complex problem in a single line of code, don't be
241 | tempted to build a house of cards.   
242 | 
243 | ==================== Summary ===================   
244 | All Anti-Patterns  :: 2   
245 | >  High Risk   :: 1   
246 | >  Medium Risk :: 0   
247 | >  Low Risk    :: 1   
248 | 
249 | ```
250 | 
251 | ## References
252 | 
253 | (1) SQL Anti-patterns: Avoiding the Pitfalls of Database Programming, Bill Karwin  
254 | (2) [Common SQL Anti-patterns](https://stackoverflow.com/questions/346659/what-are-the-most-common-sql-anti-patterns), StackOverflow
255 | 
256 | ## Contributions
257 | 
258 | Contributions to SQLCheck are always welcome. You can contribute in different ways:
259 | * Open an issue with suggestions for improvements and errors you're facing;
260 | * Fork this repository and submit a pull request;
261 | * Improve the documentation.
262 | 
263 | ## License
264 | 
265 | Licensed under the [Apache License](LICENSE).
266 | 


--------------------------------------------------------------------------------
/bootstrap:
--------------------------------------------------------------------------------
1 | #! /bin/sh -e
2 | # bootstrap -- Use this script to create generated files from the CVS dist
3 | 
4 | if [ ! -d "build" ] ; then
5 |     mkdir build
6 | fi
7 | 
8 | 


--------------------------------------------------------------------------------
/docs/application/4001.md:
--------------------------------------------------------------------------------
 1 | # Readable Passwords
 2 | 
 3 | ## Do not store readable passwords:   
 4 | It's not secure to store a password in clear text or even to pass it over the
 5 | network in the clear. If an attacker can read the SQL statement you use to
 6 | insert a password, they can see the password plainly.
 7 | Additionally, interpolating the user's input string into the SQL query in plain text
 8 | exposes it to discovery by an attacker.
 9 | If you can read passwords, so can a hacker.
10 | The solution is to encode the password using a one-way cryptographic hash 
11 | function. This function transforms its input string into a new string,
12 | called the hash, that is unrecognizable.
13 | Use a salt to thwart dictionary attacks. Don't put the plain-text password
14 | into the SQL query. Instead, compute the hash in your application code,
15 | and use only the hash in the SQL query.


--------------------------------------------------------------------------------
/docs/logical/1001.md:
--------------------------------------------------------------------------------
 1 | # Multi-Valued Attribute
 2 | 
 3 | ## **Store each value in its own column and row:**     
 4 | Storing a list of IDs as a VARCHAR/TEXT column can cause performance and data integrity
 5 | problems. Querying against such a column would require using pattern-matching
 6 | expressions. It is awkward and costly to join a comma-separated list to matching rows.
 7 | This will make it harder to validate IDs. Think about what is the greatest number of
 8 | entries this list must support? Instead of using a multi-valued attribute,
 9 | consider storing it in a separate table, so that each individual value of that attribute
10 | occupies a separate row. Such an intersection table implements a many-to-many relationship
11 | between the two referenced tables. This will greatly simplify querying and validating
12 | the IDs.


--------------------------------------------------------------------------------
/docs/logical/1002.md:
--------------------------------------------------------------------------------
 1 | # Recursive Dependency
 2 | 
 3 |   ## **Avoid recursive relationships:**   
 4 |   It's common for data to have recursive relationships. Data may be organized in a
 5 |   treelike or hierarchical way. However, creating a foreign key constraint to enforce
 6 |   the relationship between two columns in the same table lends to awkward querying.
 7 |   Each level of the tree corresponds to another join. You will need to issue recursive
 8 |   queries to get all descendants or all ancestors of a node.
 9 |   A solution is to construct an additional closure table. It involves storing all paths
10 |   through the tree, not just those with a direct parent-child relationship.
11 |   You might want to compare different hierarchical data designs -- closure table,
12 |   path enumeration, nested sets -- and pick one based on your application's needs.


--------------------------------------------------------------------------------
/docs/logical/1003.md:
--------------------------------------------------------------------------------
 1 | # Primary Key Does Not Exist
 2 | 
 3 |   ## **Consider adding a primary key:**
 4 |   A primary key constraint is important when you need to do the following:
 5 |   prevent a table from containing duplicate rows,
 6 |   reference individual rows in queries, and
 7 |   support foreign key references
 8 |   If you don't use primary key constraints, you create a chore for yourself:
 9 |   checking for duplicate rows. More often than not, you will need to define
10 |   a primary key for every table. Use compound keys when they are appropriate.
11 | 


--------------------------------------------------------------------------------
/docs/logical/1004.md:
--------------------------------------------------------------------------------
1 | # Generic Primary Key
2 | 
3 |  ## **Skip using a generic primary key (id):**   
4 |   Adding an id column to every table causes several effects that make its
5 |   use seem arbitrary. You might end up creating a redundant key or allow
6 |   duplicate rows if you add this column in a compound key.
7 |   The name id is so generic that it holds no meaning. This is especially
8 |   important when you join two tables and they have the same primary
9 |   key column name.


--------------------------------------------------------------------------------
/docs/logical/1005.md:
--------------------------------------------------------------------------------
 1 | # Foreign Key Does Not Exist
 2 | 
 3 |   ## **Consider adding a foreign key:**   
 4 |   Are you leaving out the application constraints? Even though it seems at
 5 |   first that skipping foreign key constraints makes your database design
 6 |   simpler, more flexible, or speedier, you pay for this in other ways.
 7 |   It becomes your responsibility to write code to ensure referential integrity
 8 |   manually. Use foreign key constraints to enforce referential integrity.
 9 |   Foreign keys have another feature you can't mimic using application code:
10 |   cascading updates to multiple tables. This feature allows you to
11 |   update or delete the parent row and lets the database takes care of any child
12 |   rows that reference it. The way you declare the ON UPDATE or ON DELETE clauses
13 |   in the foreign key constraint allow you to control the result of a cascading
14 |   operation. Make your database mistake-proof with constraints.
15 | 


--------------------------------------------------------------------------------
/docs/logical/1006.md:
--------------------------------------------------------------------------------
 1 | # Entity-Attribute-Value Pattern
 2 | 
 3 | 
 4 |   ## **Dynamic schema with variable attributes:**  
 5 |   Are you trying to create a schema where you can define new attributes
 6 |   at runtime.? This involves storing attributes as rows in an attribute table.
 7 |   This is referred to as the Entity-Attribute-Value or schemaless pattern.
 8 |   When you use this pattern,  you sacrifice many advantages that a conventional
 9 |   database design would have given you. You can't make mandatory attributes.
10 |   You can't enforce referential integrity. You might find that attributes are
11 |   not being named consistently. A solution is to store all related types in one table,
12 |   with distinct columns for every attribute that exists in any type
13 |   (Single Table Inheritance). Use one attribute to define the subtype of a given row.
14 |   Many attributes are subtype-specific, and these columns must
15 |   be given a null value on any row storing an object for which the attribute
16 |   does not apply the columns with non-null values become sparse.
17 |   Another solution is to create a separate table for each subtype
18 |   (Concrete Table Inheritance). A third solution mimics inheritance,
19 |   as though tables were object-oriented classes (Class Table Inheritance).
20 |   Create a single table for the base type, containing attributes common to
21 |   all subtypes. Then for each subtype, create another table, with a primary key
22 |   that also serves as a foreign key to the base table.
23 |   If you have many subtypes or if you must support new attributes frequently,
24 |   you can add a BLOB column to store data in a format such as XML or JSON,
25 |   which encodes both the attribute names and their values.
26 |   This design is best when you can't limit yourself to a finite set of subtypes
27 |   and when you need complete flexibility to define new attributes at any time.


--------------------------------------------------------------------------------
/docs/logical/1007.md:
--------------------------------------------------------------------------------
 1 | # Metadata Tribbles
 2 | 
 3 |   ## **Breaking down a table or column by year/user/etc.:**   
 4 |   You might be trying to split a single column into multiple columns,
 5 |   using column names based on distinct values in another attribute.
 6 |   For each year or user, you will need to add one more column or table.
 7 |   You are mixing metadata with data. You will now need to make sure that
 8 |   the primary key values are unique across all the split columns or tables.
 9 |   The solution is to use a feature called sharding or horizontal partitioning.
10 |   ```(PARTITION BY HASH ( YEAR(...) )```. With this feature, you can gain the
11 |   benefits of splitting a large table without the drawbacks.
12 |   Partitioning is not defined in the SQL standard, so each brand of database
13 |   implements it in their own nonstandard way.
14 |   Another remedy for metadata tribbles is to create a dependent table.
15 |   Instead of one row per entity with multiple columns for each year,
16 |   use multiple rows. Don't let data spawn metadata.
17 |    
18 |   ## **Store each value with the same meaning in a single column:**   
19 |   Creating multiple columns in a table with the same prefix 
20 |   indicates that you are trying to store a multivalued attribute. 
21 |   This design makes it hard to add or remove values,
22 |   to ensure the uniqueness of values, and handling growing sets of values.
23 |   The best solution is to create a dependent table with one column for the
24 |   multivalued attribute. Store the multiple values in multiple rows instead of
25 |   multiple columns and define a foreign key in the dependent table to associate
26 |   the values to its parent row.


--------------------------------------------------------------------------------
/docs/physical/2001.md:
--------------------------------------------------------------------------------
 1 | # Imprecise Data Type
 2 | 
 3 | ## Use precise data types:   
 4 | Virtually any use of FLOAT, REAL, or DOUBLE PRECISION data types is suspect.
 5 | Most applications that use floating-point numbers don't require the range of
 6 | values supported by IEEE 754 formats. The cumulative impact of inexact 
 7 | floating-point numbers is severe when calculating aggregates.
 8 | Instead of FLOAT or its siblings, use the NUMERIC or DECIMAL SQL data types
 9 | for fixed-precision fractional numbers. These data types store numeric values
10 | exactly, up to the precision you specify in the column definition.
11 | Do not use FLOAT if you can avoid it.


--------------------------------------------------------------------------------
/docs/physical/2002.md:
--------------------------------------------------------------------------------
 1 | # Values In Definition
 2 | 
 3 | ## Don't specify values in column definition:
 4 | With enum, you declare the values as strings,
 5 | but internally the column is stored as the ordinal number of the string
 6 | in the enumerated list. The storage is therefore compact, but when you
 7 | sort a query by this column, the result is ordered by the ordinal value,
 8 | not alphabetically by the string value. You may not expect this behavior.
 9 | Not all databases allow you to add or remove a value from an ENUM or check
10 | constraint; you can only redefine the column with a new set of values.
11 | Moreover, if you make a value obsolete, you could upset historical data.
12 | As a matter of policy, changing metadata — that is, changing the definition
13 | of tables and columns—should be infrequent and with attention to testing and
14 | quality assurance. There's a better solution to restrict values in a column:
15 | create a lookup table with one row for each value you allow.
16 | Then declare a foreign key constraint on the old table referencing
17 | the new table.
18 | Use metadata when validating against a fixed set of values.
19 | Use data when validating against a fluid set of values.
20 | 


--------------------------------------------------------------------------------
/docs/physical/2003.md:
--------------------------------------------------------------------------------
 1 | # Files Are Not SQL Data Types
 2 | 
 3 | ## Resources outside the database are not managed by the database:   
 4 | It's common for programmers to be unequivocal that we should always
 5 | store files external to the database.
 6 | Files don't obey DELETE, transaction isolation, rollback, or work well with
 7 | database backup tools. They do not obey SQL access privileges and are not SQL
 8 | data types.
 9 | Resources outside the database are not managed by the database.
10 | You should consider storing blobs inside the database instead of in
11 | external files. You can save the contents of a BLOB column to a file.


--------------------------------------------------------------------------------
/docs/physical/2004.md:
--------------------------------------------------------------------------------
 1 | # Too Many Indexes
 2 | 
 3 | ## Don't create too many indexes:   
 4 | You benefit from an index only if you run queries that use that index.
 5 | There's no benefit to creating indexes that you don't use.
 6 | If you cover a database table with indexes, you incur a lot of overhead
 7 | with no assurance of payoff.
 8 | Consider dropping unnecessary indexes.
 9 | If an index provides all the columns we need, then we don't need to read
10 | rows of data from the table at all. Consider using such covering indexes.
11 | Know your data, know your queries, and maintain the right set of indexes.
12 | 


--------------------------------------------------------------------------------
/docs/physical/2005.md:
--------------------------------------------------------------------------------
 1 | # Index Attribute Order
 2 | 
 3 | ## Align the index attribute order with queries:   
 4 | If you create a compound index for the columns, make sure that the query
 5 | attributes are in the same order as the index attributes, so that the DBMS
 6 | can use the index while processing the query.
 7 | If the query and index attribute orders are not aligned, then the DBMS might
 8 | be unable to use the index during query processing.
 9 | 
10 | ### Example
11 | 
12 | ```
13 | CREATE INDEX TelephoneBook ON Accounts(last_name, first_name)   
14 | SELECT * FROM Accounts ORDER BY first_name, last_name
15 | ```
16 | 


--------------------------------------------------------------------------------
/docs/query/3001.md:
--------------------------------------------------------------------------------
 1 | # SELECT *
 2 | 
 3 | ## Inefficiency in moving data to the consumer:   
 4 | When you SELECT *, you're often retrieving more columns from the database than
 5 | your application really needs to function. This causes more data to move from
 6 | the database server to the client, slowing access and increasing load on your
 7 | machines, as well as taking more time to travel across the network. This is
 8 | especially true when someone adds new columns to underlying tables that didn't
 9 | exist and weren't needed when the original consumers coded their data access.   
10 | 
11 | ## Indexing issues:   
12 | Consider a scenario where you want to tune a query to a high level of performance.
13 | If you were to use *, and it returned more columns than you actually needed,
14 | the server would often have to perform more expensive methods to retrieve your
15 | data than it otherwise might. For example, you wouldn't be able to create an index
16 | which simply covered the columns in your SELECT list, and even if you did
17 | (including all columns [shudder]), the next developer who came around and added a column
18 | to the underlying table would cause the optimizer to ignore your optimized covering
19 | index, and you'd likely find that the performance of your query would drop
20 | substantially for no readily apparent reason.   
21 | 
22 | ## Binding Problems:   
23 | When you SELECT *, it's possible to retrieve two columns of the same name from two
24 | different tables. This can often crash your data consumer. Imagine a query that joins
25 | two tables, both of which contain a column called \ID\. How would a consumer know
26 | which was which? SELECT * can also confuse views (at least in some versions SQL Server)
27 | when underlying table structures change -- the view is not rebuilt, and the data which
28 | comes back can be nonsense. And the worst part of it is that you can take care to name
29 | your columns whatever you want, but the next developer who comes along might have no way of
30 | knowing that he has to worry about adding a column which will collide with your
31 | already-developed names.   
32 | 


--------------------------------------------------------------------------------
/docs/query/3002.md:
--------------------------------------------------------------------------------
 1 | # NULL Usage
 2 | 
 3 | 
 4 | ## Use NULL as a Unique Value:   
 5 | NULL is not the same as zero. A number ten greater than an unknown is still an unknown.
 6 | NULL is not the same as a string of zero length.
 7 | Combining any string with NULL in standard SQL returns NULL.
 8 | NULL is not the same as false. Boolean expressions with AND, OR, and NOT also produce
 9 | results that some people find confusing.
10 | When you declare a column as NOT NULL, it should be because it would make no sense
11 | for the row to exist without a value in that column.
12 | Use null to signify a missing value for any data type.
13 | 


--------------------------------------------------------------------------------
/docs/query/3003.md:
--------------------------------------------------------------------------------
1 | # NOT NULL Usage
2 | 
3 | ## Use NOT NULL only if the column cannot have a missing value:   
4 | When you declare a column as NOT NULL, it should be because it would make no sense
5 | for the row to exist without a value in that column.
6 | Use null to signify a missing value for any data type.
7 | 


--------------------------------------------------------------------------------
/docs/query/3004.md:
--------------------------------------------------------------------------------
 1 | # String Concatenation
 2 | 
 3 | ## Use COALESCE for string concatenation of nullable columns:   
 4 | You may need to force a column or expression to be non-null for the sake of
 5 | simplifying the query logic, but you don't want that value to be stored.
 6 | Use COALESCE function to construct the concatenated expression so that a
 7 | null-valued column doesn't make the whole expression become null.
 8 | 
 9 | ### Example
10 | 
11 | ```
12 | SELECT first_name || COALESCE(' ' || middle_initial || ' ', ' ') || last_name
13 | AS full_name FROM Accounts
14 | ```


--------------------------------------------------------------------------------
/docs/query/3005.md:
--------------------------------------------------------------------------------
 1 | # GROUP BY Usage
 2 | 
 3 | ## Do not reference non-grouped columns:   
 4 | Every column in the select-list of a query must have a single value row
 5 | per row group. This is called the Single-Value Rule.
 6 | Columns named in the GROUP BY clause are guaranteed to be exactly one value
 7 | per group, no matter how many rows the group matches.
 8 | Most DBMSs report an error if you try to run any query that tries to return
 9 | a column other than those columns named in the GROUP BY clause or as
10 | arguments to aggregate functions.
11 | Every expression in the select list must be contained in either an
12 | aggregate function or the GROUP BY clause.
13 | Follow the single-value rule to avoid ambiguous query results.
14 | 


--------------------------------------------------------------------------------
/docs/query/3006.md:
--------------------------------------------------------------------------------
 1 | # ORDER BY RAND Usage
 2 | 
 3 | ## Sorting by a nondeterministic expression (RAND()) means the sorting cannot benefit from an index:   
 4 | There is no index containing the values returned by the random function.
 5 | That's the point of them being ran- dom: they are different and
 6 | unpredictable each time they're selected. This is a problem for the performance
 7 | of the query, because using an index is one of the best ways of speeding up
 8 | sorting. The consequence of not using an index is that the query result set
 9 | has to be sorted by the database using a slow table scan.
10 | One technique that avoids sorting the table is to choose a random value
11 | between 1 and the greatest primary key value.
12 | Still another technique that avoids problems found in the preceding alternatives
13 | is to count the rows in the data set and return a random number between 0 and
14 | the count. Then use this number as an offset when querying the data set.
15 | Some queries just cannot be optimized consider taking a different approach.


--------------------------------------------------------------------------------
/docs/query/3007.md:
--------------------------------------------------------------------------------
 1 | # Pattern Matching Usage
 2 | 
 3 | ## Avoid using vanilla pattern matching:   
 4 | The most important disadvantage of pattern-matching operators is that
 5 | they have poor performance. A second problem of simple pattern-matching using LIKE
 6 | or regular expressions is that it can find unintended matches.
 7 | It's best to use a specialized search engine technology like Apache Lucene, instead of SQL.
 8 | Another alternative is to reduce the recurring cost of search by saving the result.
 9 | Consider using vendor extensions like FULLTEXT INDEX in MySQL.
10 | More broadly, you don't have to use SQL to solve every problem.


--------------------------------------------------------------------------------
/docs/query/3008.md:
--------------------------------------------------------------------------------
 1 | # Spaghetti Query Alert
 2 | 
 3 | ## Split up a complex spaghetti query into several simpler queries:   
 4 | SQL is a very expressive language—you can accomplish a lot in a single query or statement.
 5 | But that doesn't mean it's mandatory or even a good idea to approach every task with the
 6 | assumption it has to be done in one line of code.
 7 | 
 8 | One common unintended consequence of producing all your results in one query is
 9 | a Cartesian product. This happens when two of the tables in the query have no condition
10 | restricting their relationship. Without such a restriction, the join of two tables pairs
11 | each row in the first table to every row in the other table. Each such pairing becomes a
12 | row of the result set, and you end up with many more rows than you expect.
13 | 
14 | It's important to consider that these queries are simply hard to write, hard to modify,
15 | and hard to debug. You should expect to get regular requests for incremental enhancements
16 | to your database applications. Managers want more complex reports and more fields in a
17 | user interface. If you design intricate, monolithic SQL queries, it's more costly and
18 | time-consuming to make enhancements to them. Your time is worth something, both to you
19 | and to your project.
20 | 
21 | Split up a complex spaghetti query into several simpler queries.
22 | When you split up a complex SQL query, the result may be many similar queries,
23 | perhaps varying slightly depending on data values. Writing these queries is a chore,
24 | so it's a good application of SQL code generation.
25 | Although SQL makes it seem possible to solve a complex problem in a single line of code,
26 | don't be tempted to build a house of cards.
27 | 


--------------------------------------------------------------------------------
/docs/query/3009.md:
--------------------------------------------------------------------------------
1 | # Reduce Number of JOINs
2 | 
3 | ## Reduce Number of JOINs:   
4 | Too many JOINs is a symptom of complex spaghetti queries. Consider splitting
5 | up the complex query into many simpler queries, and reduce the number of JOINs
6 | 


--------------------------------------------------------------------------------
/docs/query/3010.md:
--------------------------------------------------------------------------------
1 | # Eliminate Unnecessary DISTINCT Conditions
2 | 
3 | ## Eliminate Unnecessary DISTINCT Conditions:   
4 | Too many DISTINCT conditions is a symptom of complex spaghetti queries.
5 | Consider splitting up the complex query into many simpler queries,
6 | and reduce the number of DISTINCT conditions
7 | It is possible that the DISTINCT condition has no effect if a primary key
8 | column is part of the result set of columns


--------------------------------------------------------------------------------
/docs/query/3011.md:
--------------------------------------------------------------------------------
1 | # Implicit Column Usage
2 | 
3 | ## Explicitly name columns:   
4 | Although using wildcards and unnamed columns satisfies the goal
5 | of less typing, this habit creates several hazards.
6 | This can break application refactoring and can harm performance.
7 | Always spell out all the columns you need, instead of relying on
8 | wild-cards or implicit column lists.


--------------------------------------------------------------------------------
/docs/query/3012.md:
--------------------------------------------------------------------------------
 1 | # HAVING Clause Usage
 2 | 
 3 | ## Consider removing the HAVING clause:   
 4 | Rewriting the query's HAVING clause into a predicate will enable the
 5 | use of indexes during query processing.
 6 | 
 7 | ### Example
 8 | 
 9 | ```
10 | SELECT s.cust_id,count(s.cust_id) FROM SH.sales s GROUP BY s.cust_id
11 | HAVING s.cust_id != '1660' AND s.cust_id != '2'
12 | ```
13 | can be rewritten as:   
14 | ```
15 | SELECT s.cust_id,count(cust_id) FROM SH.sales s WHERE s.cust_id != '1660'
16 | AND s.cust_id !='2' GROUP BY s.cust_id
17 | ```


--------------------------------------------------------------------------------
/docs/query/3013.md:
--------------------------------------------------------------------------------
 1 | # Nested sub queries
 2 | 
 3 | ## Un-nest sub queries:   
 4 |  Rewriting nested queries as joins often leads to more efficient
 5 | execution and more effective optimization. In general, sub-query unnesting
 6 | is always done for correlated sub-queries with, at most, one table in
 7 | the FROM clause, which are used in ANY, ALL, and EXISTS predicates.
 8 | A uncorrelated sub-query, or a sub-query with more than one table in
 9 | the FROM clause, is flattened if it can be decided, based on the query
10 | semantics, that the sub-query returns at most one row.
11 | 
12 | ### Example
13 | 
14 | ```
15 | SELECT * FROM SH.products p WHERE p.prod_id = (SELECT s.prod_id FROM SH.sales
16 | s WHERE s.cust_id = 100996 AND s.quantity_sold = 1 )
17 | ```
18 | can be rewritten as:   
19 | ```
20 | SELECT p.* FROM SH.products p, sales s WHERE p.prod_id = s.prod_id AND
21 | s.cust_id = 100996 AND s.quantity_sold = 1
22 | ```
23 | 


--------------------------------------------------------------------------------
/docs/query/3014.md:
--------------------------------------------------------------------------------
 1 | # OR Usage
 2 | 
 3 | ## Consider using an IN predicate when querying an indexed column:   
 4 | The IN-list predicate can be exploited for indexed retrieval and also,
 5 | the optimizer can sort the IN-list to match the sort sequence of the index,
 6 | leading to more efficient retrieval. Note that the IN-list must contain only
 7 | constants, or values that are constant during one execution of the query block,
 8 | such as outer references.
 9 | 
10 | ### Example
11 | 
12 | ```
13 | SELECT s.* FROM SH.sales s WHERE s.prod_id = 14 OR s.prod_id = 17
14 | ```  
15 | can be rewritten as:   
16 | ```
17 | SELECT s.* FROM SH.sales s WHERE s.prod_id IN (14, 17)
18 | ```


--------------------------------------------------------------------------------
/docs/query/3015.md:
--------------------------------------------------------------------------------
1 | # UNION Usage
2 | 
3 | ## Consider using UNION ALL if you do not care about duplicates:   
4 | Unlike UNION which removes duplicates, UNION ALL allows duplicate tuples.
5 | If you do not care about duplicate tuples, then using UNION ALL would be
6 | a faster option.


--------------------------------------------------------------------------------
/docs/query/3016.md:
--------------------------------------------------------------------------------
 1 | # DISTINCT & JOIN Usage
 2 | 
 3 | ## Consider using a sub-query with EXISTS instead of DISTINCT:   
 4 | The DISTINCT keyword removes duplicates after sorting the tuples.
 5 | Instead, consider using a sub query with the EXISTS keyword, you can avoid
 6 | having to return an entire table.
 7 | 
 8 | ### Example
 9 | 
10 | ```
11 | SELECT DISTINCT c.country_id, c.country_name FROM SH.countries c,
12 | SH.customers e WHERE e.country_id = c.country_id
13 | ```
14 | can be rewritten to:   
15 | ```
16 | SELECT c.country_id, c.country_name FROM SH.countries c WHERE  EXISTS
17 | (SELECT 'X' FROM  SH.customers e WHERE e.country_id = c.country_id)
18 | ```
19 | 


--------------------------------------------------------------------------------
/examples/auctionmark-ddl.sql:
--------------------------------------------------------------------------------
  1 | /***************************************************************************
  2 |  *  Copyright (C) 2010 by H-Store Project                                  *
  3 |  *  Brown University                                                       *
  4 |  *  Massachusetts Institute of Technology                                  *
  5 |  *  Yale University                                                        *
  6 |  *                                                                         *
  7 |  *  Andy Pavlo (pavlo@cs.brown.edu)                                        *
  8 |  *  http://www.cs.brown.edu/~pavlo/                                        *
  9 |  *                                                                         *
 10 |  *  Visawee Angkanawaraphan (visawee@cs.brown.edu)                         *
 11 |  *  http://www.cs.brown.edu/~visawee/                                      *
 12 |  *                                                                         *
 13 |  *  Permission is hereby granted, free of charge, to any person obtaining  *
 14 |  *  a copy of this software and associated documentation files (the        *
 15 |  *  "Software"), to deal in the Software without restriction, including    *
 16 |  *  without limitation the rights to use, copy, modify, merge, publish,    *
 17 |  *  distribute, sublicense, and/or sell copies of the Software, and to     *
 18 |  *  permit persons to whom the Software is furnished to do so, subject to  *
 19 |  *  the following conditions:                                              *
 20 |  *                                                                         *
 21 |  *  The above copyright notice and this permission notice shall be         *
 22 |  *  included in all copies or substantial portions of the Software.        *
 23 |  *                                                                         *
 24 |  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        *
 25 |  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     *
 26 |  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. *
 27 |  *  IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR      *
 28 |  *  OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,  *
 29 |  *  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR  *
 30 |  *  OTHER DEALINGS IN THE SOFTWARE.                                        *
 31 |  ***************************************************************************/
 32 | 
 33 | -- ================================================================ 
 34 | -- CONFIG_PROFILE
 35 | -- ================================================================
 36 | CREATE TABLE CONFIG_PROFILE (
 37 |     cfp_scale_factor            FLOAT NOT NULL,
 38 |     cfp_loader_start            TIMESTAMP NOT NULL,
 39 |     cfp_loader_stop             TIMESTAMP NOT NULL,
 40 |     cfp_user_item_histogram     VARCHAR(12000) NOT NULL
 41 | );
 42 | 
 43 | -- ================================================================
 44 | -- REGION
 45 | -- Represents regions of users
 46 | -- r_id             Region's ID
 47 | -- r_name           Region's name
 48 | -- ================================================================
 49 | CREATE TABLE REGION (
 50 |     r_id                BIGINT NOT NULL,
 51 |     r_name              VARCHAR(32),
 52 |     PRIMARY KEY (r_id)
 53 | );
 54 | 
 55 | -- ================================================================
 56 | -- USERACCT
 57 | -- Represents user accounts 
 58 | -- u_id             User ID
 59 | -- u_firstname      User's first name
 60 | -- u_lastname       User's last name
 61 | -- u_password       User's password
 62 | -- u_email          User's email
 63 | -- u_rating         User's rating as a seller
 64 | -- u_balance        User's balance
 65 | -- u_created        User's create date
 66 | -- u_r_id           User's region ID
 67 | -- ================================================================
 68 | CREATE TABLE USERACCT (
 69 |     u_id                BIGINT NOT NULL,
 70 |     u_rating            BIGINT NOT NULL,
 71 |     u_balance           FLOAT NOT NULL,
 72 |     u_comments          INTEGER DEFAULT 0,
 73 |     u_r_id              BIGINT NOT NULL REFERENCES REGION (r_id),
 74 |     u_created           TIMESTAMP,
 75 |     u_updated           TIMESTAMP,
 76 |     u_sattr0            VARCHAR(64),
 77 |     u_sattr1            VARCHAR(64),
 78 |     u_sattr2            VARCHAR(64),
 79 |     u_sattr3            VARCHAR(64),
 80 |     u_sattr4            VARCHAR(64),
 81 |     u_sattr5            VARCHAR(64),
 82 |     u_sattr6            VARCHAR(64),
 83 |     u_sattr7            VARCHAR(64),
 84 |     u_iattr0            BIGINT DEFAULT NULL,
 85 |     u_iattr1            BIGINT DEFAULT NULL,
 86 |     u_iattr2            BIGINT DEFAULT NULL,
 87 |     u_iattr3            BIGINT DEFAULT NULL,
 88 |     u_iattr4            BIGINT DEFAULT NULL,
 89 |     u_iattr5            BIGINT DEFAULT NULL,
 90 |     u_iattr6            BIGINT DEFAULT NULL,
 91 |     u_iattr7            BIGINT DEFAULT NULL, 
 92 |     PRIMARY KEY (u_id)
 93 | );
 94 | CREATE INDEX IDX_USERACCT_REGION ON USERACCT (u_id, u_r_id);
 95 | 
 96 | -- ================================================================
 97 | -- USERACCT_ATTRIBUTES
 98 | -- Represents user's attributes 
 99 | -- ================================================================
100 | CREATE TABLE USERACCT_ATTRIBUTES (
101 |     ua_id               BIGINT NOT NULL,
102 |     ua_u_id             BIGINT NOT NULL REFERENCES USERACCT (u_id),
103 |     ua_name             VARCHAR(32) NOT NULL,
104 |     ua_value            VARCHAR(32) NOT NULL,
105 |     u_created           TIMESTAMP,
106 |     PRIMARY KEY (ua_id, ua_u_id)
107 | );
108 | 
109 | -- ================================================================
110 | -- CATEGORY
111 | -- Represents merchandises' categories. Category can be hierarchical aligned using c_parent_id.
112 | -- c_id                Category's ID
113 | -- c_name            Category's name
114 | -- c_parent_id        Parent category's ID
115 | -- ================================================================
116 | CREATE TABLE CATEGORY (
117 |     c_id                BIGINT NOT NULL,
118 |     c_name              VARCHAR(50),
119 |     c_parent_id         BIGINT REFERENCES CATEGORY (c_id),
120 |     PRIMARY KEY (c_id)
121 | );
122 | CREATE INDEX IDX_CATEGORY_PARENT ON CATEGORY (c_parent_id);
123 | 
124 | -- ================================================================
125 | -- GLOBAL_ATTRIBUTE_GROUP
126 | -- Represents merchandises' global attribute groups (for example, brand, material, feature etc.).
127 | -- gag_id            Global attribute group's ID
128 | -- gag_c_id            Associated Category's ID
129 | -- gag_name            Global attribute group's name
130 | -- ================================================================
131 | CREATE TABLE GLOBAL_ATTRIBUTE_GROUP (
132 |     gag_id              BIGINT NOT NULL,
133 |     gag_c_id            BIGINT NOT NULL REFERENCES CATEGORY (c_id),
134 |     gag_name            VARCHAR(100) NOT NULL,
135 |     PRIMARY KEY (gag_id)
136 | );
137 | 
138 | -- ================================================================
139 | -- GLOBAL_ATTRIBUTE_VALUE
140 | -- Represents merchandises' global attributes within each attribute
141 | -- groups (for example, Rolex, Casio, Seiko within brand)
142 | -- gav_id            Global attribute value's ID
143 | -- gav_gag_id        Associated Global attribute group's ID
144 | -- gav_name            Global attribute value's name
145 | -- ================================================================
146 | CREATE TABLE GLOBAL_ATTRIBUTE_VALUE (
147 |     gav_id              BIGINT NOT NULL,
148 |     gav_gag_id          BIGINT NOT NULL REFERENCES GLOBAL_ATTRIBUTE_GROUP (gag_id),
149 |     gav_name            VARCHAR(100) NOT NULL,
150 |     PRIMARY KEY (gav_id, gav_gag_id)
151 | );
152 | 
153 | -- ================================================================
154 | -- ITEM
155 | -- Represents merchandises
156 | -- i_id                  Item's ID
157 | -- i_u_id                Seller's ID
158 | -- i_c_id                Category's ID
159 | -- i_name                Item's name
160 | -- i_description      Item's description
161 | -- i_initial_price    Item's initial price
162 | -- i_reserve_price    Item's reserve price
163 | -- i_buy_now            Item's buy now price
164 | -- i_nb_of_bids        Item's number of bids
165 | -- i_max_bid            Item's max bid price
166 | -- i_user_attributes Text field for attributes defined just for this item
167 | -- i_start_date        Item's bid start date
168 | -- i_end_date          Item's bid end date
169 | -- i_status            Items' status (0 = open, 1 = wait for purchase, 2 = close)
170 | -- ================================================================
171 | CREATE TABLE ITEM (
172 |     i_id                BIGINT NOT NULL,
173 |     i_u_id              BIGINT NOT NULL REFERENCES USERACCT (u_id),
174 |     i_c_id              BIGINT NOT NULL REFERENCES CATEGORY (c_id),
175 |     i_name              VARCHAR(100),
176 |     i_description       VARCHAR(1024),
177 |     i_user_attributes   VARCHAR(255) DEFAULT NULL,
178 |     i_initial_price     FLOAT NOT NULL,
179 |     i_current_price     FLOAT NOT NULL,
180 |     i_num_bids          BIGINT,
181 |     i_num_images        BIGINT,
182 |     i_num_global_attrs  BIGINT,
183 |     i_num_comments      BIGINT,
184 |     i_start_date        TIMESTAMP,
185 |     i_end_date          TIMESTAMP,
186 |     i_status            INT DEFAULT 0,
187 |     i_created           TIMESTAMP,
188 |     i_updated           TIMESTAMP,
189 |     i_iattr0            BIGINT DEFAULT NULL,
190 |     i_iattr1            BIGINT DEFAULT NULL,
191 |     i_iattr2            BIGINT DEFAULT NULL,
192 |     i_iattr3            BIGINT DEFAULT NULL,
193 |     i_iattr4            BIGINT DEFAULT NULL,
194 |     i_iattr5            BIGINT DEFAULT NULL,
195 |     i_iattr6            BIGINT DEFAULT NULL,
196 |     i_iattr7            BIGINT DEFAULT NULL, 
197 |     PRIMARY KEY (i_id, i_u_id)
198 | );
199 | CREATE INDEX IDX_ITEM_SELLER ON ITEM (i_u_id);
200 | 
201 | -- ================================================================
202 | -- ITEM_ATTRIBUTE
203 | -- Represents mappings between attribute values and items
204 | -- ia_id            Item attribute's ID
205 | -- ia_i_id            Item's ID
206 | -- ia_gav_id        Global attribute value's ID
207 | -- ================================================================
208 | CREATE TABLE ITEM_ATTRIBUTE (
209 |     ia_id               BIGINT NOT NULL,
210 |     ia_i_id             BIGINT NOT NULL,
211 |     ia_u_id             BIGINT NOT NULL,
212 |     ia_gav_id           BIGINT NOT NULL,
213 |     ia_gag_id           BIGINT NOT NULL,
214 |     ia_sattr0           VARCHAR(64) DEFAULT NULL,
215 |     FOREIGN KEY (ia_i_id, ia_u_id) REFERENCES ITEM (i_id, i_u_id) ON DELETE CASCADE,
216 |     FOREIGN KEY (ia_gav_id, ia_gag_id) REFERENCES GLOBAL_ATTRIBUTE_VALUE (gav_id, gav_gag_id),
217 |     PRIMARY KEY (ia_id, ia_i_id, ia_u_id)
218 | );
219 | 
220 | -- ================================================================
221 | -- ITEM_IMAGE
222 | -- Represents images of items
223 | -- ii_id            Image's ID
224 | -- ii_i_id            Item's ID
225 | -- ii_path            Image's path
226 | -- ================================================================
227 | CREATE TABLE ITEM_IMAGE (
228 |     ii_id               BIGINT NOT NULL,
229 |     ii_i_id             BIGINT NOT NULL,
230 |     ii_u_id             BIGINT NOT NULL,
231 |     ii_sattr0           VARCHAR(128) NOT NULL,
232 |     FOREIGN KEY (ii_i_id, ii_u_id) REFERENCES ITEM (i_id, i_u_id) ON DELETE CASCADE,
233 |     PRIMARY KEY (ii_id, ii_i_id, ii_u_id)
234 | );
235 | 
236 | -- ================================================================
237 | -- ITEM_COMMENT
238 | -- Represents comments provided by buyers
239 | -- ic_id            Comment's ID
240 | -- ic_i_id            Item's ID
241 | -- ic_u_id            Buyer's ID
242 | -- ic_date            Comment's create date
243 | -- ic_question        Comment by buyer
244 | -- ic_response        Response from seller
245 | -- ================================================================
246 | CREATE TABLE ITEM_COMMENT (
247 |     ic_id               BIGINT NOT NULL,
248 |     ic_i_id             BIGINT NOT NULL,
249 |     ic_u_id             BIGINT NOT NULL,
250 |     ic_buyer_id         BIGINT NOT NULL REFERENCES USERACCT (u_id),
251 |     ic_question         VARCHAR(128) NOT NULL,
252 |     ic_response         VARCHAR(128) DEFAULT NULL,
253 |     ic_created          TIMESTAMP,
254 |     ic_updated          TIMESTAMP,
255 |     FOREIGN KEY (ic_i_id, ic_u_id) REFERENCES ITEM (i_id, i_u_id) ON DELETE CASCADE,
256 |     PRIMARY KEY (ic_id, ic_i_id, ic_u_id)
257 | ); 
258 | -- CREATE INDEX IDX_ITEM_COMMENT ON ITEM_COMMENT (ic_i_id, ic_u_id);
259 | 
260 | -- ================================================================
261 | -- ITEM_BID
262 | -- Represents merchandises' bids
263 | -- ib_id            Bid's ID
264 | -- ib_i_id            Item's ID
265 | -- ib_u_id            Buyer's ID
266 | -- ib_type            Type of transaction (bid or buy_now)
267 | -- ib_bid            Bid's price
268 | -- ib_max_bid        ???
269 | -- ib_date            Bid's date
270 | -- ================================================================
271 | CREATE TABLE ITEM_BID (
272 |     ib_id               BIGINT NOT NULL,
273 |     ib_i_id             BIGINT NOT NULL,
274 |     ib_u_id             BIGINT NOT NULL,
275 |     ib_buyer_id         BIGINT NOT NULL REFERENCES USERACCT (u_id),
276 |     ib_bid              FLOAT NOT NULL,
277 |     ib_max_bid          FLOAT NOT NULL,
278 |     ib_created          TIMESTAMP,
279 |     ib_updated          TIMESTAMP,
280 |     FOREIGN KEY (ib_i_id, ib_u_id) REFERENCES ITEM (i_id, i_u_id) ON DELETE CASCADE,
281 |     PRIMARY KEY (ib_id, ib_i_id, ib_u_id)
282 | );
283 | 
284 | -- ================================================================
285 | -- ITEM_MAX_BID
286 | -- Cross-reference table to the current max bid for an auction
287 | -- ================================================================
288 | CREATE TABLE ITEM_MAX_BID (
289 |     imb_i_id            BIGINT NOT NULL,
290 |     imb_u_id            BIGINT NOT NULL,
291 |     imb_ib_id           BIGINT NOT NULL,
292 |     imb_ib_i_id         BIGINT NOT NULL,
293 |     imb_ib_u_id         BIGINT NOT NULL,
294 |     imb_created         TIMESTAMP,
295 |     imb_updated         TIMESTAMP,
296 |     FOREIGN KEY (imb_i_id, imb_u_id) REFERENCES ITEM (i_id, i_u_id) ON DELETE CASCADE,
297 |     FOREIGN KEY (imb_ib_id, imb_ib_i_id, imb_ib_u_id) REFERENCES ITEM_BID (ib_id, ib_i_id, ib_u_id) ON DELETE CASCADE,
298 |     PRIMARY KEY (imb_i_id, imb_u_id)
299 | );
300 | 
301 | -- ================================================================
302 | -- ITEM_PURCHASE
303 | -- Represents purchase transaction (buy_now bid or win bid)
304 | -- ip_id               Purchase's ID
305 | -- ip_ib_id            Bid's ID
306 | -- ip_date             Purchase's date
307 | -- ================================================================
308 | CREATE TABLE ITEM_PURCHASE (
309 |     ip_id               BIGINT NOT NULL,
310 |     ip_ib_id            BIGINT NOT NULL,
311 |     ip_ib_i_id          BIGINT NOT NULL,
312 |     ip_ib_u_id          BIGINT NOT NULL,
313 |     ip_date             TIMESTAMP,
314 |     FOREIGN KEY (ip_ib_id, ip_ib_i_id, ip_ib_u_id) REFERENCES ITEM_BID (ib_id, ib_i_id, ib_u_id) ON DELETE CASCADE,
315 |     PRIMARY KEY (ip_id, ip_ib_id, ip_ib_i_id, ip_ib_u_id)
316 | );
317 | 
318 | -- ================================================================
319 | -- USERACCT_FEEDBACK
320 | -- Represents feedbacks between buyers and sellers for a transaction
321 | -- uf_id             Feedback's ID
322 | -- uf_u_id           The user receiving the feedback
323 | -- uf_i_id           Item's ID
324 | -- uf_i_u_id         Item's seller id
325 | -- uf_from_id        The other user writing the feedback
326 | -- uf_date           Feedback's create date
327 | -- uf_comment        Feedback by other user
328 | -- ================================================================
329 | CREATE TABLE USERACCT_FEEDBACK (
330 |     uf_u_id             BIGINT NOT NULL REFERENCES USERACCT (u_id),
331 |     uf_i_id             BIGINT NOT NULL,
332 |     uf_i_u_id           BIGINT NOT NULL,
333 |     uf_from_id          BIGINT NOT NULL REFERENCES USERACCT (u_id),
334 |     uf_rating           TINYINT NOT NULL,
335 |     uf_date             TIMESTAMP,
336 |     uf_sattr0           VARCHAR(80) NOT NULL,
337 |     FOREIGN KEY (uf_i_id, uf_i_u_id) REFERENCES ITEM (i_id, i_u_id) ON DELETE CASCADE,
338 |     PRIMARY KEY (uf_u_id, uf_i_id, uf_i_u_id, uf_from_id),
339 |     CHECK (uf_u_id <> uf_from_id)
340 | );
341 | 
342 | -- ================================================================
343 | -- USERACCT_ITEM
344 | -- The items that a user has recently purchased
345 | -- ================================================================
346 | CREATE TABLE USERACCT_ITEM (
347 |     ui_u_id             BIGINT NOT NULL REFERENCES USERACCT (u_id),
348 |     ui_i_id             BIGINT NOT NULL,
349 |     ui_i_u_id           BIGINT NOT NULL,
350 |     ui_ip_id            BIGINT,
351 |     ui_ip_ib_id         BIGINT,
352 |     ui_ip_ib_i_id       BIGINT,
353 |     ui_ip_ib_u_id       BIGINT,
354 |     ui_created          TIMESTAMP,
355 |     FOREIGN KEY (ui_i_id, ui_i_u_id) REFERENCES ITEM (i_id, i_u_id) ON DELETE CASCADE,
356 |     FOREIGN KEY (ui_ip_id, ui_ip_ib_id, ui_ip_ib_i_id, ui_ip_ib_u_id) REFERENCES ITEM_PURCHASE (ip_id, ip_ib_id, ip_ib_i_id, ip_ib_u_id) ON DELETE CASCADE,
357 |     PRIMARY KEY (ui_u_id, ui_i_id, ui_i_u_id)
358 | );
359 | -- CREATE INDEX IDX_USERACCT_ITEM_ID ON USERACCT_ITEM (ui_i_id);
360 | 
361 | -- ================================================================
362 | -- USERACCT_WATCH
363 | -- The items that a user is watching
364 | -- ================================================================
365 | CREATE TABLE USERACCT_WATCH (
366 |     uw_u_id             BIGINT NOT NULL REFERENCES USERACCT (u_id),
367 |     uw_i_id             BIGINT NOT NULL,
368 |     uw_i_u_id           BIGINT NOT NULL,
369 |     uw_created          TIMESTAMP,
370 |     FOREIGN KEY (uw_i_id, uw_i_u_id) REFERENCES ITEM (i_id, i_u_id) ON DELETE CASCADE,
371 |     PRIMARY KEY (uw_u_id, uw_i_id, uw_i_u_id)
372 | );
373 | 


--------------------------------------------------------------------------------
/examples/query4.sql:
--------------------------------------------------------------------------------
 1 | select
 2 | 	o_orderpriority,
 3 | 	count(*) as order_count
 4 | from
 5 | 	orders
 6 | where
 7 | 	o_orderdate >= date '1994-08-01'
 8 | 	and o_orderdate < date '1994-08-01' + interval '3' month
 9 | 	and exists (
10 | 		select
11 | 			*
12 | 		from
13 | 			lineitem
14 | 		where
15 | 			l_orderkey = o_orderkey
16 | 			and l_commitdate < l_receiptdate
17 | 	)
18 | group by
19 | 	o_orderpriority
20 | order by
21 | 	o_orderpriority;
22 | 


--------------------------------------------------------------------------------
/examples/si-ddl.sql:
--------------------------------------------------------------------------------
1 | DROP TABLE IF EXISTS SITEST;
2 | CREATE TABLE SITEST (
3 |   	id INT PRIMARY KEY,
4 |     value INT NOT NULL
5 | );
6 | 


--------------------------------------------------------------------------------
/examples/top_mutexes.sql:
--------------------------------------------------------------------------------
 1 | with top_mutexes as (
 2 |    select--+ leading(t1 s1 v1 v2 t2 s2)  use_hash(s1) use_nl(v1) use_hash(s2) materialize
 3 |       t1.hsecs
 4 |      ,s1.*
 5 |      ,s2.sleeps           as end_sleeps
 6 |      ,s2.wait_time        as end_wait_time
 7 |      ,s2.sleeps-s1.sleeps as delta_sleeps
 8 |      ,t2.HSECS - t1.HSECS as delta_hsecs
 9 |      --,s2.* 
10 |    from v$timer t1
11 |        ,v$mutex_sleep s1
12 |        ,(select/*+ no_merge */ sum(level) a from dual connect by level<=1e6) v1
13 |        ,v$timer t2
14 |        ,v$mutex_sleep s2
15 |    where s1.MUTEX_TYPE=s2.MUTEX_TYPE
16 |    and s1.location=s2.location
17 | )
18 | select *
19 | from top_mutexes
20 | order by delta_sleeps desc;
21 | 


--------------------------------------------------------------------------------
/script/packages.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | sudo apt-get --ignore-missing -y install \
 4 |     git \
 5 |     g++ \
 6 |     cmake \
 7 |     libgflags-dev \
 8 |     libgtest-dev
 9 | 
10 | 


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | ##################################################################################
 2 | 
 3 | # --[ SQLCheck
 4 | 
 5 | # Set output directory
 6 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
 7 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 8 | 
 9 | # Make sure the compiler can find include files for our sqlcheck library
10 | include_directories (${CMAKE_CURRENT_SOURCE_DIR}/include)
11 | 
12 | # Create our sqlcheck library
13 | add_library (sqlcheck_library checker.cpp configuration.cpp list.cpp)
14 | 
15 | # Create our executable
16 | add_executable(sqlcheck main.cpp)
17 | target_link_libraries(sqlcheck sqlcheck_library 
18 | ${GTEST_BOTH_LIBRARIES} 
19 | ${GLOG_LIBRARIES} 
20 | ${CMAKE_THREAD_LIBS_INIT}
21 | gflags
22 | )
23 | 
24 | # Add installation target
25 | install (TARGETS sqlcheck sqlcheck_library DESTINATION bin)
26 | 


--------------------------------------------------------------------------------
/src/checker.cpp:
--------------------------------------------------------------------------------
  1 | // CHECKER SOURCE
  2 | 
  3 | #include <fstream>
  4 | #include <istream>
  5 | #include <sstream>
  6 | #include <string>
  7 | #include <memory>
  8 | #include <iostream>
  9 | #include <cstdlib>
 10 | #include <cctype>
 11 | #include <functional>
 12 | #include <regex>
 13 | #include <map>
 14 | 
 15 | #include "include/checker.h"
 16 | 
 17 | #include "include/configuration.h"
 18 | #include "include/list.h"
 19 | #include "include/color.h"
 20 | 
 21 | namespace sqlcheck {
 22 | 
 23 | bool Check(Configuration& state) {
 24 | 
 25 |   bool has_issues = false;
 26 |   std::unique_ptr<std::istream> input;
 27 | 
 28 |   // Set up stream
 29 |   if(state.testing_mode == true){
 30 |     input.reset(state.test_stream.release());
 31 |   }
 32 |   else if (state.file_name.empty()) {
 33 |     input.reset(&std::cin);
 34 |   }
 35 |   else {
 36 |     //std::cout << "Checking " << state.file_name << "...\n";
 37 |     input.reset(new std::ifstream(state.file_name.c_str()));
 38 |   }
 39 | 
 40 |   std::stringstream sql_statement;
 41 |   state.line_number = 1;
 42 | 
 43 |   std::cout << "==================== Results ===================\n";
 44 | 
 45 |   // Go over the input stream
 46 |   while(!input->eof()){
 47 | 
 48 |     // Get a statement from the input stream
 49 |     std::string statement_fragment;
 50 |     std::getline(*input, statement_fragment, state.delimiter[0]);
 51 | 
 52 |     // Append fragment to statement
 53 |     if(statement_fragment.empty() == false){
 54 |       sql_statement << statement_fragment << " ";
 55 |     }
 56 | 
 57 |     // Check the statement
 58 |     CheckStatement(state, sql_statement.str());
 59 | 
 60 |     // Reset statement
 61 |     sql_statement.str(std::string());
 62 |   }
 63 | 
 64 |   // Print summary
 65 |   if(state.checker_stats[RISK_LEVEL_ALL] == 0){
 66 |     std::cout << "No issues found.\n";
 67 |   }
 68 |   else {
 69 |     std::cout << "\n==================== Summary ===================\n";
 70 |     std::cout << "All Anti-Patterns and Hints  :: " << state.checker_stats[RISK_LEVEL_ALL] << "\n";
 71 |     std::cout << ">  High Risk   :: " << state.checker_stats[RISK_LEVEL_HIGH] << "\n";
 72 |     std::cout << ">  Medium Risk :: " << state.checker_stats[RISK_LEVEL_MEDIUM] << "\n";
 73 |     std::cout << ">  Low Risk    :: " << state.checker_stats[RISK_LEVEL_LOW] << "\n";
 74 |     std::cout << ">  Hints       :: " << state.checker_stats[RISK_LEVEL_NONE] << "\n";
 75 |     has_issues = true;
 76 |   }
 77 | 
 78 |   // Skip destroying std::cin
 79 |   if (state.file_name.empty()) {
 80 |     input.release();
 81 |   }
 82 | 
 83 |   return has_issues;
 84 | 
 85 | }
 86 | 
 87 | // Wrap the text
 88 | std::string WrapText(const std::string& text){
 89 | 
 90 |   size_t line_length = 80;
 91 | 
 92 |   std::istringstream words(text);
 93 |   std::ostringstream wrapped;
 94 |   std::string word;
 95 |   bool newline = false;
 96 |   bool newpara = false;
 97 | 
 98 |   if (words >> word) {
 99 | 
100 |     wrapped << word;
101 | 
102 |     size_t space_left = line_length - word.length();
103 |     while (words >> word) {
104 |       if(word == "●"){
105 |         wrapped << "\n\n";
106 |         newpara = true;
107 |       }
108 |       else{
109 |         newpara = false;
110 |       }
111 | 
112 |       if (space_left < word.length() + 1 || newline) {
113 |         wrapped << '\n' << word;
114 |         space_left = line_length - word.length();
115 |       }
116 |       else {
117 |         if(newpara == false){
118 |           wrapped << ' ' << word;
119 |         }
120 |         else{
121 |           wrapped << word;
122 |         }
123 |         space_left -= word.length() + 1;
124 |       }
125 | 
126 |       if(word.back() == ':'){
127 |         newline = true;
128 |       }
129 |       else{
130 |         newline = false;
131 |       }
132 |     }
133 | 
134 |   }
135 | 
136 |   return wrapped.str();
137 | }
138 | 
139 | void PrintMessage(Configuration& state,
140 |                   const std::string sql_statement,
141 |                   const bool print_statement,
142 |                   const RiskLevel pattern_risk_level,
143 |                   const PatternType pattern_type,
144 |                   const std::string title,
145 |                   const std::string message){
146 | 
147 |   ColorModifier red(ColorCode::FG_RED, state.color_mode, true);
148 |   ColorModifier green(ColorCode::FG_GREEN, state.color_mode, true);
149 |   ColorModifier blue(ColorCode::FG_BLUE, state.color_mode, true);
150 |   ColorModifier regular(ColorCode::FG_DEFAULT, state.color_mode, false);
151 | 
152 |   if(print_statement == true){
153 |     std::cout << "\n-------------------------------------------------\n";
154 |     ColorModifier regular(ColorCode::FG_DEFAULT, state.color_mode, false);
155 | 
156 |     if(state.color_mode == true){
157 |       std::cout << "SQL Statement at line " << state.line_number <<": " << red << WrapText(sql_statement) << state.delimiter << regular << "\n";
158 |     }
159 |     else {
160 |       std::cout << "SQL Statement at line " << state.line_number << ": " << WrapText(sql_statement) << state.delimiter << "\n";
161 |     }
162 |   }
163 | 
164 |   if(state.color_mode == true){
165 |     if(state.file_name.empty() == false){
166 |       std::cout << "[" << state.file_name << "]: ";
167 |     }
168 | 
169 |     std::cout << "(" << green << RiskLevelToString(pattern_risk_level) << regular << ") ";
170 |     std::cout << blue << title << regular << "\n";
171 |   }
172 |   else {
173 |     if(state.file_name.empty() == false){
174 |       std::cout << "[" << state.file_name << "]: ";
175 |     }
176 | 
177 |     std::cout << "(" << RiskLevelToString(pattern_risk_level) << ") ";
178 |     std::cout << "(" << PatternTypeToString(pattern_type) << ") ";
179 |     std::cout << title << "\n";
180 |   }
181 | 
182 |   // Print detailed message only in verbose mode
183 |   if(state.verbose == true){
184 |     std::cout << WrapText(message) << "\n";
185 |   }
186 | 
187 |   // Update checker stats
188 |   state.checker_stats[pattern_risk_level]++;
189 |   state.checker_stats[RISK_LEVEL_ALL]++;
190 | 
191 | }
192 | 
193 | void CheckPattern(Configuration& state,
194 |                   const std::string& sql_statement,
195 |                   bool& print_statement,
196 |                   const std::regex& anti_pattern,
197 |                   const RiskLevel pattern_risk_level,
198 |                   const PatternType pattern_type,
199 |                   const std::string title,
200 |                   const std::string message,
201 |                   const bool exists,
202 |                   const size_t min_count){
203 | 
204 |   //std::cout << "PATTERN LEVEL: " << pattern_risk_level << "\n";
205 |   //std::cout << "CHECKER LEVEL: " << state.log_level << "\n";
206 | 
207 |   // Check log level
208 |   if(pattern_risk_level < state.risk_level){
209 |     return;
210 |   }
211 | 
212 |   bool found = false;
213 |   std::smatch match;
214 |   std::size_t count = 0;
215 | 
216 |   // create an vector for the match positions
217 |   std::vector<size_t> positions;
218 |   try {
219 |     std::sregex_iterator sqlsearch = std::sregex_iterator(sql_statement.begin(), sql_statement.end(), anti_pattern);
220 |     std::sregex_iterator sqlend = std::sregex_iterator();
221 |     count = std::distance(sqlsearch, sqlend);
222 |     if (count > 0) {
223 |       found = true;
224 |     }
225 | 
226 |     if(found == exists && count > min_count){
227 |       for (std::sregex_iterator next = sqlsearch; next != sqlend; ++next)
228 |       {
229 |           match = *next;
230 |           // add match position to the vector
231 |           positions.push_back(match.position(0));
232 |       }
233 | 
234 |       // update positions from character number to line number
235 |       uint32_t position_checker = 0;
236 |       uint32_t num_lines = state.line_number;
237 |       if (positions.size() > 0) {
238 |         for (size_t statement_char = 0; statement_char < sql_statement.length(); statement_char++) {
239 |           if (positions[position_checker] == statement_char) {
240 |             positions[position_checker] = num_lines;
241 |             position_checker++;
242 |           }
243 |           if (sql_statement[statement_char] == '\n') {
244 |             num_lines++;
245 |           }
246 |         }
247 |       }
248 | 
249 |       std::stringstream linelocations;
250 |       // convert line numbers to output string
251 |       if (positions.size() > 1) {
252 |         linelocations << " at lines ";
253 |       } else {
254 |         linelocations << " at line ";
255 |       }
256 |       for (size_t i = 0; i < positions.size(); i++) {
257 |           linelocations << positions[i];
258 |           if (i < positions.size() - 1) {
259 |               linelocations << ", ";
260 |           }
261 |       }
262 |       PrintMessage(state,
263 |                   sql_statement,
264 |                   print_statement,
265 |                   pattern_risk_level,
266 |                   pattern_type,
267 |                   title,
268 |                   message);
269 | 
270 |       if(exists == true){
271 |         ColorModifier blue(ColorCode::FG_BLUE, state.color_mode, true);
272 |         ColorModifier regular(ColorCode::FG_DEFAULT, state.color_mode, false);
273 |         if(state.color_mode == true){
274 |           std::cout << "[Matching Expression: " << blue << WrapText(match.str(0)) << regular << linelocations.str()  << "]";
275 |         }
276 |         else{
277 |           std::cout << "[Matching Expression: " << WrapText(match.str(0)) << linelocations.str() << "]";
278 |         }
279 |         std::cout << "\n\n";
280 |       }
281 | 
282 |       // TOGGLE PRINT STATEMENT
283 |       print_statement = false;
284 |     }
285 |   } catch (std::regex_error& e) {
286 |     // Syntax error in the regular expression
287 |   }
288 | }
289 | 
290 | void CheckStatement(Configuration& state,
291 |                     const std::string& sql_statement){
292 | 
293 |   // TRANSFORM TO LOWER CASE
294 |   auto statement = sql_statement;
295 | 
296 |   std::transform(statement.begin(),
297 |                  statement.end(),
298 |                  statement.begin(),
299 |                  ::tolower);
300 | 
301 |   // REMOVE SPACE
302 |   statement = std::regex_replace(statement, std::regex("^ +| +$|( ) +"), "$1");
303 | 
304 |   // CHECK FOR LEADING NEWLINE
305 |   if (statement[0] == '\n') {
306 |     statement = statement.erase(0,1);
307 |     state.line_number++;
308 |   }
309 | 
310 |   // RESET
311 |   bool print_statement = true;
312 | 
313 |   // LOGICAL DATABASE DESIGN
314 | 
315 |   CheckMultiValuedAttribute(state, statement, print_statement);
316 | 
317 |   CheckRecursiveDependency(state, statement, print_statement);
318 | 
319 |   CheckPrimaryKeyExists(state, statement, print_statement);
320 | 
321 |   CheckGenericPrimaryKey(state, statement, print_statement);
322 | 
323 |   CheckForeignKeyExists(state, statement, print_statement);
324 | 
325 |   CheckVariableAttribute(state, statement, print_statement);
326 | 
327 |   CheckMetadataTribbles(state, statement, print_statement);
328 | 
329 |   // PHYSICAL DATABASE DESIGN
330 | 
331 |   CheckFloat(state, statement, print_statement);
332 | 
333 |   CheckValuesInDefinition(state, statement, print_statement);
334 | 
335 |   CheckExternalFiles(state, statement, print_statement);
336 | 
337 |   CheckIndexCount(state, statement, print_statement);
338 | 
339 |   CheckIndexAttributeOrder(state, statement, print_statement);
340 | 
341 |   // QUERY
342 | 
343 |   CheckSelectStar(state, statement, print_statement);
344 | 
345 |   CheckJoinWithoutEquality(state, statement, print_statement);
346 | 
347 |   CheckNullUsage(state, statement, print_statement);
348 | 
349 |   CheckNotNullUsage(state, statement, print_statement);
350 | 
351 |   CheckConcatenation(state, statement, print_statement);
352 | 
353 |   CheckGroupByUsage(state, statement, print_statement);
354 | 
355 |   CheckOrderByRand(state, statement, print_statement);
356 | 
357 |   CheckPatternMatching(state, statement, print_statement);
358 | 
359 |   CheckSpaghettiQuery(state, statement, print_statement);
360 | 
361 |   CheckJoinCount(state, statement, print_statement);
362 | 
363 |   CheckDistinctCount(state, statement, print_statement);
364 | 
365 |   CheckImplicitColumns(state, statement, print_statement);
366 | 
367 |   CheckHaving(state, statement, print_statement);
368 | 
369 |   CheckNesting(state, statement, print_statement);
370 | 
371 |   CheckOr(state, statement, print_statement);
372 | 
373 |   CheckUnion(state, statement, print_statement);
374 | 
375 |   CheckDistinctJoin(state, statement, print_statement);
376 | 
377 |   // APPLICATION
378 | 
379 |   CheckReadablePasswords(state, statement, print_statement);
380 | 
381 | 
382 | 
383 | 
384 |   // update state.line_number with number of line breaks in the statement that was just checked
385 |   for (size_t i = 0; i < statement.length(); i++)
386 |   {
387 |       if (statement[i] == '\n')
388 |       {
389 |           state.line_number++;
390 |       }
391 |   }
392 | }
393 | 
394 | }  // namespace machine
395 | 
396 | 


--------------------------------------------------------------------------------
/src/configuration.cpp:
--------------------------------------------------------------------------------
  1 | // CONFIGURATION SOURCE
  2 | 
  3 | #include "include/configuration.h"
  4 | 
  5 | #include "gflags/gflags.h"
  6 | 
  7 | namespace sqlcheck {
  8 | 
  9 | std::string RiskLevelToString(const RiskLevel& risk_level){
 10 | 
 11 |   switch (risk_level) {
 12 |     case RISK_LEVEL_HIGH:
 13 |       return "HIGH RISK";
 14 |     case RISK_LEVEL_MEDIUM:
 15 |       return "MEDIUM RISK";
 16 |     case RISK_LEVEL_LOW:
 17 |       return "LOW RISK";
 18 |     case RISK_LEVEL_NONE:
 19 |       return "HINTS";
 20 |     case RISK_LEVEL_ALL:
 21 |       return "ALL ANTI-PATTERNS";
 22 | 
 23 |     case RISK_LEVEL_INVALID:
 24 |     default:
 25 |       return "INVALID";
 26 |   }
 27 | 
 28 | }
 29 | 
 30 | std::string RiskLevelToDetailedString(const RiskLevel& risk_level){
 31 | 
 32 |   switch (risk_level) {
 33 |     case RISK_LEVEL_HIGH:
 34 |       return "ONLY HIGH RISK ANTI-PATTERNS";
 35 |     case RISK_LEVEL_MEDIUM:
 36 |       return "ONLY MEDIUM AND HIGH RISK ANTI-PATTERNS";
 37 |     case RISK_LEVEL_LOW:
 38 |       return "ONLY ANTI-PATTERNS";
 39 |     case RISK_LEVEL_NONE:
 40 |       return "ALL ANTI-PATTERNS AND HINTS";
 41 |     case RISK_LEVEL_ALL:
 42 |       return "ALL ANTI-PATTERNS";
 43 | 
 44 |     case RISK_LEVEL_INVALID:
 45 |     default:
 46 |       return "INVALID";
 47 |   }
 48 | 
 49 | }
 50 | 
 51 | 
 52 | std::string PatternTypeToString(const PatternType& pattern_type){
 53 | 
 54 |   switch (pattern_type) {
 55 |     case PATTERN_TYPE_PHYSICAL_DATABASE_DESIGN:
 56 |       return "PHYSICAL_DATABASE_DESIGN ANTI-PATTERN";
 57 |     case PATTERN_TYPE_LOGICAL_DATABASE_DESIGN:
 58 |       return "LOGICAL_DATABASE_DESIGN ANTI-PATTERN";
 59 |     case PATTERN_TYPE_QUERY:
 60 |       return "QUERY ANTI-PATTERN";
 61 |     case PATTERN_TYPE_APPLICATION:
 62 |       return "APPLICATION ANTI-PATTERN";
 63 | 
 64 |     case PATTERN_TYPE_INVALID:
 65 |     default:
 66 |       return "INVALID";
 67 |   }
 68 | 
 69 | }
 70 | 
 71 | std::string GetBooleanString(const bool& status){
 72 |   if(status == true){
 73 |     return "ENABLED";
 74 |   }
 75 |   else {
 76 |     return "DISABLED";
 77 |   }
 78 | }
 79 | 
 80 | void ValidateRiskLevel(const Configuration &state) {
 81 |   if (state.risk_level < RISK_LEVEL_ALL || state.risk_level > RISK_LEVEL_HIGH) {
 82 |     printf("INVALID RISK LEVEL :: %d\n", state.risk_level);
 83 |     exit(EXIT_FAILURE);
 84 |   }
 85 |   else {
 86 |     printf("> %s :: %s\n", "RISK LEVEL   ",
 87 |            RiskLevelToDetailedString(state.risk_level).c_str());
 88 |   }
 89 | }
 90 | 
 91 | void ValidateFileName(const Configuration &state) {
 92 |   if (state.file_name.empty() == false) {
 93 |     printf("> %s :: %s\n", "SQL FILE NAME",
 94 |            state.file_name.c_str());
 95 |   }
 96 | }
 97 | 
 98 | 
 99 | void ValidateColorMode(const Configuration &state) {
100 |     printf("> %s :: %s\n", "COLOR MODE   ",
101 |            GetBooleanString(state.color_mode).c_str());
102 | }
103 | 
104 | void ValidateVerbose(const Configuration &state) {
105 |     printf("> %s :: %s\n", "VERBOSE MODE ",
106 |          GetBooleanString(state.verbose).c_str());
107 | }
108 | 
109 | void ValidateDelimiter(const Configuration &state) {
110 |     printf("> %s :: %s\n", "DELIMITER    ",
111 |          state.delimiter.c_str());
112 | }
113 | 
114 | }  // namespace sqlcheck
115 | 


--------------------------------------------------------------------------------
/src/include/checker.h:
--------------------------------------------------------------------------------
 1 | // CHECKER HEADER
 2 | 
 3 | #pragma once
 4 | 
 5 | #include <regex>
 6 | 
 7 | #include "configuration.h"
 8 | 
 9 | namespace sqlcheck {
10 | 
11 | // Check a set of SQL statements
12 | bool Check(Configuration& state);
13 | 
14 | // Check a SQL statement
15 | void CheckStatement(Configuration& state,
16 |                     const std::string& sql_statement);
17 | 
18 | // Check a pattern
19 | void CheckPattern(Configuration& state,
20 |                   const std::string& sql_statement,
21 |                   bool& print_statement,
22 |                   const std::regex& anti_pattern,
23 |                   const RiskLevel pattern_level,
24 |                   const PatternType pattern_type,
25 |                   const std::string title,
26 |                   const std::string message,
27 |                   const bool exists,
28 |                   const size_t min_count = 0);
29 | 
30 | }  // namespace machine
31 | 


--------------------------------------------------------------------------------
/src/include/color.h:
--------------------------------------------------------------------------------
 1 | // COLOR HEADER
 2 | 
 3 | #pragma once
 4 | 
 5 | #include <ostream>
 6 | 
 7 | namespace sqlcheck {
 8 | 
 9 | enum ColorCode {
10 | 
11 |   FG_DEFAULT = 39,
12 |   FG_BLACK = 30,
13 |   FG_RED = 31,
14 |   FG_GREEN = 32,
15 |   FG_YELLOW = 33,
16 |   FG_BLUE = 34,
17 |   FG_MAGENTA = 35,
18 |   FG_CYAN = 36,
19 |   FG_LIGHT_GRAY = 37,
20 | 
21 |   FG_DARK_GRAY = 90,
22 |   FG_LIGHT_RED = 91,
23 |   FG_LIGHT_GREEN = 92,
24 |   FG_LIGHT_YELLOW = 93,
25 |   FG_LIGHT_BLUE = 94,
26 |   FG_LIGHT_MAGENTA = 95,
27 |   FG_LIGHT_CYAN = 96,
28 |   FG_WHITE = 97,
29 | 
30 |   BG_RED = 41,
31 |   BG_GREEN = 42,
32 |   BG_BLUE = 44,
33 |   BG_DEFAULT = 49
34 | 
35 | };
36 | 
37 | class ColorModifier {
38 | 
39 |  public:
40 |   ColorModifier(ColorCode color_code, bool enable_color, bool enable_bold)
41 |  : color_code_(color_code),
42 |    enable_color_(enable_color),
43 |    enable_bold_(enable_bold){
44 |   }
45 | 
46 |   friend std::ostream& operator<<(std::ostream& os, const ColorModifier& color_modifier) {
47 |     if(color_modifier.enable_bold_ == true){
48 |       if(color_modifier.enable_color_ == true) {
49 |         return os << "\e[1" << "m" << "\033[" << color_modifier.color_code_ << "m";
50 |       }
51 |       else {
52 |         return os;
53 |       }
54 |     }
55 |     else {
56 |       if(color_modifier.enable_color_ == true) {
57 |         return os << "\e[0" << "m" << "\033[" << color_modifier.color_code_ << "m";
58 |       }
59 |       else {
60 |         return os;
61 |       }
62 |     }
63 |   }
64 | 
65 |  private:
66 | 
67 |   // color code
68 |   ColorCode color_code_;
69 | 
70 |   // enable color
71 |   bool enable_color_;
72 | 
73 |   // enable bold
74 |   bool enable_bold_;
75 | 
76 | };
77 | 
78 | }  // namespace sqlcheck
79 | 


--------------------------------------------------------------------------------
/src/include/configuration.h:
--------------------------------------------------------------------------------
  1 | // CONFIGURATION HEADER
  2 | 
  3 | #pragma once
  4 | 
  5 | #include <cstdint>
  6 | #include <cstdlib>
  7 | #include <iostream>
  8 | #include <string>
  9 | #include <sstream>
 10 | #include <memory>
 11 | #include <map>
 12 | 
 13 | namespace sqlcheck {
 14 | 
 15 | #define UNUSED_ATTRIBUTE __attribute__((unused))
 16 | 
 17 | enum RiskLevel {
 18 |   RISK_LEVEL_INVALID = 10,
 19 | 
 20 |   RISK_LEVEL_HIGH = 4,
 21 |   RISK_LEVEL_MEDIUM = 3,
 22 |   RISK_LEVEL_LOW = 2,
 23 |   RISK_LEVEL_NONE = 1,
 24 |   RISK_LEVEL_ALL = 0
 25 | 
 26 | };
 27 | 
 28 | enum PatternType {
 29 |   PATTERN_TYPE_INVALID = 0,
 30 | 
 31 |   PATTERN_TYPE_PHYSICAL_DATABASE_DESIGN = 1,
 32 |   PATTERN_TYPE_LOGICAL_DATABASE_DESIGN = 2,
 33 |   PATTERN_TYPE_QUERY = 3,
 34 |   PATTERN_TYPE_APPLICATION = 4,
 35 | 
 36 | };
 37 | 
 38 | // Checker stats
 39 | struct CheckerStats {
 40 | 
 41 |   unsigned int error_level_counter = 0;
 42 |   unsigned int warn_level_counter = 0;
 43 |   unsigned int info_level_counter = 0;
 44 |   unsigned int debug_level_counter = 0;
 45 |   unsigned int trace_level_counter = 0;
 46 |   unsigned int all_level_counter = 0;
 47 | 
 48 | };
 49 | 
 50 | class Configuration {
 51 |  public:
 52 | 
 53 |   // Constructor
 54 |   Configuration()
 55 |    :
 56 |      color_mode(true),
 57 |      file_name(""),
 58 |      delimiter(";"),
 59 |      risk_level(RiskLevel::RISK_LEVEL_ALL),
 60 |      verbose(false),
 61 |      testing_mode(false) {
 62 |   }
 63 | 
 64 |   // color mode
 65 |   bool color_mode;
 66 | 
 67 |   // filename
 68 |   std::string file_name;
 69 | 
 70 |   // query delimiter
 71 |   std::string delimiter;
 72 | 
 73 |   // risk level
 74 |   RiskLevel risk_level;
 75 | 
 76 |   // verbose mode
 77 |   bool verbose;
 78 | 
 79 |   // test stream
 80 |   std::unique_ptr<std::istringstream> test_stream;
 81 | 
 82 |   // testing mode
 83 |   bool testing_mode;
 84 | 
 85 |   /// checker stats
 86 |   std::map<int, int> checker_stats;
 87 | 
 88 |   // line number
 89 |   std::uint32_t line_number;
 90 | 
 91 | };
 92 | 
 93 | std::string RiskLevelToString(const RiskLevel& risk_level);
 94 | 
 95 | std::string RiskLevelToDetailedString(const RiskLevel& risk_level);
 96 | 
 97 | std::string PatternTypeToString(const PatternType& pattern_type);
 98 | 
 99 | void ValidateRiskLevel(const Configuration &state);
100 | 
101 | void ValidateFileName(const Configuration &state);
102 | 
103 | void ValidateColorMode(const Configuration &state);
104 | 
105 | void ValidateVerbose(const Configuration &state);
106 | 
107 | void ValidateDelimiter(const Configuration &state);
108 | 
109 | 
110 | }  // namespace sqlcheck
111 | 


--------------------------------------------------------------------------------
/src/include/list.h:
--------------------------------------------------------------------------------
  1 | // LIST HEADER
  2 | 
  3 | #pragma once
  4 | 
  5 | #include "configuration.h"
  6 | 
  7 | namespace sqlcheck {
  8 | 
  9 | // LOGICAL DATABASE DESIGN
 10 | 
 11 | void CheckMultiValuedAttribute(Configuration& state,
 12 |                                const std::string& sql_statement,
 13 |                                bool& print_statement);
 14 | 
 15 | void CheckRecursiveDependency(Configuration& state,
 16 |                               const std::string& sql_statement,
 17 |                               bool& print_statement);
 18 | 
 19 | void CheckPrimaryKeyExists(Configuration& state,
 20 |                            const std::string& sql_statement,
 21 |                            bool& print_statement);
 22 | 
 23 | void CheckGenericPrimaryKey(Configuration& state,
 24 |                             const std::string& sql_statement,
 25 |                             bool& print_statement);
 26 | 
 27 | void CheckForeignKeyExists(Configuration& state,
 28 |                            const std::string& sql_statement,
 29 |                            bool& print_statement);
 30 | 
 31 | void CheckVariableAttribute(Configuration& state,
 32 |                             const std::string& sql_statement,
 33 |                             bool& print_statement);
 34 | 
 35 | void CheckMetadataTribbles(Configuration& state,
 36 |                            const std::string& sql_statement,
 37 |                            bool& print_statement);
 38 | 
 39 | // PHYSICAL DATABASE DESIGN
 40 | 
 41 | void CheckFloat(Configuration& state,
 42 |                 const std::string& sql_statement,
 43 |                 bool& print_statement);
 44 | 
 45 | void CheckValuesInDefinition(Configuration& state,
 46 |                              const std::string& sql_statement,
 47 |                              bool& print_statement);
 48 | 
 49 | void CheckExternalFiles(Configuration& state,
 50 |                         const std::string& sql_statement,
 51 |                         bool& print_statement);
 52 | 
 53 | void CheckIndexCount(Configuration& state,
 54 |                      const std::string& sql_statement,
 55 |                      bool& print_statement);
 56 | 
 57 | void CheckIndexAttributeOrder(Configuration& state,
 58 |                               const std::string& sql_statement,
 59 |                               bool& print_statement);
 60 | 
 61 | // QUERY
 62 | 
 63 | void CheckSelectStar(Configuration& state,
 64 |                      const std::string& sql_statement,
 65 |                      bool& print_statement);
 66 | 
 67 | void CheckJoinWithoutEquality(Configuration& state,
 68 |                               const std::string& sql_statement,
 69 |                               bool& print_statement);
 70 | 
 71 | void CheckNullUsage(Configuration& state,
 72 |                     const std::string& sql_statement,
 73 |                     bool& print_statement);
 74 | 
 75 | void CheckNotNullUsage(Configuration& state,
 76 |                        const std::string& sql_statement,
 77 |                        bool& print_statement);
 78 | 
 79 | void CheckConcatenation(Configuration& state,
 80 |                         const std::string& sql_statement,
 81 |                         bool& print_statement);
 82 | 
 83 | void CheckGroupByUsage(Configuration& state,
 84 |                        const std::string& sql_statement,
 85 |                        bool& print_statement);
 86 | 
 87 | void CheckOrderByRand(Configuration& state,
 88 |                       const std::string& sql_statement,
 89 |                       bool& print_statement);
 90 | 
 91 | void CheckPatternMatching(Configuration& state,
 92 |                           const std::string& sql_statement,
 93 |                           bool& print_statement);
 94 | 
 95 | void CheckSpaghettiQuery(Configuration& state,
 96 |                          const std::string& sql_statement,
 97 |                          bool& print_statement);
 98 | 
 99 | void CheckJoinCount(Configuration& state,
100 |                          const std::string& sql_statement,
101 |                          bool& print_statement);
102 | 
103 | void CheckDistinctCount(Configuration& state,
104 |                         const std::string& sql_statement,
105 |                         bool& print_statement);
106 | 
107 | void CheckImplicitColumns(Configuration& state,
108 |                           const std::string& sql_statement,
109 |                           bool& print_statement);
110 | 
111 | void CheckHaving(Configuration& state,
112 |                  const std::string& sql_statement,
113 |                  bool& print_statement);
114 | 
115 | void CheckNesting(Configuration& state,
116 |                   const std::string& sql_statement,
117 |                   bool& print_statement);
118 | 
119 | void CheckOr(Configuration& state,
120 |              const std::string& sql_statement,
121 |              bool& print_statement);
122 | 
123 | void CheckUnion(Configuration& state,
124 |                 const std::string& sql_statement,
125 |                 bool& print_statement);
126 | 
127 | void CheckDistinctJoin(Configuration& state,
128 |                        const std::string& sql_statement,
129 |                        bool& print_statement);
130 | 
131 | // APPLICATION
132 | 
133 | void CheckReadablePasswords(Configuration& state,
134 |                             const std::string& sql_statement,
135 |                             bool& print_statement);
136 | 
137 | 
138 | }  // namespace machine
139 | 


--------------------------------------------------------------------------------
/src/list.cpp:
--------------------------------------------------------------------------------
   1 | // LIST SOURCE
   2 | 
   3 | #include <regex>
   4 | 
   5 | #include "include/list.h"
   6 | #include "include/checker.h"
   7 | 
   8 | namespace sqlcheck {
   9 | 
  10 | // UTILITY
  11 | 
  12 | std::string GetTableName(const std::string& sql_statement){
  13 |   std::string table_template = "create table";
  14 |   std::size_t found = sql_statement.find(table_template);
  15 |   if (found == std::string::npos) {
  16 |     return "";
  17 |   }
  18 | 
  19 |   // Locate table name
  20 |   auto rest = sql_statement.substr(found + table_template.size());
  21 |   // Strip space at beginning
  22 |   rest = std::regex_replace(rest, std::regex("^ +| +$|( ) +"), "$1");
  23 |   // check if space or ( comes first in remaining string
  24 |   if (rest.find(' ') < rest.find('(')) {
  25 |     // space comes first
  26 |     rest = rest.substr(0, rest.find(' '));
  27 |   } else {
  28 |     // ( comes first
  29 |     rest = rest.substr(0, rest.find('('));
  30 |   }
  31 |   auto table_name = rest;
  32 | 
  33 |   return table_name;
  34 | }
  35 | 
  36 | bool IsDDLStatement(const std::string& sql_statement){
  37 |   std::string create_table_template = "create table";
  38 |   std::size_t found = sql_statement.find(create_table_template);
  39 |   if (found != std::string::npos) {
  40 |     return true;
  41 |   }
  42 | 
  43 |   std::string alter_table_template = "alter table";
  44 |   found = sql_statement.find(alter_table_template);
  45 |   if (found != std::string::npos) {
  46 |     return true;
  47 |   }
  48 | 
  49 |   return false;
  50 | }
  51 | 
  52 | bool IsCreateStatement(const std::string& sql_statement){
  53 |   std::string create_table_template = "create table";
  54 |   std::size_t found = sql_statement.find(create_table_template);
  55 |   if (found != std::string::npos) {
  56 |     return true;
  57 |   }
  58 | 
  59 |   return false;
  60 | }
  61 | 
  62 | // LOGICAL DATABASE DESIGN
  63 | 
  64 | 
  65 | void CheckMultiValuedAttribute(Configuration& state,
  66 |                                const std::string& sql_statement,
  67 |                                bool& print_statement){
  68 | 
  69 |   std::regex pattern("(id\\s+varchar)|(id\\s+text)|(id\\s+regexp)");
  70 |   std::string title = "Multi-Valued Attribute";
  71 |   PatternType pattern_type = PatternType::PATTERN_TYPE_LOGICAL_DATABASE_DESIGN;
  72 | 
  73 |   auto message =
  74 |       "● Store each value in its own column and row:  "
  75 |       "Storing a list of IDs as a VARCHAR/TEXT column can cause performance and data integrity "
  76 |       "problems. Querying against such a column would require using pattern-matching "
  77 |       "expressions. It is awkward and costly to join a comma-separated list to matching rows. "
  78 |       "This will make it harder to validate IDs. Think about what is the greatest number of "
  79 |       "entries this list must support? Instead of using a multi-valued attribute, "
  80 |       "consider storing it in a separate table, so that each individual value of that attribute "
  81 |       "occupies a separate row. Such an intersection table implements a many-to-many relationship "
  82 |       "between the two referenced tables. This will greatly simplify querying and validating "
  83 |       "the IDs.";
  84 | 
  85 |   CheckPattern(state,
  86 |                sql_statement,
  87 |                print_statement,
  88 |                pattern,
  89 |                RISK_LEVEL_HIGH,
  90 |                pattern_type,
  91 |                title,
  92 |                message,
  93 |                true);
  94 | 
  95 | }
  96 | 
  97 | void CheckRecursiveDependency(Configuration& state,
  98 |                               const std::string& sql_statement,
  99 |                               bool& print_statement){
 100 | 
 101 |   std::string table_name = GetTableName(sql_statement);
 102 |   if(table_name.empty()){
 103 |     return;
 104 |   }
 105 | 
 106 |   std::regex pattern("(references\\s+" + table_name+ ")");
 107 |   std::string title = "Recursive Dependency";
 108 |   PatternType pattern_type = PatternType::PATTERN_TYPE_LOGICAL_DATABASE_DESIGN;
 109 | 
 110 |   auto message =
 111 |       "● Avoid recursive relationships:  "
 112 |       "It’s common for data to have recursive relationships. Data may be organized in a "
 113 |       "treelike or hierarchical way. However, creating a foreign key constraint to enforce "
 114 |       "the relationship between two columns in the same table lends to awkward querying. "
 115 |       "Each level of the tree corresponds to another join. You will need to issue recursive "
 116 |       "queries to get all descendants or all ancestors of a node. "
 117 |       "A solution is to construct an additional closure table. It involves storing all paths "
 118 |       "through the tree, not just those with a direct parent-child relationship. "
 119 |       "You might want to compare different hierarchical data designs -- closure table, "
 120 |       "path enumeration, nested sets -- and pick one based on your application's needs.";
 121 | 
 122 |   CheckPattern(state,
 123 |                sql_statement,
 124 |                print_statement,
 125 |                pattern,
 126 |                RISK_LEVEL_HIGH,
 127 |                pattern_type,
 128 |                title,
 129 |                message,
 130 |                true);
 131 | 
 132 | }
 133 | 
 134 | void CheckPrimaryKeyExists(Configuration& state,
 135 |                            const std::string& sql_statement,
 136 |                            bool& print_statement){
 137 | 
 138 |   auto create_statement = IsCreateStatement(sql_statement);
 139 |   if(create_statement == false){
 140 |     return;
 141 |   }
 142 | 
 143 |   std::regex pattern("(primary key)");
 144 |   std::string title = "Primary Key Does Not Exist";
 145 |   PatternType pattern_type = PatternType::PATTERN_TYPE_LOGICAL_DATABASE_DESIGN;
 146 | 
 147 |   auto message =
 148 |       "● Consider adding a primary key:  "
 149 |       "A primary key constraint is important when you need to do the following:  "
 150 |       "prevent a table from containing duplicate rows, "
 151 |       "reference individual rows in queries, and "
 152 |       "support foreign key references "
 153 |       "If you don’t use primary key constraints, you create a chore for yourself:  "
 154 |       "checking for duplicate rows. More often than not, you will need to define "
 155 |       "a primary key for every table. Use compound keys when they are appropriate.";
 156 | 
 157 |   CheckPattern(state,
 158 |                sql_statement,
 159 |                print_statement,
 160 |                pattern,
 161 |                RISK_LEVEL_MEDIUM,
 162 |                pattern_type,
 163 |                title,
 164 |                message,
 165 |                false);
 166 | 
 167 | }
 168 | 
 169 | void CheckGenericPrimaryKey(Configuration& state,
 170 |                             const std::string& sql_statement,
 171 |                             bool& print_statement){
 172 | 
 173 |   auto ddl_statement = IsDDLStatement(sql_statement);
 174 |   if(ddl_statement == false){
 175 |     return;
 176 |   }
 177 | 
 178 |   std::regex pattern("(\\s+[\\(]?id\\s+)|(,id\\s+)|(\\s+id\\s+serial)");
 179 |   std::string title = "Generic Primary Key";
 180 |   PatternType pattern_type = PatternType::PATTERN_TYPE_LOGICAL_DATABASE_DESIGN;
 181 | 
 182 |   auto message =
 183 |       "● Skip using a generic primary key (id):  "
 184 |       "Adding an id column to every table causes several effects that make its "
 185 |       "use seem arbitrary. You might end up creating a redundant key or allow "
 186 |       "duplicate rows if you add this column in a compound key. "
 187 |       "The name id is so generic that it holds no meaning. This is especially "
 188 |       "important when you join two tables and they have the same primary "
 189 |       "key column name.";
 190 | 
 191 |   CheckPattern(state,
 192 |                sql_statement,
 193 |                print_statement,
 194 |                pattern,
 195 |                RISK_LEVEL_HIGH,
 196 |                pattern_type,
 197 |                title,
 198 |                message,
 199 |                true);
 200 | 
 201 | }
 202 | 
 203 | void CheckForeignKeyExists(Configuration& state,
 204 |                            const std::string& sql_statement,
 205 |                            bool& print_statement){
 206 | 
 207 |   auto create_statement = IsCreateStatement(sql_statement);
 208 |   if(create_statement == false){
 209 |     return;
 210 |   }
 211 | 
 212 |   std::regex pattern("(foreign key)");
 213 |   std::string title = "Foreign Key Does Not Exist";
 214 |   PatternType pattern_type = PatternType::PATTERN_TYPE_LOGICAL_DATABASE_DESIGN;
 215 | 
 216 |   auto message =
 217 |       "● Consider adding a foreign key:  "
 218 |       "Are you leaving out the application constraints? Even though it seems at "
 219 |       "first that skipping foreign key constraints makes your database design "
 220 |       "simpler, more flexible, or speedier, you pay for this in other ways. "
 221 |       "It becomes your responsibility to write code to ensure referential integrity "
 222 |       "manually. Use foreign key constraints to enforce referential integrity. "
 223 |       "Foreign keys have another feature you can’t mimic using application code:  "
 224 |       "cascading updates to multiple tables. This feature allows you to "
 225 |       "update or delete the parent row and lets the database takes care of any child "
 226 |       "rows that reference it. The way you declare the ON UPDATE or ON DELETE clauses "
 227 |       "in the foreign key constraint allow you to control the result of a cascading "
 228 |       "operation. Make your database mistake-proof with constraints.";
 229 | 
 230 |   CheckPattern(state,
 231 |                sql_statement,
 232 |                print_statement,
 233 |                pattern,
 234 |                RISK_LEVEL_MEDIUM,
 235 |                pattern_type,
 236 |                title,
 237 |                message,
 238 |                false);
 239 | 
 240 | }
 241 | 
 242 | void CheckVariableAttribute(Configuration& state,
 243 |                             const std::string& sql_statement,
 244 |                             bool& print_statement){
 245 | 
 246 |   std::string table_name = GetTableName(sql_statement);
 247 |   if(table_name.empty()){
 248 |     return;
 249 |   }
 250 | 
 251 |   auto found = table_name.find("attribute");
 252 |   if (found == std::string::npos) {
 253 |     return;
 254 |   }
 255 | 
 256 |   std::regex pattern("(attribute)");
 257 |   std::string title = "Entity-Attribute-Value Pattern";
 258 |   PatternType pattern_type = PatternType::PATTERN_TYPE_LOGICAL_DATABASE_DESIGN;
 259 | 
 260 |   auto message =
 261 |       "● Dynamic schema with variable attributes:  "
 262 |       "Are you trying to create a schema where you can define new attributes "
 263 |       "at runtime.? This involves storing attributes as rows in an attribute table. "
 264 |       "This is referred to as the Entity-Attribute-Value or schemaless pattern. "
 265 |       "When you use this pattern,  you sacrifice many advantages that a conventional "
 266 |       "database design would have given you. You can't make mandatory attributes. "
 267 |       "You can't enforce referential integrity. You might find that attributes are "
 268 |       "not being named consistently. A solution is to store all related types in one table, "
 269 |       "with distinct columns for every attribute that exists in any type "
 270 |       "(Single Table Inheritance). Use one attribute to define the subtype of a given row. "
 271 |       "Many attributes are subtype-specific, and these columns must "
 272 |       "be given a null value on any row storing an object for which the attribute "
 273 |       "does not apply; the columns with non-null values become sparse. "
 274 |       "Another solution is to create a separate table for each subtype "
 275 |       "(Concrete Table Inheritance). A third solution mimics inheritance, "
 276 |       "as though tables were object-oriented classes (Class Table Inheritance). "
 277 |       "Create a single table for the base type, containing attributes common to "
 278 |       "all subtypes. Then for each subtype, create another table, with a primary key "
 279 |       "that also serves as a foreign key to the base table. "
 280 |       "If you have many subtypes or if you must support new attributes frequently, "
 281 |       "you can add a BLOB column to store data in a format such as XML or JSON, "
 282 |       "which encodes both the attribute names and their values. "
 283 |       "This design is best when you can’t limit yourself to a finite set of subtypes "
 284 |       "and when you need complete flexibility to define new attributes at any time.";
 285 | 
 286 |   CheckPattern(state,
 287 |                sql_statement,
 288 |                print_statement,
 289 |                pattern,
 290 |                RISK_LEVEL_MEDIUM,
 291 |                pattern_type,
 292 |                title,
 293 |                message,
 294 |                true);
 295 | 
 296 | }
 297 | 
 298 | void CheckMetadataTribbles(Configuration& state,
 299 |                            const std::string& sql_statement,
 300 |                            bool& print_statement){
 301 | 
 302 |   auto ddl_statement = IsDDLStatement(sql_statement);
 303 |   if(ddl_statement == false){
 304 |     return;
 305 |   }
 306 | 
 307 |   std::regex pattern("[A-za-z\\-_@]+[0-9]+ ");
 308 |   std::string title = "Metadata Tribbles";
 309 |   PatternType pattern_type = PatternType::PATTERN_TYPE_LOGICAL_DATABASE_DESIGN;
 310 | 
 311 |   std::string message1 =
 312 |       "● Breaking down a table or column by year/user/etc.:  "
 313 |       "You might be trying to split a single column into multiple columns, "
 314 |       "using column names based on distinct values in another attribute. "
 315 |       "For each year or user, you will need to add one more column or table. "
 316 |       "You are mixing metadata with data. You will now need to make sure that "
 317 |       "the primary key values are unique across all the split columns or tables. "
 318 |       "The solution is to use a feature called sharding or horizontal partitioning. "
 319 |       "(PARTITION BY HASH ( YEAR(...) ). With this feature, you can gain the "
 320 |       "benefits of splitting a large table without the drawbacks. "
 321 |       "Partitioning is not defined in the SQL standard, so each brand of database "
 322 |       "implements it in their own nonstandard way. "
 323 |       "Another remedy for metadata tribbles is to create a dependent table. "
 324 |       "Instead of one row per entity with multiple columns for each year, "
 325 |       "use multiple rows. Don't let data spawn metadata.";
 326 | 
 327 |   std::string message2 =
 328 |       "● Store each value with the same meaning in a single column:  "
 329 |       "Creating multiple columns in a table with the same prefix "
 330 |       "indicates that you are trying to store a multivalued attribute. "
 331 |       "This design makes it hard to add or remove values, "
 332 |       "to ensure the uniqueness of values, and handling growing sets of values. "
 333 |       "The best solution is to create a dependent table with one column for the "
 334 |       "multivalued attribute. Store the multiple values in multiple rows instead of "
 335 |       "multiple columns and define a foreign key in the dependent table to associate "
 336 |       "the values to its parent row.";
 337 | 
 338 |   auto message = message1 + "\n" + message2;
 339 | 
 340 |   CheckPattern(state,
 341 |                sql_statement,
 342 |                print_statement,
 343 |                pattern,
 344 |                RISK_LEVEL_MEDIUM,
 345 |                pattern_type,
 346 |                title,
 347 |                message,
 348 |                true);
 349 | 
 350 | }
 351 | 
 352 | // PHYSICAL DATABASE DESIGN
 353 | 
 354 | void CheckFloat(Configuration& state,
 355 |                 const std::string& sql_statement,
 356 |                 bool& print_statement){
 357 | 
 358 |   std::regex pattern("(float)|(real)|(double precision)|(0\\.000[0-9]*)");
 359 |   std::string title = "Imprecise Data Type";
 360 |   PatternType pattern_type = PatternType::PATTERN_TYPE_PHYSICAL_DATABASE_DESIGN;
 361 | 
 362 |   auto message =
 363 |       "● Use precise data types:  "
 364 |       "Virtually any use of FLOAT, REAL, or DOUBLE PRECISION data types is suspect. "
 365 |       "Most applications that use floating-point numbers don't require the range of "
 366 |       "values supported by IEEE 754 formats. The cumulative impact of inexact  "
 367 |       "floating-point numbers is severe when calculating aggregates. "
 368 |       "Instead of FLOAT or its siblings, use the NUMERIC or DECIMAL SQL data types "
 369 |       "for fixed-precision fractional numbers. These data types store numeric values "
 370 |       "exactly, up to the precision you specify in the column definition. "
 371 |       "Do not use FLOAT if you can avoid it.";
 372 | 
 373 |   CheckPattern(state,
 374 |                sql_statement,
 375 |                print_statement,
 376 |                pattern,
 377 |                RISK_LEVEL_MEDIUM,
 378 |                pattern_type,
 379 |                title,
 380 |                message,
 381 |                true);
 382 | 
 383 | }
 384 | 
 385 | void CheckValuesInDefinition(Configuration& state,
 386 |                              const std::string& sql_statement,
 387 |                              bool& print_statement){
 388 | 
 389 |   auto ddl_statement = IsDDLStatement(sql_statement);
 390 |   if(ddl_statement == false){
 391 |     return;
 392 |   }
 393 | 
 394 |   std::regex pattern("( enum)|( in \\()");
 395 |   std::string title = "Values In Definition";
 396 |   PatternType pattern_type = PatternType::PATTERN_TYPE_PHYSICAL_DATABASE_DESIGN;
 397 | 
 398 |   auto message =
 399 |       "● Don't specify values in column definition:  "
 400 |       "With enum, you declare the values as strings, "
 401 |       "but internally the column is stored as the ordinal number of the string "
 402 |       "in the enumerated list. The storage is therefore compact, but when you "
 403 |       "sort a query by this column, the result is ordered by the ordinal value, "
 404 |       "not alphabetically by the string value. You may not expect this behavior. "
 405 |       "There's no syntax to add or remove a value from an ENUM or check constraint; "
 406 |       "you can only redefine the column with a new set of values. "
 407 |       "Moreover, if you make a value obsolete, you could upset historical data. "
 408 |       "As a matter of policy, changing metadata — that is, changing the definition "
 409 |       "of tables and columns—should be infrequent and with attention to testing and "
 410 |       "quality assurance. There's a better solution to restrict values in a column:  "
 411 |       "create a lookup table with one row for each value you allow. "
 412 |       "Then declare a foreign key constraint on the old table referencing "
 413 |       "the new table. "
 414 |       "Use metadata when validating against a fixed set of values. "
 415 |       "Use data when validating against a fluid set of values.";
 416 | 
 417 |   CheckPattern(state,
 418 |                sql_statement,
 419 |                print_statement,
 420 |                pattern,
 421 |                RISK_LEVEL_MEDIUM,
 422 |                pattern_type,
 423 |                title,
 424 |                message,
 425 |                true);
 426 | 
 427 | }
 428 | 
 429 | void CheckExternalFiles(Configuration& state,
 430 |                         const std::string& sql_statement,
 431 |                         bool& print_statement){
 432 | 
 433 |   std::regex pattern("(path varchar)|(unlink\\s?\\()");
 434 |   std::string title = "Files Are Not SQL Data Types";
 435 |   PatternType pattern_type = PatternType::PATTERN_TYPE_PHYSICAL_DATABASE_DESIGN;
 436 | 
 437 |   auto message =
 438 |       "● Resources outside the database are not managed by the database:  "
 439 |       "It's common for programmers to be unequivocal that we should always "
 440 |       "store files external to the database. "
 441 |       "Files don't obey DELETE, transaction isolation, rollback, or work well with "
 442 |       "database backup tools. They do not obey SQL access privileges and are not SQL "
 443 |       "data types. "
 444 |       "Resources outside the database are not managed by the database. "
 445 |       "You should consider storing blobs inside the database instead of in "
 446 |       "external files. You can save the contents of a BLOB column to a file.";
 447 | 
 448 |   CheckPattern(state,
 449 |                sql_statement,
 450 |                print_statement,
 451 |                pattern,
 452 |                RISK_LEVEL_MEDIUM,
 453 |                pattern_type,
 454 |                title,
 455 |                message,
 456 |                true);
 457 | 
 458 | }
 459 | 
 460 | void CheckIndexCount(Configuration& state,
 461 |                      const std::string& sql_statement,
 462 |                      bool& print_statement){
 463 | 
 464 |   auto create_statement = IsCreateStatement(sql_statement);
 465 |   if(create_statement == false){
 466 |     return;
 467 |   }
 468 | 
 469 |   std::size_t min_count = 3;
 470 |   std::regex pattern("(index)");
 471 |   std::string title = "Too Many Indexes";
 472 |   PatternType pattern_type = PatternType::PATTERN_TYPE_PHYSICAL_DATABASE_DESIGN;
 473 | 
 474 |   auto message =
 475 |       "● Don't create too many indexes:  "
 476 |       "You benefit from an index only if you run queries that use that index. "
 477 |       "There's no benefit to creating indexes that you don't use. "
 478 |       "If you cover a database table with indexes, you incur a lot of overhead "
 479 |       "with no assurance of payoff. "
 480 |       "Consider dropping unnecessary indexes. "
 481 |       "If an index provides all the columns we need, then we don't need to read "
 482 |       "rows of data from the table at all. Consider using such covering indexes. "
 483 |       "Know your data, know your queries, and maintain the right set of indexes.";
 484 | 
 485 |   CheckPattern(state,
 486 |                sql_statement,
 487 |                print_statement,
 488 |                pattern,
 489 |                RISK_LEVEL_MEDIUM,
 490 |                pattern_type,
 491 |                title,
 492 |                message,
 493 |                true,
 494 |                min_count);
 495 | 
 496 | }
 497 | 
 498 | void CheckIndexAttributeOrder(Configuration& state,
 499 |                               const std::string& sql_statement,
 500 |                               bool& print_statement){
 501 | 
 502 | 
 503 |   std::regex pattern("(create index)");
 504 |   std::string title = "Index Attribute Order";
 505 |   PatternType pattern_type = PatternType::PATTERN_TYPE_PHYSICAL_DATABASE_DESIGN;
 506 | 
 507 |   auto message =
 508 |       "● Align the index attribute order with queries:  "
 509 |       "If you create a compound index for the columns, make sure that the query "
 510 |       "attributes are in the same order as the index attributes, so that the DBMS "
 511 |       "can use the index while processing the query. "
 512 |       "If the query and index attribute orders are not aligned, then the DBMS might "
 513 |       "be unable to use the index during query processing. "
 514 |       "EX: CREATE INDEX TelephoneBook ON Accounts(last_name, first_name); "
 515 |       "SELECT * FROM Accounts ORDER BY first_name, last_name;";
 516 | 
 517 |   CheckPattern(state,
 518 |                sql_statement,
 519 |                print_statement,
 520 |                pattern,
 521 |                RISK_LEVEL_LOW,
 522 |                pattern_type,
 523 |                title,
 524 |                message,
 525 |                true);
 526 | 
 527 | }
 528 | 
 529 | 
 530 | // QUERY
 531 | 
 532 | void CheckSelectStar(Configuration& state,
 533 |                      const std::string& sql_statement,
 534 |                      bool& print_statement){
 535 | 
 536 |   std::regex pattern("(select\\s+\\*)");
 537 |   std::string title = "SELECT *";
 538 |   PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY;
 539 | 
 540 |   std::string message1 =
 541 |       "● Inefficiency in moving data to the consumer:  "
 542 |       "When you SELECT *, you're often retrieving more columns from the database than "
 543 |       "your application really needs to function. This causes more data to move from "
 544 |       "the database server to the client, slowing access and increasing load on your "
 545 |       "machines, as well as taking more time to travel across the network. This is "
 546 |       "especially true when someone adds new columns to underlying tables that didn't "
 547 |       "exist and weren't needed when the original consumers coded their data access.";
 548 | 
 549 |   std::string message2 =
 550 |       "● Indexing issues:  "
 551 |       "Consider a scenario where you want to tune a query to a high level of performance. "
 552 |       "If you were to use *, and it returned more columns than you actually needed, "
 553 |       "the server would often have to perform more expensive methods to retrieve your "
 554 |       "data than it otherwise might. For example, you wouldn't be able to create an index "
 555 |       "which simply covered the columns in your SELECT list, and even if you did "
 556 |       "(including all columns [shudder]), the next guy who came around and added a column "
 557 |       "to the underlying table would cause the optimizer to ignore your optimized covering "
 558 |       "index, and you'd likely find that the performance of your query would drop "
 559 |       "substantially for no readily apparent reason.";
 560 | 
 561 |   std::string message3 =
 562 |       "● Binding Problems:  "
 563 |       "When you SELECT *, it's possible to retrieve two columns of the same name from two "
 564 |       "different tables. This can often crash your data consumer. Imagine a query that joins "
 565 |       "two tables, both of which contain a column called \"ID\". How would a consumer know "
 566 |       "which was which? SELECT * can also confuse views (at least in some versions SQL Server) "
 567 |       "when underlying table structures change -- the view is not rebuilt, and the data which "
 568 |       "comes back can be nonsense. And the worst part of it is that you can take care to name "
 569 |       "your columns whatever you want, but the next guy who comes along might have no way of "
 570 |       "knowing that he has to worry about adding a column which will collide with your "
 571 |       "already-developed names.";
 572 | 
 573 |   auto message = message1 + "\n" + message2 + "\n" + message3;
 574 | 
 575 |   CheckPattern(state,
 576 |                sql_statement,
 577 |                print_statement,
 578 |                pattern,
 579 |                RISK_LEVEL_HIGH,
 580 |                pattern_type,
 581 |                title,
 582 |                message,
 583 |                true);
 584 | 
 585 | }
 586 | 
 587 | void CheckJoinWithoutEquality(Configuration& state,
 588 |                               const std::string& sql_statement,
 589 |                               bool& print_statement) {
 590 |   std::regex pattern("join[\\s\\._]?[^=]+?(left|right|join|where|case)");
 591 |   std::string title = "JOIN Without Equality Check";
 592 |   PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY;
 593 | 
 594 |   auto message =
 595 |       "● Use = with JOIN: "
 596 |       "JOIN should always have an equality check to ensure proper scope of records. ";
 597 | 
 598 |   CheckPattern(state,
 599 |                sql_statement,
 600 |                print_statement,
 601 |                pattern,
 602 |                RISK_LEVEL_HIGH,
 603 |                pattern_type,
 604 |                title,
 605 |                message,
 606 |                true);
 607 | }
 608 | 
 609 | void CheckNullUsage(Configuration& state,
 610 |                     const std::string& sql_statement,
 611 |                     bool& print_statement) {
 612 | 
 613 |   std::regex pattern("(null)");
 614 |   std::string title = "NULL Usage";
 615 |   PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY;
 616 | 
 617 |   auto message =
 618 |       "● Use NULL as a Unique Value:  "
 619 |       "NULL is not the same as zero. A number ten greater than an unknown is still an unknown. "
 620 |       "NULL is not the same as a string of zero length. "
 621 |       "Combining any string with NULL in standard SQL returns NULL. "
 622 |       "NULL is not the same as false. Boolean expressions with AND, OR, and NOT also produce "
 623 |       "results that some people find confusing. "
 624 |       "When you declare a column as NOT NULL, it should be because it would make no sense "
 625 |       "for the row to exist without a value in that column. "
 626 |       "Use null to signify a missing value for any data type.";
 627 | 
 628 |   CheckPattern(state,
 629 |                sql_statement,
 630 |                print_statement,
 631 |                pattern,
 632 |                RISK_LEVEL_NONE,
 633 |                pattern_type,
 634 |                title,
 635 |                message,
 636 |                true);
 637 | 
 638 | }
 639 | 
 640 | void CheckNotNullUsage(Configuration& state,
 641 |                        const std::string& sql_statement,
 642 |                        bool& print_statement) {
 643 | 
 644 |   auto create_statement = IsCreateStatement(sql_statement);
 645 |   if(create_statement == false){
 646 |     return;
 647 |   }
 648 | 
 649 |   std::regex pattern("(not null)");
 650 |   std::string title = "NOT NULL Usage";
 651 |   PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY;
 652 | 
 653 |   auto message =
 654 |       "● Use NOT NULL only if the column cannot have a missing value:  "
 655 |       "When you declare a column as NOT NULL, it should be because it would make no sense "
 656 |       "for the row to exist without a value in that column. "
 657 |       "Use null to signify a missing value for any data type.";
 658 | 
 659 |   CheckPattern(state,
 660 |                sql_statement,
 661 |                print_statement,
 662 |                pattern,
 663 |                RISK_LEVEL_NONE,
 664 |                pattern_type,
 665 |                title,
 666 |                message,
 667 |                true);
 668 | 
 669 | }
 670 | 
 671 | void CheckConcatenation(Configuration& state,
 672 |                         const std::string& sql_statement,
 673 |                         bool& print_statement) {
 674 | 
 675 | 
 676 |   std::regex pattern("\\|\\|");
 677 |   std::string title = "String Concatenation";
 678 |   PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY;
 679 | 
 680 |   auto message =
 681 |       "● Use COALESCE for string concatenation of nullable columns:  "
 682 |       "You may need to force a column or expression to be non-null for the sake of "
 683 |       "simplifying the query logic, but you don't want that value to be stored. "
 684 |       "Use COALESCE function to construct the concatenated expression so that a "
 685 |       "null-valued column doesn't make the whole expression become null. "
 686 |       "EX: SELECT first_name || COALESCE(' ' || middle_initial || ' ', ' ') || last_name "
 687 |       "AS full_name FROM Accounts;";
 688 | 
 689 |   CheckPattern(state,
 690 |                sql_statement,
 691 |                print_statement,
 692 |                pattern,
 693 |                RISK_LEVEL_LOW,
 694 |                pattern_type,
 695 |                title,
 696 |                message,
 697 |                true);
 698 | 
 699 | }
 700 | 
 701 | void CheckGroupByUsage(Configuration& state,
 702 |                        const std::string& sql_statement,
 703 |                        bool& print_statement){
 704 | 
 705 |   std::regex pattern("(group by)");
 706 |   std::string title = "GROUP BY Usage";
 707 |   PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY;
 708 | 
 709 |   auto message =
 710 |       "● Do not reference non-grouped columns:  "
 711 |       "Every column in the select-list of a query must have a single value row "
 712 |       "per row group. This is called the Single-Value Rule. "
 713 |       "Columns named in the GROUP BY clause are guaranteed to be exactly one value "
 714 |       "per group, no matter how many rows the group matches. "
 715 |       "Most DBMSs report an error if you try to run any query that tries to return "
 716 |       "a column other than those columns named in the GROUP BY clause or as "
 717 |       "arguments to aggregate functions. "
 718 |       "Every expression in the select list must be contained in either an "
 719 |       "aggregate function or the GROUP BY clause. "
 720 |       "Follow the single-value rule to avoid ambiguous query results.";
 721 | 
 722 |   CheckPattern(state,
 723 |                sql_statement,
 724 |                print_statement,
 725 |                pattern,
 726 |                RISK_LEVEL_LOW,
 727 |                pattern_type,
 728 |                title,
 729 |                message,
 730 |                true);
 731 | 
 732 | 
 733 | }
 734 | 
 735 | void CheckOrderByRand(Configuration& state,
 736 |                       const std::string& sql_statement,
 737 |                       bool& print_statement){
 738 | 
 739 |   std::regex pattern("(order by rand\\()");
 740 |   std::string title = "ORDER BY RAND Usage";
 741 |   PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY;
 742 | 
 743 |   auto message =
 744 |       "● Sorting by a nondeterministic expression (RAND()) means the sorting cannot benefit from an index:  "
 745 |       "There is no index containing the values returned by the random function. "
 746 |       "That’s the point of them being ran- dom: they are different and "
 747 |       "unpredictable each time they're selected. This is a problem for the performance "
 748 |       "of the query, because using an index is one of the best ways of speeding up "
 749 |       "sorting. The consequence of not using an index is that the query result set "
 750 |       "has to be sorted by the database using a slow table scan. "
 751 |       "One technique that avoids sorting the table is to choose a random value "
 752 |       "between 1 and the greatest primary key value. "
 753 |       "Still another technique that avoids problems found in the preceding alternatives "
 754 |       "is to count the rows in the data set and return a random number between 0 and "
 755 |       "the count. Then use this number as an offset when querying the data set. "
 756 |       "Some queries just cannot be optimized; consider taking a different approach.";
 757 | 
 758 |   CheckPattern(state,
 759 |                sql_statement,
 760 |                print_statement,
 761 |                pattern,
 762 |                RISK_LEVEL_MEDIUM,
 763 |                pattern_type,
 764 |                title,
 765 |                message,
 766 |                true);
 767 | 
 768 | }
 769 | 
 770 | void CheckPatternMatching(Configuration& state,
 771 |                           const std::string& sql_statement,
 772 |                           bool& print_statement){
 773 | 
 774 |   std::regex pattern("(\blike\b)|(\bregexp\b)|(\bsimilar to\b)");
 775 |   std::string title = "Pattern Matching Usage";
 776 |   PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY;
 777 | 
 778 |   auto message =
 779 |       "● Avoid using vanilla pattern matching:  "
 780 |       "The most important disadvantage of pattern-matching operators is that "
 781 |       "they have poor performance. A second problem of simple pattern-matching using LIKE "
 782 |       "or regular expressions is that it can find unintended matches. "
 783 |       "It's best to use a specialized search engine technology like Apache Lucene, instead of SQL. "
 784 |       "Another alternative is to reduce the recurring cost of search by saving the result. "
 785 |       "Consider using vendor extensions like FULLTEXT INDEX in MySQL. "
 786 |       "More broadly, you don't have to use SQL to solve every problem.";
 787 | 
 788 |   CheckPattern(state,
 789 |                sql_statement,
 790 |                print_statement,
 791 |                pattern,
 792 |                RISK_LEVEL_MEDIUM,
 793 |                pattern_type,
 794 |                title,
 795 |                message,
 796 |                true);
 797 | 
 798 | }
 799 | 
 800 | void CheckSpaghettiQuery(Configuration& state,
 801 |                          const std::string& sql_statement,
 802 |                          bool& print_statement){
 803 | 
 804 |   std::regex true_pattern(".+?");
 805 |   std::regex false_pattern("pattern must not exist");
 806 |   std::regex pattern;
 807 | 
 808 |   std::string title = "Spaghetti Query Alert";
 809 |   PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY;
 810 |   std::size_t spaghetti_query_char_count = 500;
 811 | 
 812 |   if(sql_statement.size() >= spaghetti_query_char_count){
 813 |     pattern = true_pattern;
 814 |   }
 815 |   else {
 816 |     pattern = false_pattern;
 817 |   }
 818 | 
 819 |   auto message =
 820 |       "● Split up a complex spaghetti query into several simpler queries:  "
 821 |       "SQL is a very expressive language—you can accomplish a lot in a single query or statement. "
 822 |       "But that doesn't mean it's mandatory or even a good idea to approach every task with the "
 823 |       "assumption it has to be done in one line of code. "
 824 |       "One common unintended consequence of producing all your results in one query is "
 825 |       "a Cartesian product. This happens when two of the tables in the query have no condition "
 826 |       "restricting their relationship. Without such a restriction, the join of two tables pairs "
 827 |       "each row in the first table to every row in the other table. Each such pairing becomes a "
 828 |       "row of the result set, and you end up with many more rows than you expect. "
 829 |       "It's important to consider that these queries are simply hard to write, hard to modify, "
 830 |       "and hard to debug. You should expect to get regular requests for incremental enhancements "
 831 |       "to your database applications. Managers want more complex reports and more fields in a "
 832 |       "user interface. If you design intricate, monolithic SQL queries, it's more costly and "
 833 |       "time-consuming to make enhancements to them. Your time is worth something, both to you "
 834 |       "and to your project. "
 835 |       "Split up a complex spaghetti query into several simpler queries. "
 836 |       "When you split up a complex SQL query, the result may be many similar queries, "
 837 |       "perhaps varying slightly depending on data values. Writing these queries is a chore, "
 838 |       "so it's a good application of SQL code generation. "
 839 |       "Although SQL makes it seem possible to solve a complex problem in a single line of code, "
 840 |       "don't be tempted to build a house of cards.";
 841 | 
 842 |   CheckPattern(state,
 843 |                sql_statement,
 844 |                print_statement,
 845 |                pattern,
 846 |                RISK_LEVEL_LOW,
 847 |                pattern_type,
 848 |                title,
 849 |                message,
 850 |                true);
 851 | 
 852 | }
 853 | 
 854 | void CheckJoinCount(Configuration& state,
 855 |                     const std::string& sql_statement,
 856 |                     bool& print_statement){
 857 | 
 858 |   std::regex pattern("(\bjoin\b)");
 859 |   std::string title = "Reduce Number of JOINs";
 860 |   PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY;
 861 |   std::size_t min_count = 5;
 862 | 
 863 |   auto message =
 864 |       "● Reduce Number of JOINs:  "
 865 |       "Too many JOINs is a symptom of complex spaghetti queries. Consider splitting "
 866 |       "up the complex query into many simpler queries, and reduce the number of JOINs";
 867 | 
 868 |   CheckPattern(state,
 869 |                sql_statement,
 870 |                print_statement,
 871 |                pattern,
 872 |                RISK_LEVEL_LOW,
 873 |                pattern_type,
 874 |                title,
 875 |                message,
 876 |                true,
 877 |                min_count);
 878 | 
 879 | }
 880 | 
 881 | void CheckDistinctCount(Configuration& state,
 882 |                         const std::string& sql_statement,
 883 |                         bool& print_statement){
 884 | 
 885 |   std::regex pattern("(\bdistinct\b)");
 886 |   std::string title = "Eliminate Unnecessary DISTINCT Conditions";
 887 |   PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY;
 888 |   std::size_t min_count = 5;
 889 | 
 890 |   auto message =
 891 |       "● Eliminate Unnecessary DISTINCT Conditions:  "
 892 |       "Too many DISTINCT conditions is a symptom of complex spaghetti queries. "
 893 |       "Consider splitting up the complex query into many simpler queries, "
 894 |       "and reduce the number of DISTINCT conditions "
 895 |       "It is possible that the DISTINCT condition has no effect if a primary key "
 896 |       "column is part of the result set of columns";
 897 | 
 898 |   CheckPattern(state,
 899 |                sql_statement,
 900 |                print_statement,
 901 |                pattern,
 902 |                RISK_LEVEL_LOW,
 903 |                pattern_type,
 904 |                title,
 905 |                message,
 906 |                true,
 907 |                min_count);
 908 | 
 909 | }
 910 | 
 911 | void CheckImplicitColumns(Configuration& state,
 912 |                           const std::string& sql_statement,
 913 |                           bool& print_statement){
 914 | 
 915 |   std::regex pattern("(insert into \\S+ values)");
 916 |   std::string title = "Implicit Column Usage";
 917 |   PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY;
 918 | 
 919 |   auto message =
 920 |       "● Explicitly name columns:  "
 921 |       "Although using wildcards and unnamed columns satisfies the goal "
 922 |       "of less typing, this habit creates several hazards. "
 923 |       "This can break application refactoring and can harm performance. "
 924 |       "Always spell out all the columns you need, instead of relying on "
 925 |       "wild-cards or implicit column lists.";
 926 | 
 927 |   CheckPattern(state,
 928 |                sql_statement,
 929 |                print_statement,
 930 |                pattern,
 931 |                RISK_LEVEL_LOW,
 932 |                pattern_type,
 933 |                title,
 934 |                message,
 935 |                true);
 936 | 
 937 | }
 938 | 
 939 | void CheckHaving(Configuration& state,
 940 |                  const std::string& sql_statement,
 941 |                  bool& print_statement){
 942 | 
 943 |   std::regex pattern("(\bhaving\b)");
 944 |   std::string title = "HAVING Clause Usage";
 945 |   PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY;
 946 | 
 947 |   auto message =
 948 |       "● Consider removing the HAVING clause:  "
 949 |       "Rewriting the query's HAVING clause into a predicate will enable the "
 950 |       "use of indexes during query processing. "
 951 |       "EX: SELECT s.cust_id,count(s.cust_id) FROM SH.sales s GROUP BY s.cust_id "
 952 |       "HAVING s.cust_id != '1660' AND s.cust_id != '2'; can be rewritten as:  "
 953 |       "SELECT s.cust_id,count(cust_id) FROM SH.sales s WHERE s.cust_id != '1660' "
 954 |       "AND s.cust_id !='2' GROUP BY s.cust_id;";
 955 | 
 956 |   CheckPattern(state,
 957 |                sql_statement,
 958 |                print_statement,
 959 |                pattern,
 960 |                RISK_LEVEL_LOW,
 961 |                pattern_type,
 962 |                title,
 963 |                message,
 964 |                true);
 965 | 
 966 | }
 967 | 
 968 | void CheckNesting(Configuration& state,
 969 |                   const std::string& sql_statement,
 970 |                   bool& print_statement){
 971 | 
 972 |   std::regex pattern("(\bselect\b)");
 973 |   std::string title = "Nested sub queries";
 974 |   PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY;
 975 |   std::size_t min_count = 2;
 976 | 
 977 |   auto message =
 978 |       "● Un-nest sub queries:  "
 979 |       " Rewriting nested queries as joins often leads to more efficient "
 980 |       "execution and more effective optimization. In general, sub-query unnesting "
 981 |       "is always done for correlated sub-queries with, at most, one table in "
 982 |       "the FROM clause, which are used in ANY, ALL, and EXISTS predicates. "
 983 |       "A uncorrelated sub-query, or a sub-query with more than one table in "
 984 |       "the FROM clause, is flattened if it can be decided, based on the query "
 985 |       "semantics, that the sub-query returns at most one row. "
 986 |       "EX: SELECT * FROM SH.products p WHERE p.prod_id = (SELECT s.prod_id FROM SH.sales "
 987 |       "s WHERE s.cust_id = 100996 AND s.quantity_sold = 1 ); can be rewritten as:  "
 988 |       "SELECT p.* FROM SH.products p, sales s WHERE p.prod_id = s.prod_id AND "
 989 |       "s.cust_id = 100996 AND s.quantity_sold = 1;";
 990 | 
 991 |   CheckPattern(state,
 992 |                sql_statement,
 993 |                print_statement,
 994 |                pattern,
 995 |                RISK_LEVEL_LOW,
 996 |                pattern_type,
 997 |                title,
 998 |                message,
 999 |                true,
1000 |                min_count);
1001 | 
1002 | 
1003 | }
1004 | 
1005 | void CheckOr(Configuration& state,
1006 |                  const std::string& sql_statement,
1007 |                  bool& print_statement){
1008 | 
1009 |   std::regex pattern("(\bor\b)");
1010 |   std::string title = "OR Usage";
1011 |   PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY;
1012 | 
1013 |   auto message =
1014 |       "● Consider using an IN predicate when querying an indexed column:  "
1015 |       "The IN-list predicate can be exploited for indexed retrieval and also, "
1016 |       "the optimizer can sort the IN-list to match the sort sequence of the index, "
1017 |       "leading to more efficient retrieval. Note that the IN-list must contain only "
1018 |       "constants, or values that are constant during one execution of the query block, "
1019 |       "such as outer references. "
1020 |       "EX: SELECT s.* FROM SH.sales s WHERE s.prod_id = 14 OR s.prod_id = 17; "
1021 |       "can be rewritten as:  "
1022 |       "SELECT s.* FROM SH.sales s WHERE s.prod_id IN (14, 17);";
1023 | 
1024 |   CheckPattern(state,
1025 |                sql_statement,
1026 |                print_statement,
1027 |                pattern,
1028 |                RISK_LEVEL_LOW,
1029 |                pattern_type,
1030 |                title,
1031 |                message,
1032 |                true);
1033 | 
1034 | }
1035 | 
1036 | void CheckUnion(Configuration& state,
1037 |                 const std::string& sql_statement,
1038 |                 bool& print_statement){
1039 | 
1040 |   std::regex pattern("(union)");
1041 |   std::string title = "UNION Usage";
1042 |   PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY;
1043 | 
1044 |   auto message =
1045 |       "● Consider using UNION ALL if you do not care about duplicates:  "
1046 |       "Unlike UNION which removes duplicates, UNION ALL allows duplicate tuples. "
1047 |       "If you do not care about duplicate tuples, then using UNION ALL would be "
1048 |       "a faster option.";
1049 | 
1050 |   CheckPattern(state,
1051 |                sql_statement,
1052 |                print_statement,
1053 |                pattern,
1054 |                RISK_LEVEL_LOW,
1055 |                pattern_type,
1056 |                title,
1057 |                message,
1058 |                true);
1059 | 
1060 | }
1061 | 
1062 | void CheckDistinctJoin(Configuration& state,
1063 |                        const std::string& sql_statement,
1064 |                        bool& print_statement){
1065 | 
1066 |   std::regex pattern("(distinct.*join)");
1067 |   std::string title = "DISTINCT & JOIN Usage";
1068 |   PatternType pattern_type = PatternType::PATTERN_TYPE_QUERY;
1069 | 
1070 |   auto message =
1071 |       "● Consider using a sub-query with EXISTS instead of DISTINCT:  "
1072 |       "The DISTINCT keyword removes duplicates after sorting the tuples. "
1073 |       "Instead, consider using a sub query with the EXISTS keyword, you can avoid "
1074 |       "having to return an entire table. "
1075 |       "EX: SELECT DISTINCT c.country_id, c.country_name FROM SH.countries c, "
1076 |       "SH.customers e WHERE e.country_id = c.country_id; "
1077 |       "can be rewritten to:  "
1078 |       "SELECT c.country_id, c.country_name FROM SH.countries c WHERE  EXISTS "
1079 |       "(SELECT 'X' FROM  SH.customers e WHERE e.country_id = c.country_id);";
1080 | 
1081 |   CheckPattern(state,
1082 |                sql_statement,
1083 |                print_statement,
1084 |                pattern,
1085 |                RISK_LEVEL_LOW,
1086 |                pattern_type,
1087 |                title,
1088 |                message,
1089 |                true);
1090 | 
1091 | }
1092 | 
1093 | 
1094 | 
1095 | // APPLICATION
1096 | 
1097 | void CheckReadablePasswords(Configuration& state,
1098 |                             const std::string& sql_statement,
1099 |                             bool& print_statement){
1100 | 
1101 |   std::regex pattern("(password varchar)|(password text)|(password =)| "
1102 |       "(pwd varchar)|(pwd text)|(pwd =)");
1103 |   std::string title = "Readable Passwords";
1104 |   PatternType pattern_type = PatternType::PATTERN_TYPE_APPLICATION;
1105 | 
1106 |   auto message =
1107 |       "● Do not store readable passwords:  "
1108 |       "It’s not secure to store a password in clear text or even to pass it over the "
1109 |       "network in the clear. If an attacker can read the SQL statement you use to "
1110 |       "insert a password, they can see the password plainly. "
1111 |       "Additionally, interpolating the user's input string into the SQL query in plain text "
1112 |       "exposes it to discovery by an attacker. "
1113 |       "If you can read passwords, so can a hacker. "
1114 |       "The solution is to encode the password using a one-way cryptographic hash  "
1115 |       "function. This function transforms its input string into a new string, "
1116 |       "called the hash, that is unrecognizable. "
1117 |       "Use a salt to thwart dictionary attacks. Don't put the plain-text password "
1118 |       "into the SQL query. Instead, compute the hash in your application code, "
1119 |       "and use only the hash in the SQL query.";
1120 | 
1121 |   CheckPattern(state,
1122 |                sql_statement,
1123 |                print_statement,
1124 |                pattern,
1125 |                RISK_LEVEL_LOW,
1126 |                pattern_type,
1127 |                title,
1128 |                message,
1129 |                true);
1130 | 
1131 | }
1132 | 
1133 | }  // namespace machine
1134 | 
1135 | 


--------------------------------------------------------------------------------
/src/main.cpp:
--------------------------------------------------------------------------------
  1 | // MAIN SOURCE
  2 | 
  3 | #include <iostream>
  4 | #include <fstream>
  5 | 
  6 | #include "checker.h"
  7 | #include "include/configuration.h"
  8 | 
  9 | #include "gflags/gflags.h"
 10 | 
 11 | namespace sqlcheck {
 12 | 
 13 | Configuration state;
 14 | 
 15 | }  // namespace sqlcheck
 16 | 
 17 | DEFINE_bool(c, false, "Display warnings in color mode");
 18 | DEFINE_bool(color_mode, false, "Display warnings in color mode");
 19 | DEFINE_bool(v, false, "Display verbose warnings");
 20 | DEFINE_bool(verbose, false, "Display verbose warnings");
 21 | DEFINE_string(d, "", "Query delimiter string (default -- ;)");
 22 | DEFINE_string(delimiter, "", "Query delimiter string (default -- ;)");
 23 | DEFINE_bool(h, false, "Print help message");
 24 | DEFINE_uint64(r, sqlcheck::RISK_LEVEL_ALL,
 25 |               "Set of anti-patterns to check \n"
 26 |               "1 (all anti-patterns, default) \n"
 27 |               "2 (only medium and high risk anti-patterns) \n"
 28 |               "3 (only high risk anti-patterns) \n");
 29 | DEFINE_uint64(risk_level, sqlcheck::RISK_LEVEL_ALL,
 30 |               "Set of anti-patterns to check \n"
 31 |               "1 (all anti-patterns, default) \n"
 32 |               "2 (only medium and high risk anti-patterns) \n"
 33 |               "3 (only high risk anti-patterns) \n");
 34 | DEFINE_string(f, "", "SQL file name"); // standard input
 35 | DEFINE_string(file_name, "", "SQL file name"); // standard input
 36 | 
 37 | void ConfigureChecker(sqlcheck::Configuration &state) {
 38 | 
 39 |   // Default Values
 40 |   state.risk_level = sqlcheck::RISK_LEVEL_ALL;
 41 |   state.file_name = "";
 42 |   state.delimiter = ";";
 43 |   state.testing_mode = false;
 44 |   state.verbose = false;
 45 |   state.color_mode = false;
 46 |   state.line_number = 1;
 47 | 
 48 |   // Configure checker
 49 |   state.color_mode = FLAGS_c || FLAGS_color_mode;
 50 |   state.verbose = FLAGS_v || FLAGS_verbose;
 51 |   if(FLAGS_f.empty() == false){
 52 |     state.file_name = FLAGS_f;
 53 |   }
 54 |   if(FLAGS_file_name.empty() == false){
 55 |     state.file_name = FLAGS_file_name;
 56 |   }
 57 |   if(FLAGS_d.empty() == false){
 58 |     state.delimiter = FLAGS_f;
 59 |   }
 60 |   if(FLAGS_delimiter.empty() == false){
 61 |     state.delimiter = FLAGS_delimiter;
 62 |   }
 63 |   if(FLAGS_r != 0){
 64 |     state.risk_level = (sqlcheck::RiskLevel) FLAGS_r;
 65 |   }
 66 |   if(FLAGS_risk_level != 0){
 67 |     state.risk_level = (sqlcheck::RiskLevel) FLAGS_risk_level;
 68 |   }
 69 | 
 70 |   // Run validators
 71 |   std::cout << "+-------------------------------------------------+\n"
 72 |             << "|                   SQLCHECK                      |\n"
 73 |             << "+-------------------------------------------------+\n";
 74 | 
 75 |   ValidateRiskLevel(state);
 76 |   ValidateFileName(state);
 77 |   ValidateColorMode(state);
 78 |   ValidateVerbose(state);
 79 |   ValidateDelimiter(state);
 80 | 
 81 |   std::cout << "-------------------------------------------------\n";
 82 | 
 83 | }
 84 | 
 85 | void Usage() {
 86 |   std::cout <<
 87 |       "Command line options : sqlcheck <options>\n"
 88 |       "   -f -file_name          :  SQL file name\n"
 89 |       "   -r -risk_level         :  Set of anti-patterns to check\n"
 90 |       "                          :  1 (all anti-patterns, default) \n"
 91 |       "                          :  2 (only medium and high risk anti-patterns) \n"
 92 |       "                          :  3 (only high risk anti-patterns) \n"
 93 |       "   -c -color_mode         :  Display warnings in color mode \n"
 94 |       "   -v -verbose            :  Display verbose warnings \n"
 95 |       "   -d -delimiter          :  Query delimiter string (; by default) \n"
 96 |       "   -h -help               :  Print help message \n";
 97 | }
 98 | 
 99 | int main(int argc, char **argv) {
100 | 
101 |   bool has_issues = false;
102 | 
103 |   try {
104 | 
105 |     // Parse the input arguments from the user
106 |     gflags::SetUsageMessage("");
107 |     gflags::SetVersionString("1.2.1");
108 | 
109 |     gflags::ParseCommandLineFlags(&argc, &argv, true);
110 | 
111 |     // Print help message
112 |     if(FLAGS_h == true){
113 |       FLAGS_h = false;
114 |       Usage();
115 | 
116 |       gflags::ShutDownCommandLineFlags();
117 |       return (EXIT_SUCCESS);
118 |     }
119 | 
120 |     // Customize the checker configuration
121 |     ConfigureChecker(sqlcheck::state);
122 | 
123 |     // Invoke the checker
124 |     has_issues = sqlcheck::Check(sqlcheck::state);
125 | 
126 |   }
127 |   // Catching at the top level ensures that
128 |   // destructors are always called
129 |   catch (std::exception& exc) {
130 |     std::cerr << exc.what() << std::endl;
131 |     gflags::ShutDownCommandLineFlags();
132 |     exit(EXIT_FAILURE);
133 |   }
134 | 
135 |   gflags::ShutDownCommandLineFlags();
136 | 
137 |   (has_issues) ? exit(EXIT_FAILURE) : exit(EXIT_SUCCESS);
138 | }
139 | 


--------------------------------------------------------------------------------
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | ##################################################################################
 2 | 
 3 | ## TESTS
 4 | 
 5 | # Make sure the compiler can find include files for our sqlcheck library
 6 | include_directories (${CMAKE_SOURCE_DIR}/src/include)
 7 | 
 8 | # ---[ TEST SUITE
 9 | add_executable(test_suite test_suite.cpp)
10 | target_link_libraries(test_suite sqlcheck_library 
11 | ${GTEST_BOTH_LIBRARIES} 
12 | googletest
13 | ${GLOG_LIBRARIES} 
14 | ${CMAKE_THREAD_LIBS_INIT}
15 | )
16 | add_test(NAME TestSuite COMMAND test_suite)
17 | 
18 | # --[ Add "make check" target
19 | 
20 | set(CTEST_FLAGS "")
21 | add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} ${CTEST_FLAGS} --verbose)
22 | 
23 | 


--------------------------------------------------------------------------------
/test/test_suite.cpp:
--------------------------------------------------------------------------------
  1 | // TEST SUITE
  2 | 
  3 | #include <sstream>
  4 | 
  5 | #include "checker.h"
  6 | 
  7 | #include <gtest/gtest.h>
  8 | 
  9 | namespace sqlcheck {
 10 | 
 11 | TEST(TestSuite, SelectStarTest) {
 12 | 
 13 |   Configuration default_conf;
 14 |   default_conf.testing_mode = true;
 15 |   default_conf.verbose = false;
 16 | 
 17 |   std::unique_ptr<std::istringstream> stream(new std::istringstream());
 18 |   stream->str(
 19 |       "SELECT    *   FROM FOO;\n"
 20 |       "\n"
 21 | 
 22 |       "SELECT *\n"
 23 |       "FROM BAR;\n"
 24 |   );
 25 | 
 26 |   default_conf.test_stream.reset(stream.release());
 27 | 
 28 |   Check(default_conf);
 29 |   
 30 | }
 31 | 
 32 | TEST(TestSuite, MultiValuedAttributeTest) {
 33 | 
 34 |   Configuration default_conf;
 35 |   default_conf.testing_mode = true;
 36 |   default_conf.verbose = false;
 37 | 
 38 |   std::unique_ptr<std::istringstream> stream(new std::istringstream());
 39 |   stream->str(
 40 |       "SELECT product_id FROM Products WHERE account_id REGEXP '[[:<:]]12[[:>:]]';\n"
 41 | 
 42 |       "CREATE TABLE Products ("
 43 |       "product_id   SERIAL PRIMARY KEY,"
 44 |       "product_name VARCHAR(1000),"
 45 |       "account_id   VARCHAR(100));\n"
 46 | 
 47 |   );
 48 | 
 49 |   default_conf.test_stream.reset(stream.release());
 50 | 
 51 |   Check(default_conf);
 52 | 
 53 | }
 54 | 
 55 | TEST(TestSuite, RecursiveDependencyTest) {
 56 | 
 57 |   Configuration default_conf;
 58 |   default_conf.testing_mode = true;
 59 |   default_conf.verbose = false;
 60 | 
 61 |   std::unique_ptr<std::istringstream> stream(new std::istringstream());
 62 |   stream->str(
 63 | 
 64 |       "CREATE TABLE Comments ("
 65 |       "comment_id   SERIAL PRIMARY KEY,"
 66 |       "parent_id    BIGINT UNSIGNED,"
 67 |       "FOREIGN KEY (parent_id) REFERENCES Comments(comment_id));\n"
 68 | 
 69 |   );
 70 | 
 71 |   default_conf.test_stream.reset(stream.release());
 72 | 
 73 |   Check(default_conf);
 74 | 
 75 | }
 76 | 
 77 | TEST(TestSuite, PrimaryKeyExistsTest) {
 78 | 
 79 |   Configuration default_conf;
 80 |   default_conf.testing_mode = true;
 81 |   default_conf.verbose = false;
 82 | 
 83 |   std::unique_ptr<std::istringstream> stream(new std::istringstream());
 84 |   stream->str(
 85 | 
 86 |       "CREATE TABLE ArticleTags ("
 87 |       "article_id  BIGINT UNSIGNED NOT NULL,"
 88 |       "tag_id      BIGINT UNSIGNED NOT NULL,);\n"
 89 | 
 90 |       "CREATE TABLE BugsProducts ("
 91 |       "id SERIAL PRIMARY KEY,"
 92 |       "bug_id      BIGINT UNSIGNED NOT NULL,"
 93 |       "product_id  BIGINT UNSIGNED NOT NULL,"
 94 |       "UNIQUE KEY (bug_id, product_id),"
 95 |       "FOREIGN KEY (bug_id) REFERENCES Bugs(bug_id),"
 96 |       "FOREIGN KEY (product_id) REFERENCES Products(product_id));\n"
 97 | 
 98 |       "CREATE TABLE BugsProducts ("
 99 |       "bug_id      BIGINT UNSIGNED NOT NULL,"
100 |       "id SERIAL PRIMARY KEY,"
101 |       "product_id  BIGINT UNSIGNED NOT NULL,"
102 |       "UNIQUE KEY (bug_id, product_id),"
103 |       "FOREIGN KEY (bug_id) REFERENCES Bugs(bug_id),"
104 |       "FOREIGN KEY (product_id) REFERENCES Products(product_id));\n"
105 |   );
106 | 
107 |   default_conf.test_stream.reset(stream.release());
108 | 
109 |   Check(default_conf);
110 | 
111 | }
112 | 
113 | TEST(TestSuite, GenericPrimaryKeyTest) {
114 | 
115 |   Configuration default_conf;
116 |   default_conf.testing_mode = true;
117 |   default_conf.verbose = false;
118 | 
119 |   std::unique_ptr<std::istringstream> stream(new std::istringstream());
120 |   stream->str(
121 | 
122 |       "CREATE TABLE BugsProducts ("
123 |       "id SERIAL PRIMARY KEY,"
124 |       "bug_id      BIGINT UNSIGNED NOT NULL,"
125 |       "product_id  BIGINT UNSIGNED NOT NULL,"
126 |       "UNIQUE KEY (bug_id, product_id),"
127 |       "FOREIGN KEY (bug_id) REFERENCES Bugs(bug_id),"
128 |       "FOREIGN KEY (product_id) REFERENCES Products(product_id));\n"
129 | 
130 |       "CREATE TABLE BugsProducts ("
131 |       "bug_id      BIGINT UNSIGNED NOT NULL,"
132 |       "id SERIAL PRIMARY KEY,"
133 |       "product_id  BIGINT UNSIGNED NOT NULL,"
134 |       "UNIQUE KEY (bug_id, product_id),"
135 |       "FOREIGN KEY (bug_id) REFERENCES Bugs(bug_id),"
136 |       "FOREIGN KEY (product_id) REFERENCES Products(product_id));\n"
137 |   );
138 | 
139 |   default_conf.test_stream.reset(stream.release());
140 | 
141 |   Check(default_conf);
142 | 
143 | }
144 | 
145 | TEST(TestSuite, ForeignKeyExistsTest) {
146 | 
147 |   Configuration default_conf;
148 |   default_conf.testing_mode = true;
149 |   default_conf.verbose = false;
150 | 
151 |   std::unique_ptr<std::istringstream> stream(new std::istringstream());
152 |   stream->str(
153 | 
154 |       "CREATE TABLE BugsProducts ("
155 |       "id SERIAL PRIMARY KEY,"
156 |       "bug_id      BIGINT UNSIGNED NOT NULL,"
157 |       "product_id  BIGINT UNSIGNED NOT NULL,"
158 |       "UNIQUE KEY (bug_id, product_id));\n"
159 | 
160 |   );
161 | 
162 |   default_conf.test_stream.reset(stream.release());
163 | 
164 |   Check(default_conf);
165 | 
166 | }
167 | 
168 | TEST(TestSuite, VariableAttributeTest) {
169 | 
170 |   Configuration default_conf;
171 |   default_conf.testing_mode = true;
172 |   default_conf.verbose = false;
173 | 
174 |   std::unique_ptr<std::istringstream> stream(new std::istringstream());
175 |   stream->str(
176 | 
177 |       "CREATE TABLE IssueAttributes ("
178 |       "issue_id    BIGINT UNSIGNED NOT NULL,"
179 |       "attr_name   VARCHAR(100) NOT NULL,"
180 |       "attr_value  VARCHAR(100),"
181 |       "PRIMARY KEY (issue_id, attr_name),"
182 |       "FOREIGN KEY (issue_id) REFERENCES Issues(issue_id)"
183 |       ");"
184 | 
185 |   );
186 | 
187 |   default_conf.test_stream.reset(stream.release());
188 | 
189 |   Check(default_conf);
190 | 
191 | }
192 | 
193 | TEST(TestSuite, MultiColumnAttributeTest) {
194 | 
195 |   Configuration default_conf;
196 |   default_conf.testing_mode = true;
197 |   default_conf.verbose = false;
198 | 
199 |   std::unique_ptr<std::istringstream> stream(new std::istringstream());
200 |   stream->str(
201 | 
202 |       "CREATE TABLE Bugs ("
203 |       "bug_id      SERIAL PRIMARY KEY,"
204 |       "description VARCHAR(1000),"
205 |       "tag1        VARCHAR(20),"
206 |       "tag2        VARCHAR(20),"
207 |       "tag3        VARCHAR(20)"
208 |       ");"
209 | 
210 |   );
211 | 
212 |   default_conf.test_stream.reset(stream.release());
213 | 
214 |   Check(default_conf);
215 | 
216 | }
217 | 
218 | TEST(TestSuite, MetadataTribblesTest) {
219 | 
220 |   Configuration default_conf;
221 |   default_conf.testing_mode = true;
222 |   default_conf.verbose = false;
223 | 
224 |   std::unique_ptr<std::istringstream> stream(new std::istringstream());
225 |   stream->str(
226 | 
227 |       "CREATE TABLE ProjectHistory (\n" // 1
228 |       "bugs_fixed_2008  INT,\n"         // 2
229 |       "bugs_fixed_2009  INT,\n"         // 3
230 |       "bugs_fixed_2010  INT,\n"         // 4
231 |       "2010  INT"                       // 5
232 |       ");\n"                            // 5
233 | 
234 |       "CREATE TABLE Bugs_2008 ( . . . );\n" // 6
235 | 
236 |       "ALTER TABLE Customers ADD (revenue2002 NUMBER(9,2));\n" // 7
237 | 
238 |       "CREATE TABLE Bugs_2009 (-- other columns" // 8
239 |       "date_reported DATE CHECK (EXTRACT(YEAR FROM date_reported) = 2009));\n" // 8
240 | 
241 |   );
242 | 
243 |   default_conf.test_stream.reset(stream.release());
244 | 
245 |   Check(default_conf);
246 | 
247 | }
248 | 
249 | TEST(TestSuite, FloatTest) {
250 | 
251 |   Configuration default_conf;
252 |   default_conf.testing_mode = true;
253 |   default_conf.verbose = false;
254 | 
255 |   std::unique_ptr<std::istringstream> stream(new std::istringstream());
256 |   stream->str(
257 | 
258 |       "ALTER TABLE Bugs ADD COLUMN hours FLOAT;\n"
259 | 
260 |       "SELECT * FROM Accounts WHERE ABS(hourly_rate - 59.95) < 0.0000001;\n"
261 | 
262 |       "ALTER TABLE Bugs ADD COLUMN hours NUMERIC(9,2);\n"
263 | 
264 |   );
265 | 
266 |   default_conf.test_stream.reset(stream.release());
267 | 
268 |   Check(default_conf);
269 | 
270 | }
271 | 
272 | void PrintMatches(const std::string& in, const std::string& re){
273 | 
274 |   std::smatch m;
275 |   std::regex_search(in, m, std::regex(re));
276 | 
277 |   if(m.empty()) {
278 |     std::cout << "input=[" << in << "], regex=[" << re << "]: NO MATCH\n";
279 |   } else {
280 |     std::cout << "input=[" << in << "], regex=[" << re << "]: ";
281 |     std::cout << "prefix=[" << m.prefix() << "] ";
282 |     for(std::size_t n = 0; n < m.size(); ++n)
283 |       std::cout << " m[" << n << "]=[" << m[n] << "] ";
284 |     std::cout << "suffix=[" << m.suffix() << "]\n";
285 |   }
286 | 
287 | }
288 | 
289 | TEST(TestSuite, RegexTests) {
290 | 
291 |   // greedy match, repeats [a-z] 4 times
292 |   PrintMatches("abcdefghi", "a[a-z]{2,4}");
293 | 
294 |   // non-greedy match, repeats [a-z] 2 times
295 |   PrintMatches("abcdefghi", "a[a-z]{2,4}?");
296 | 
297 |   // Choice point ordering for quantifiers results in a match
298 |   // with two repetitions, first matching the substring "aa",
299 |   // second matching the substring "ba", leaving "ac" not matched
300 |   // ("ba" appears in the capture clause m[1])
301 |   PrintMatches("aabaac", "(aa|aabaac|ba|b|c)*");
302 | 
303 |   // Choice point ordering for quantifiers makes this regex
304 |   // calculate the greatest common divisor between 10 and 15
305 |   // (the answer is 5, and it populates m[1] with "aaaaa")
306 |   PrintMatches("aaaaaaaaaa,aaaaaaaaaaaaaaa", "^(a+)\\1*,\\1+$");
307 | 
308 |   // the substring "bbb" does not appear in the capture clause m[4]
309 |   // because it is cleared when the second repetition of the atom
310 |   // (a+)?(b+)?(c) is matching the substring "ac"
311 |   PrintMatches("zaacbbbcac", "(z)((a+)?(b+)?(c))*");
312 | 
313 | }
314 | 
315 | TEST(TestSuite, MetadataRegexTests) {
316 | 
317 |   std::string regex = "[A-za-z\\-_@]+[0-9]+ ";
318 | 
319 |   // metadata tribbles
320 |   PrintMatches("bugs_2004", regex);
321 |   PrintMatches("bugs8_foo", regex);
322 | 
323 |   // metadata tribbles
324 |   PrintMatches("bugs_2004", regex);
325 |   PrintMatches("CREATE TABLE Bugs_2008 ( . . . )", regex);
326 |   PrintMatches("CREATE TABLE Bugs_200843214 ( . . . )", regex);
327 |   PrintMatches("CREATE TABLE tag8 ( . . . )", regex);
328 | 
329 | }
330 | 
331 | TEST(TestSuite, PhysicalDesignTests) {
332 | 
333 |   Configuration default_conf;
334 |   default_conf.testing_mode = true;
335 |   default_conf.verbose = false;
336 | 
337 |   std::unique_ptr<std::istringstream> stream(new std::istringstream());
338 |   stream->str(
339 | 
340 |       "CREATE TABLE Bugs ("
341 |       "-- other columns"
342 |       "status ENUM('NEW', 'IN PROGRESS', 'FIXED'), );\n"
343 | 
344 |       "CREATE TABLE Bugs ("
345 |       "-- other columns"
346 |       "status VARCHAR(20) CHECK (status IN ('NEW', 'IN PROGRESS', 'FIXED')) );\n"
347 | 
348 |       "ALTER TABLE Bugs MODIFY COLUMN status ENUM('NEW', 'IN PROGRESS', 'CODE COMPLETE', 'VERIFIED');\n"
349 | 
350 |       "CREATE TABLE Screenshots ("
351 |       "bug_id            BIGINT UNSIGNED NOT NULL,"
352 |       "image_id          BIGINT UNSIGNED NOT NULL,"
353 |       "screenshot_path   VARCHAR(100),"
354 |       "caption           VARCHAR(100),"
355 |       "PRIMARY KEY       (bug_id, image_id),"
356 |       "FOREIGN KEY (bug_id) REFERENCES Bugs(bug_id)"
357 |       ");\n"
358 | 
359 |       "unlink('images/screenshot1234-1.jpg' );\n"
360 | 
361 |       "CREATE TABLE Bugs ("
362 |       "bug_id SERIAL PRIMARY KEY,"
363 |       "date_reported DATE NOT NULL,"
364 |       "INDEX (bug_id),"
365 |       "INDEX (summary),"
366 |       "INDEX (hours),"
367 |       "INDEX (bug_id, date_reported, status)"
368 |       ");\n"
369 | 
370 |       "CREATE INDEX TelephoneBook ON Accounts(last_name, first_name);\n"
371 | 
372 |   );
373 | 
374 |   default_conf.test_stream.reset(stream.release());
375 | 
376 |   Check(default_conf);
377 | 
378 | }
379 | 
380 | TEST(TestSuite, EnumFalsePositive) {
381 |   Configuration default_conf;
382 |   default_conf.testing_mode = true;
383 |   default_conf.verbose = false;
384 | 
385 |   std::unique_ptr<std::istringstream> stream(new std::istringstream());
386 |   stream->str(
387 |       "CREATE TABLE Bugsin ("
388 |       "bug_id SERIAL PRIMARY KEY,"
389 |       "phonenum varchar(20),"
390 |       "date_reported DATE NOT NULL"
391 |       ");\n"
392 | 
393 |      "CREATE TABLE Screenshots ("
394 |       "bug_id   BIGINT UNSIGNED NOT NULL REFERENCES Bugsin (bug_id),"
395 |       "image_id          BIGINT UNSIGNED NOT NULL,"
396 |       "screenshot_path   VARCHAR(100),"
397 |       "caption           VARCHAR(100),"
398 |       "PRIMARY KEY       (bug_id, image_id),"
399 |       "FOREIGN KEY (bug_id) REFERENCES Bugsin(bug_id)"
400 |       ");\n"
401 |   );
402 |   default_conf.test_stream.reset(stream.release());
403 |   Check(default_conf);
404 | }
405 | 
406 | TEST(TestSuite, QueryTests) {
407 | 
408 |   Configuration default_conf;
409 |   default_conf.testing_mode = true;
410 |   default_conf.verbose = false;
411 | 
412 |   std::unique_ptr<std::istringstream> stream(new std::istringstream());
413 |   stream->str(
414 | 
415 |       "SELECT * FROM Bugs WHERE assigned_to IS NULL OR assigned_to <> 1;\n"
416 | 
417 |       "SELECT first_name || ' ' || last_name AS full_name FROM Accounts;\n"
418 | 
419 |       "SELECT product_id, MAX(date_reported) AS latest,"
420 |       "MIN(date_reported) AS earliest, bug_id"
421 |       "FROM Bugs JOIN BugsProducts USING (bug_id)"
422 |       "GROUP BY product_id;\n"
423 | 
424 |       "SELECT product_id, MAX(date_reported) AS latest"
425 |       "FROM Bugs JOIN BugsProducts USING (bug_id)"
426 |       "GROUP BY product_id;\n"
427 | 
428 |       "SELECT * FROM Bugs ORDER BY RAND() LIMIT 1;\n"
429 | 
430 |       "SELECT b1.*"
431 |       "FROM Bugs AS b1"
432 |       "JOIN (SELECT CEIL(RAND() * (SELECT MAX(bug_id) FROM Bugs)) AS rand_id) AS b2"
433 |       "ON (b1.bug_id = b2.rand_id);\n"
434 | 
435 |       "SELECT * FROM Bugs WHERE description LIKE '%crash%';\n"
436 | 
437 |       "SELECT * FROM Bugs WHERE description REGEXP 'crash';\n"
438 | 
439 |       "SELECT COUNT(bp.product_id) AS how_many_products,"
440 |       "COUNT(dev.account_id) AS how_many_developers,"
441 |       "COUNT(b.bug_id)/COUNT(dev.account_id) AS avg_bugs_per_developer,"
442 |       "COUNT(cust.account_id) AS how_many_customers"
443 |       "FROM Bugs b JOIN BugsProducts bp ON (b.bug_id = bp.bug_id)"
444 |       "JOIN Accounts dev ON (b.assigned_to = dev.account_id)"
445 |       "JOIN Accounts cust ON (b.reported_by = cust.account_id) WHERE cust.email NOT LIKE '%@example.com'"
446 |       "JOIN Accounts cust ON (b.reported_by = cust.account_id) WHERE cust.email NOT LIKE '%@example.com'"
447 |       "JOIN Accounts cust ON (b.reported_by = cust.account_id) WHERE cust.email NOT LIKE '%@example.com'"
448 |       "JOIN Accounts cust ON (b.reported_by = cust.account_id) WHERE cust.email NOT LIKE '%@example.com'"
449 |       "JOIN Accounts cust ON (b.reported_by = cust.account_id) WHERE cust.email NOT LIKE '%@example.com'"
450 |       "GROUP BY bp.product_id;"
451 | 
452 |       "SELECT AVG(bugs_per_developer) AS average_bugs_per_developer"
453 |       "FROM (SELECT dev.account_id, COUNT(*) AS bugs_per_developer"
454 |       "FROM Bugs b JOIN Accounts dev"
455 |       "ON (b.assigned_to = dev.account_id)"
456 |       "WHERE b.status = 'FIXED' GROUP BY dev.account_id) t;\n"
457 | 
458 |       "INSERT INTO Bugs VALUES (DEFAULT, CURDATE(), 'New bug', 'Test T987 fails...',"
459 |       "NULL, 123, NULL, NULL, DEFAULT, 'Medium', NULL);\n"
460 | 
461 |       "INSERT INTO Accounts (account_name, first_name, last_name, email,"
462 |       "password_hash, portrait_image, hourly_rate)"
463 |       "VALUES ('bkarwin', 'Bill', 'Karwin', 'bill@example.com', SHA2('xyzzy'), NULL, 49.95);\n"
464 | 
465 |       "CREATE TABLE Accounts ("
466 |       "account_id    SERIAL PRIMARY KEY,"
467 |       "account_name  VARCHAR(20) NOT NULL,"
468 |       "email         VARCHAR(100) NOT NULL,"
469 |       "password      VARCHAR(30) NOT NULL"
470 |       ");\n"
471 | 
472 |       "SELECT * FROM Accounts"
473 |       "WHERE account_name = 'bill' AND password = 'opensesame';\n"
474 | 
475 |       "SELECT s.cust_id,count(s.cust_id) FROM  SH.sales s GROUP BY s.cust_id\n"
476 |       "HAVING s.cust_id != '1660' AND s.cust_id != '2';\n"
477 | 
478 |       "SELECT * FROM SH.products p WHERE p.prod_id = (SELECT s.prod_id FROM SH.sales s"
479 |       "WHERE s.cust_id = 100996 AND s.quantity_sold = 1 );\n"
480 | 
481 |       "SELECT s.* FROM SH.sales s WHERE s.prod_id = 14 OR s.prod_id = 17;\n"
482 | 
483 |       "SELECT cust_id FROM SH.sales UNION SELECT cust_id FROM customers;\n"
484 | 
485 |       "SELECT DISTINCT c.country_id, c.country_name FROM SH.countries c JOIN SH.customers e"
486 |       "WHERE e.country_id = c.country_id;\n"
487 | 
488 |   );
489 | 
490 |   default_conf.test_stream.reset(stream.release());
491 | 
492 |   Check(default_conf);
493 | 
494 | }
495 | 
496 | TEST(TestSuite, JoinEqualityTest) {
497 |     Configuration default_conf;
498 |     default_conf.testing_mode = true;
499 |     default_conf.verbose = false;
500 | 
501 |     std::unique_ptr<std::istringstream> stream(new std::istringstream());
502 |     stream->str(
503 |             "SELECT baz.id\n"
504 |             "FROM foo\n"
505 |             "LEFT JOIN bar ON bar\n"
506 |             "JOIN baz ON baz = foo.baz;"
507 |     );
508 | 
509 |     default_conf.test_stream.reset(stream.release());
510 | 
511 |     Check(default_conf);
512 | 
513 | }
514 | 
515 | TEST(TestSuite, SingleColumnTable) {
516 |   Configuration default_conf;
517 |   default_conf.testing_mode = true;
518 |   default_conf.verbose = true;
519 | 
520 |   std::unique_ptr<std::istringstream> stream(new std::istringstream());
521 |   stream->str(
522 |     "CREATE TABLE sometable(singlecolumn int);"
523 |   );
524 | 
525 |   default_conf.test_stream.reset(stream.release());
526 | 
527 |   Check(default_conf);
528 | }
529 | 
530 | }  // End machine sqlcheck
531 | 


--------------------------------------------------------------------------------