├── .clang-format ├── .github └── workflows │ └── test.yml ├── .gitignore ├── LICENSE ├── Makefile ├── algebra.md ├── algebra ├── Expression.cpp ├── Expression.hpp ├── Operator.cpp └── Operator.hpp ├── examples ├── dialects │ └── sqlite.sane ├── features │ ├── foreigncall.sane │ ├── gensym.sane │ ├── isidentical.sane │ ├── table.sane │ └── window.sane ├── tpch-sqlite │ ├── q1.sane │ ├── q10.sane │ ├── q11.sane │ ├── q12.sane │ ├── q13.sane │ ├── q14.sane │ ├── q15.sane │ ├── q16.sane │ ├── q17.sane │ ├── q18.sane │ ├── q19.sane │ ├── q2.sane │ ├── q20.sane │ ├── q21.sane │ ├── q22.sane │ ├── q3.sane │ ├── q4.sane │ ├── q5.sane │ ├── q6.sane │ ├── q7.sane │ ├── q8.sane │ └── q9.sane └── tpch │ ├── q1.sane │ ├── q10.sane │ ├── q11.sane │ ├── q12.sane │ ├── q13.sane │ ├── q14.sane │ ├── q15.sane │ ├── q16.sane │ ├── q17.sane │ ├── q18.sane │ ├── q19.sane │ ├── q2.sane │ ├── q20.sane │ ├── q21.sane │ ├── q22.sane │ ├── q3.sane │ ├── q4.sane │ ├── q5.sane │ ├── q6.sane │ ├── q7.sane │ ├── q8.sane │ └── q9.sane ├── infra ├── Schema.cpp └── Schema.hpp ├── main.cpp ├── makeutil ├── astgen.cpp └── patchbison ├── parser ├── ASTBase.cpp ├── ASTBase.hpp ├── Keywords.hpp ├── SaneQLLexer.cpp ├── SaneQLLexer.hpp ├── SaneQLParser.hpp ├── astspec └── saneql.ypp ├── semana ├── Functions.cpp ├── Functions.hpp ├── SemanticAnalysis.cpp └── SemanticAnalysis.hpp └── sql ├── SQLWriter.cpp └── SQLWriter.hpp /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | AccessModifierOffset: 0 4 | AlignAfterOpenBracket: Align 5 | AlignConsecutiveMacros: false 6 | AlignConsecutiveAssignments: false 7 | AlignConsecutiveBitFields: false 8 | AlignConsecutiveDeclarations: false 9 | AlignEscapedNewlines: Left 10 | AlignOperands: DontAlign 11 | AlignTrailingComments: false 12 | AllowAllArgumentsOnNextLine: true 13 | AllowAllConstructorInitializersOnNextLine: true 14 | AllowAllParametersOfDeclarationOnNextLine: true 15 | 
AllowShortEnumsOnASingleLine: true 16 | AllowShortBlocksOnASingleLine: Always 17 | AllowShortCaseLabelsOnASingleLine: true 18 | AllowShortFunctionsOnASingleLine: All 19 | AllowShortLambdasOnASingleLine: All 20 | AllowShortIfStatementsOnASingleLine: WithoutElse 21 | AllowShortLoopsOnASingleLine: true 22 | AlwaysBreakAfterDefinitionReturnType: None 23 | AlwaysBreakAfterReturnType: None 24 | AlwaysBreakBeforeMultilineStrings: false 25 | AlwaysBreakTemplateDeclarations: MultiLine 26 | BinPackArguments: true 27 | BinPackParameters: true 28 | BraceWrapping: 29 | AfterCaseLabel: false 30 | AfterClass: false 31 | AfterControlStatement: Never 32 | AfterEnum: false 33 | AfterFunction: false 34 | AfterNamespace: false 35 | AfterObjCDeclaration: false 36 | AfterStruct: false 37 | AfterUnion: false 38 | AfterExternBlock: false 39 | BeforeCatch: false 40 | BeforeElse: false 41 | BeforeLambdaBody: false 42 | BeforeWhile: false 43 | IndentBraces: false 44 | SplitEmptyFunction: true 45 | SplitEmptyRecord: true 46 | SplitEmptyNamespace: true 47 | BreakBeforeBinaryOperators: None 48 | BreakBeforeBraces: Attach 49 | BreakBeforeInheritanceComma: false 50 | BreakInheritanceList: BeforeColon 51 | BreakBeforeTernaryOperators: false 52 | BreakConstructorInitializersBeforeComma: false 53 | BreakConstructorInitializers: BeforeColon 54 | BreakAfterJavaFieldAnnotations: false 55 | BreakStringLiterals: true 56 | ColumnLimit: 0 57 | CommentPragmas: '(LCOV|unreachable)' 58 | CompactNamespaces: true 59 | ConstructorInitializerAllOnOneLineOrOnePerLine: false 60 | ConstructorInitializerIndentWidth: 3 61 | ContinuationIndentWidth: 3 62 | Cpp11BracedListStyle: true 63 | DeriveLineEnding: true 64 | DerivePointerAlignment: false 65 | DisableFormat: false 66 | ExperimentalAutoDetectBinPacking: true 67 | FixNamespaceComments: false 68 | ForEachMacros: 69 | - foreach 70 | - Q_FOREACH 71 | - BOOST_FOREACH 72 | IncludeBlocks: Preserve 73 | IncludeCategories: 74 | - Regex: '^"' 75 | Priority: 1 76 | 
SortPriority: 0 77 | - Regex: '^' 78 | Priority: 2 79 | SortPriority: 0 80 | - Regex: '^<.*\.h>' 81 | Priority: 4 82 | SortPriority: 0 83 | - Regex: '^<' 84 | Priority: 3 85 | SortPriority: 0 86 | - Regex: '.\*' 87 | Priority: 5 88 | SortPriority: 0 89 | IncludeIsMainRegex: '(Test)?$' 90 | IncludeIsMainSourceRegex: '' 91 | IndentCaseLabels: true 92 | IndentCaseBlocks: false 93 | IndentGotoLabels: true 94 | IndentPPDirectives: None 95 | IndentExternBlock: AfterExternBlock 96 | IndentWidth: 3 97 | IndentWrappedFunctionNames: false 98 | InsertTrailingCommas: None 99 | JavaScriptQuotes: Leave 100 | JavaScriptWrapImports: true 101 | KeepEmptyLinesAtTheStartOfBlocks: false 102 | MacroBlockBegin: PROXY_BEGIN 103 | MacroBlockEnd: PROXY_END 104 | MaxEmptyLinesToKeep: 1 105 | NamespaceIndentation: None 106 | ObjCBinPackProtocolList: Auto 107 | ObjCBlockIndentWidth: 2 108 | ObjCBreakBeforeNestedBlockParam: true 109 | ObjCSpaceAfterProperty: false 110 | ObjCSpaceBeforeProtocolList: true 111 | PenaltyBreakAssignment: 2 112 | PenaltyBreakBeforeFirstCallParameter: 50 113 | PenaltyBreakComment: 50 114 | PenaltyBreakFirstLessLess: 50 115 | PenaltyBreakString: 50 116 | PenaltyBreakTemplateDeclaration: 10 117 | PenaltyExcessCharacter: 1000000 118 | PenaltyReturnTypeOnItsOwnLine: 50 119 | PointerAlignment: Left 120 | ReflowComments: false 121 | SortIncludes: true 122 | SortUsingDeclarations: true 123 | SpaceAfterCStyleCast: true 124 | SpaceAfterLogicalNot: false 125 | SpaceAfterTemplateKeyword: true 126 | SpaceBeforeAssignmentOperators: true 127 | SpaceBeforeCpp11BracedList: false 128 | SpaceBeforeCtorInitializerColon: true 129 | SpaceBeforeInheritanceColon: true 130 | SpaceBeforeParens: ControlStatements 131 | SpaceBeforeRangeBasedForLoopColon: true 132 | SpaceInEmptyBlock: false 133 | SpaceInEmptyParentheses: false 134 | SpacesBeforeTrailingComments: 1 135 | SpacesInAngles: false 136 | SpacesInConditionalStatement: false 137 | SpacesInContainerLiterals: false 138 | 
SpacesInCStyleCastParentheses: false 139 | SpacesInParentheses: false 140 | SpacesInSquareBrackets: false 141 | SpaceBeforeSquareBrackets: false 142 | Standard: Latest 143 | StatementMacros: 144 | - Q_UNUSED 145 | - QT_REQUIRE_VERSION 146 | TabWidth: 8 147 | UseCRLF: false 148 | UseTab: Never 149 | WhitespaceSensitiveMacros: 150 | - STRINGIZE 151 | - PP_STRINGIZE 152 | - BOOST_PP_STRINGIZE 153 | ... 154 | 155 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Build and Test 2 | on: 3 | push: 4 | # run when these files changed 5 | paths: 6 | - "**.h" 7 | - "**.hpp" 8 | - "**.cpp" 9 | - "Makefile" 10 | - ".github/workflows/test.yml" 11 | 12 | jobs: 13 | test-query-compilation: 14 | runs-on: ubuntu-latest 15 | steps: 16 | 17 | - name: install dependencies 18 | run: | 19 | sudo apt-get install -y build-essential gcc g++ bison flex 20 | 21 | - name: checkout project 22 | uses: actions/checkout@v3 23 | 24 | - name: build saneql 25 | run: | 26 | make -j4 bin/saneql 27 | 28 | - name: compile saneql tpch queries 29 | run: | 30 | for query in $( seq 1 22 ); do 31 | bin/saneql examples/tpch/q$query.sane 32 | done 33 | 34 | - name: compile saneql feature examples 35 | run: | 36 | for example in $(ls examples/features); do 37 | bin/saneql examples/features/$example 38 | done 39 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2023, Thomas Neumann 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the 
following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PREFIX:=bin/ 2 | 3 | all: $(PREFIX)saneql 4 | 5 | src:=parser/ASTBase.cpp parser/SaneQLLexer.cpp infra/Schema.cpp semana/Functions.cpp semana/SemanticAnalysis.cpp algebra/Expression.cpp algebra/Operator.cpp sql/SQLWriter.cpp main.cpp 6 | gensrc:=$(PREFIX)parser/saneql_parser.cpp 7 | obj:=$(addprefix $(PREFIX),$(src:.cpp=.o)) $(gensrc:.cpp=.o) 8 | 9 | CXXFLAGS:=-std=c++23 -I$(PREFIX) -I. 
-g -Wall -Wextra 10 | 11 | -include $(addprefix $(PREFIX),$(src:.cpp=.d)) $(gensrc:.cpp=.d) 12 | 13 | checkdir=@mkdir -p $(dir $@) 14 | ASTGEN:=$(PREFIX)astgen --astbaseheader parser/ASTBase.hpp --astbaseclass saneql::ASTBase --astheader parser/AST.hpp --namespace saneql::ast 15 | 16 | $(PREFIX)parser/AST.hpp: parser/astspec $(PREFIX)astgen 17 | $(checkdir) 18 | $(ASTGEN) astheader parser/astspec $@ 19 | 20 | $(PREFIX)parser/AST.cpp: parser/astspec $(PREFIX)astgen 21 | $(checkdir) 22 | $(ASTGEN) astbody parser/astspec $@ 23 | 24 | $(PREFIX)parser/AST.o: $(PREFIX)parser/AST.hpp 25 | $(PREFIX)parser/AST.d: $(PREFIX)parser/AST.cpp 26 | 27 | $(PREFIX)parser/saneql.expanded.ypp: parser/astspec parser/saneql.ypp $(PREFIX)astgen 28 | @mkdir -p $(dir $@) 29 | $(ASTGEN) expandgrammar parser/astspec parser/saneql.ypp $@ 30 | 31 | $(PREFIX)parser/saneql_parser.o: $(PREFIX)parser/AST.hpp 32 | $(PREFIX)parser/saneql_parser.cpp: $(PREFIX)parser/saneql.expanded.ypp 33 | makeutil/patchbison `which bison` $< $@ 34 | 35 | $(PREFIX)semana/SemanticAnalysis.o: $(PREFIX)parser/AST.hpp 36 | 37 | CXX?=g++ 38 | compilecpp=$(CXX) -c -o$@ $(strip $(CXXFLAGS) $(CXXFLAGS-$(dir $<)) $(CXXFLAGS-$<) $(IFLAGS) $(LLVM_IFLAGS)) -MMD -MP -MF $(@:.o=.d) $< 39 | 40 | $(PREFIX)%.o: %.cpp 41 | $(checkdir) 42 | $(compilecpp) 43 | 44 | $(PREFIX)%.o: $(PREFIX)%.cpp 45 | $(checkdir) 46 | $(compilecpp) 47 | 48 | $(PREFIX)saneql: $(obj) 49 | $(CXX) $(CXXFLAGS) -o$@ $^ 50 | 51 | $(PREFIX)astgen: $(PREFIX)makeutil/astgen.o 52 | $(CXX) $(CXXFLAGS) -o$@ $^ 53 | 54 | -------------------------------------------------------------------------------- /algebra.md: -------------------------------------------------------------------------------- 1 | Operations on tables 2 | ==================== 3 | 4 | as(binding symbol) 5 | e.g. rel.as(r1).join(rel.as(r2), r1.x=r2.y) 6 | 7 | filter(condition expression) 8 | e.g. rel.filter(x=1) 9 | 10 | join(other table, condition expression, type symbol := inner) 11 | e.g. 
r1.join(r2, x=y) 12 | 13 | map(expressions list expression) 14 | e.g. r1.map({y:=2*x}) 15 | 16 | project(expressions list expression) 17 | e.g. r1.project({x}) 18 | 19 | projectout(remove list symbol) 20 | e.g. r1.projectout({x}) 21 | 22 | groupby(groups list expression, aggregates list expression, type symbol := group, sets list list symbol := {}) 23 | e.g. r1.groupby({a,b},{total:=sum(c)}) 24 | 25 | orderby(expressions list expression) 26 | e.g. r1.orderby({x,y.desc()}) 27 | 28 | union(other table) 29 | unionall(other table) 30 | intersect(other table) 31 | intersectall(other table) 32 | except(other table) 33 | exceptall(other table) 34 | e.g. r1.union(r2) 35 | 36 | window(expressions list expression, partitionby list expression := {}, orderby list expression := {}, framebegin expression := unbounded(), frameend expression := currentrow(), frametype symbol := values) 37 | e.g. r1.window({cs:=sum(x)}, orderby:={y}) 38 | 39 | Types 40 | ===== 41 | 42 | - scalar types 43 | - table 44 | - tuple? 
45 | - symbol 46 | - lambda 47 | - list of type 48 | 49 | 50 | -------------------------------------------------------------------------------- /algebra/Expression.cpp: -------------------------------------------------------------------------------- 1 | #include "algebra/Expression.hpp" 2 | #include "algebra/Operator.hpp" 3 | #include "sql/SQLWriter.hpp" 4 | #include 5 | #include 6 | //--------------------------------------------------------------------------- 7 | // (c) 2023 Thomas Neumann 8 | //--------------------------------------------------------------------------- 9 | using namespace std; 10 | //--------------------------------------------------------------------------- 11 | namespace saneql::algebra { 12 | //--------------------------------------------------------------------------- 13 | Expression::~Expression() 14 | // Destructor 15 | { 16 | } 17 | //--------------------------------------------------------------------------- 18 | void Expression::generateOperand(SQLWriter& out) 19 | // Generate SQL in a form that is suitable as operand 20 | { 21 | out.write("("); 22 | generate(out); 23 | out.write(")"); 24 | } 25 | //--------------------------------------------------------------------------- 26 | IURef::IURef(const IU* iu) 27 | : Expression(iu->getType()), iu(iu) 28 | // Constructor 29 | { 30 | } 31 | //--------------------------------------------------------------------------- 32 | void IURef::generate(SQLWriter& out) 33 | // Generate SQL 34 | { 35 | out.writeIU(iu); 36 | } 37 | //--------------------------------------------------------------------------- 38 | void ConstExpression::generate(SQLWriter& out) 39 | // Generate SQL 40 | { 41 | if (null) { 42 | out.write("NULL"); 43 | } else { 44 | auto type = getType(); 45 | if ((type.getType() != Type::Char) && (type.getType() != Type::Varchar) && (type.getType() != Type::Text)) { 46 | out.write("cast("); 47 | out.writeString(value); 48 | out.write(" as "); 49 | out.writeType(type); 50 | out.write(")"); 51 | 
} else { 52 | out.writeString(value); 53 | } 54 | } 55 | } 56 | //--------------------------------------------------------------------------- 57 | void CastExpression::generate(SQLWriter& out) 58 | // Generate SQL 59 | { 60 | out.write("cast("); 61 | input->generate(out); 62 | out.write(" as "); 63 | out.writeType(getType()); 64 | out.write(")"); 65 | } 66 | //--------------------------------------------------------------------------- 67 | ComparisonExpression::ComparisonExpression(unique_ptr left, unique_ptr right, Mode mode, Collate collate) 68 | : Expression(Type::getBool().withNullable((mode != Mode::Is) && (mode != Mode::IsNot) && (left->getType().isNullable() || right->getType().isNullable()))), left(move(left)), right(move(right)), mode(mode), collate(collate) 69 | // Constructor 70 | { 71 | } 72 | //--------------------------------------------------------------------------- 73 | void ComparisonExpression::generate(SQLWriter& out) 74 | // Generate SQL 75 | { 76 | left->generateOperand(out); 77 | switch (mode) { 78 | case Mode::Equal: out.write(" = "); break; 79 | case Mode::NotEqual: out.write(" <> "); break; 80 | case Mode::Is: out.write(" is not distinct from "); break; 81 | case Mode::IsNot: out.write(" is distinct from "); break; 82 | case Mode::Less: out.write(" < "); break; 83 | case Mode::LessOrEqual: out.write(" <= "); break; 84 | case Mode::Greater: out.write(" > "); break; 85 | case Mode::GreaterOrEqual: out.write(" >= "); break; 86 | case Mode::Like: out.write(" like "); break; 87 | } 88 | right->generateOperand(out); 89 | } 90 | //--------------------------------------------------------------------------- 91 | BetweenExpression::BetweenExpression(unique_ptr base, unique_ptr lower, unique_ptr upper, Collate collate) 92 | : Expression(Type::getBool().withNullable(base->getType().isNullable() || lower->getType().isNullable() || upper->getType().isNullable())), base(move(base)), lower(move(lower)), upper(move(upper)), collate(collate) 93 | // 
Constructor 94 | { 95 | } 96 | //--------------------------------------------------------------------------- 97 | void BetweenExpression::generate(SQLWriter& out) 98 | // Generate SQL 99 | { 100 | base->generateOperand(out); 101 | out.write(" between "); 102 | lower->generateOperand(out); 103 | out.write(" and "); 104 | upper->generateOperand(out); 105 | } 106 | //--------------------------------------------------------------------------- 107 | InExpression::InExpression(unique_ptr probe, vector> values, Collate collate) 108 | : Expression(Type::getBool().withNullable(probe->getType().isNullable() || any_of(values.begin(), values.end(), [](auto& e) { return e->getType().isNullable(); }))), probe(move(probe)), values(move(values)), collate(collate) 109 | // Constructor 110 | { 111 | } 112 | //--------------------------------------------------------------------------- 113 | void InExpression::generate(SQLWriter& out) 114 | // Generate SQL 115 | { 116 | probe->generateOperand(out); 117 | out.write(" in ("); 118 | bool first = true; 119 | for (auto& v : values) { 120 | if (first) 121 | first = false; 122 | else 123 | out.write(", "); 124 | v->generate(out); 125 | } 126 | out.write(")"); 127 | } 128 | //--------------------------------------------------------------------------- 129 | BinaryExpression::BinaryExpression(unique_ptr left, unique_ptr right, Type resultType, Operation op) 130 | : Expression(resultType), left(move(left)), right(move(right)), op(op) 131 | // Constructor 132 | { 133 | } 134 | //--------------------------------------------------------------------------- 135 | void BinaryExpression::generate(SQLWriter& out) 136 | // Generate SQL 137 | { 138 | left->generateOperand(out); 139 | switch (op) { 140 | case Operation::Plus: out.write(" + "); break; 141 | case Operation::Minus: out.write(" - "); break; 142 | case Operation::Mul: out.write(" * "); break; 143 | case Operation::Div: out.write(" / "); break; 144 | case Operation::Mod: out.write(" % "); break; 
145 | case Operation::Power: out.write(" ^ "); break; 146 | case Operation::Concat: out.write(" || "); break; 147 | case Operation::And: out.write(" and "); break; 148 | case Operation::Or: out.write(" or "); break; 149 | } 150 | right->generateOperand(out); 151 | } 152 | //--------------------------------------------------------------------------- 153 | UnaryExpression::UnaryExpression(unique_ptr input, Type resultType, Operation op) 154 | : Expression(resultType), input(move(input)), op(op) 155 | // Constructor 156 | { 157 | } 158 | //--------------------------------------------------------------------------- 159 | void UnaryExpression::generate(SQLWriter& out) 160 | // Generate SQL 161 | { 162 | switch (op) { 163 | case Operation::Plus: out.write("+"); break; 164 | case Operation::Minus: out.write("-"); break; 165 | case Operation::Not: out.write(" not "); break; 166 | } 167 | input->generateOperand(out); 168 | } 169 | //--------------------------------------------------------------------------- 170 | ExtractExpression::ExtractExpression(unique_ptr input, Part part) 171 | : Expression(Type::getInteger().withNullable(input->getType().isNullable())), input(move(input)), part(part) 172 | // Constructor 173 | { 174 | } 175 | //--------------------------------------------------------------------------- 176 | void ExtractExpression::generate(SQLWriter& out) 177 | // Generate SQL 178 | { 179 | out.write("extract("); 180 | switch (part) { 181 | case Part::Year: out.write("year"); break; 182 | case Part::Month: out.write("month"); break; 183 | case Part::Day: out.write("day"); break; 184 | } 185 | out.write(" from "); 186 | input->generateOperand(out); 187 | out.write(")"); 188 | } 189 | //--------------------------------------------------------------------------- 190 | SubstrExpression::SubstrExpression(unique_ptr value, unique_ptr from, unique_ptr len) 191 | : Expression(value->getType().withNullable(value->getType().isNullable() || (from ? 
from->getType().isNullable() : false) || (len ? len->getType().isNullable() : false))), value(move(value)), from(move(from)), len(move(len)) 192 | // Constructor 193 | { 194 | } 195 | //--------------------------------------------------------------------------- 196 | void SubstrExpression::generate(SQLWriter& out) 197 | // Generate SQL 198 | { 199 | out.write("substring("); 200 | value->generate(out); 201 | if (from) { 202 | out.write(" from "); 203 | from->generate(out); 204 | } 205 | if (len) { 206 | out.write(" for "); 207 | len->generate(out); 208 | } 209 | out.write(")"); 210 | } 211 | //--------------------------------------------------------------------------- 212 | SimpleCaseExpression::SimpleCaseExpression(unique_ptr value, Cases cases, unique_ptr defaultValue) 213 | : Expression(defaultValue->getType()), value(move(value)), cases(move(cases)), defaultValue(move(defaultValue)) 214 | // Constructor 215 | { 216 | } 217 | //--------------------------------------------------------------------------- 218 | void SimpleCaseExpression::generate(SQLWriter& out) 219 | // Generate SQL 220 | { 221 | out.write("case "); 222 | value->generateOperand(out); 223 | for (auto& c : cases) { 224 | out.write(" when "); 225 | c.first->generate(out); 226 | out.write(" then "); 227 | c.second->generate(out); 228 | } 229 | out.write(" else "); 230 | defaultValue->generate(out); 231 | out.write(" end"); 232 | } 233 | //--------------------------------------------------------------------------- 234 | SearchedCaseExpression::SearchedCaseExpression(Cases cases, unique_ptr defaultValue) 235 | : Expression(defaultValue->getType()), cases(move(cases)), defaultValue(move(defaultValue)) 236 | // Constructor 237 | { 238 | } 239 | //--------------------------------------------------------------------------- 240 | void SearchedCaseExpression::generate(SQLWriter& out) 241 | // Generate SQL 242 | { 243 | out.write("case"); 244 | for (auto& c : cases) { 245 | out.write(" when "); 246 | 
c.first->generate(out); 247 | out.write(" then "); 248 | c.second->generate(out); 249 | } 250 | out.write(" else "); 251 | defaultValue->generate(out); 252 | out.write(" end"); 253 | } 254 | //--------------------------------------------------------------------------- 255 | Aggregate::Aggregate(unique_ptr input, vector aggregates, unique_ptr computation) 256 | : Expression(computation->getType()), input(move(input)), aggregates(move(aggregates)), computation(move(computation)) 257 | // Constructor 258 | { 259 | } 260 | //--------------------------------------------------------------------------- 261 | void Aggregate::generate(SQLWriter& out) 262 | // Generate SQL 263 | { 264 | out.write("(select "); 265 | computation->generate(out); 266 | if (!aggregates.empty()) { 267 | out.write(" from (select "); 268 | bool first = true; 269 | for (auto& a : aggregates) { 270 | if (first) 271 | first = false; 272 | else 273 | out.write(", "); 274 | switch (a.op) { 275 | case Op::CountStar: out.write("count(*)"); break; 276 | case Op::Count: out.write("count("); break; 277 | case Op::CountDistinct: out.write("count(distinct "); break; 278 | case Op::Sum: out.write("sum("); break; 279 | case Op::SumDistinct: out.write("sum(distinct "); break; 280 | case Op::Avg: out.write("avg("); break; 281 | case Op::AvgDistinct: out.write("avg(distinct "); break; 282 | case Op::Min: out.write("min("); break; 283 | case Op::Max: out.write("max("); break; 284 | } 285 | if (a.op != Op::CountStar) { 286 | a.value->generate(out); 287 | out.write(")"); 288 | } 289 | out.write(" as "); 290 | out.writeIU(a.iu.get()); 291 | } 292 | out.write(" from "); 293 | input->generate(out); 294 | out.write(" s"); 295 | out.write(") s"); 296 | } 297 | out.write(")"); 298 | } 299 | //--------------------------------------------------------------------------- 300 | ForeignCall::ForeignCall(string name, Type returnType, vector> arguments, CallType callType) 301 | : Expression(returnType), name(std::move(name)), 
arguments(std::move(arguments)), callType(callType) 302 | // Constructor 303 | { 304 | } 305 | //--------------------------------------------------------------------------- 306 | void ForeignCall::generate(SQLWriter& out) { 307 | switch (callType) { 308 | case CallType::Function: { 309 | out.write(name); 310 | out.write("("); 311 | bool first = true; 312 | for (auto& a : arguments) { 313 | if(!std::exchange(first, false)) out.write(", "); 314 | a->generate(out); 315 | } 316 | out.write(")"); 317 | break; 318 | } 319 | case CallType::LeftAssocOperator: { // ((a op b) op c) op d 320 | for (auto i = 0u; i != arguments.size() - 2; ++i) { 321 | out.write("("); 322 | } 323 | arguments[0]->generateOperand(out); 324 | for (auto i = 1u; i != arguments.size(); ++i) { 325 | out.write(" "); 326 | out.write(name); 327 | out.write(" "); 328 | arguments[i]->generateOperand(out); 329 | if (i != arguments.size() - 1) { 330 | out.write(")"); 331 | } 332 | } 333 | break; 334 | } 335 | case CallType::RightAssocOperator: { // a op (b op (c op d)) 336 | for (auto i = 0u; i != arguments.size(); ++i) { 337 | arguments[i]->generateOperand(out); 338 | if (i != arguments.size() - 1) { 339 | out.write(" "); 340 | out.write(name); 341 | out.write(" "); 342 | out.write("("); 343 | } 344 | } 345 | for (auto i = 0u; i != arguments.size() - 2; ++i) { 346 | out.write(")"); 347 | } 348 | break; 349 | } 350 | } 351 | } 352 | //--------------------------------------------------------------------------- 353 | } 354 | //--------------------------------------------------------------------------- 355 | -------------------------------------------------------------------------------- /algebra/Expression.hpp: -------------------------------------------------------------------------------- 1 | #ifndef H_saneql_Expression 2 | #define H_saneql_Expression 3 | //--------------------------------------------------------------------------- 4 | #include "infra/Schema.hpp" 5 | #include "semana/Functions.hpp" 6 | 
/// An expression that refers to an IU. Its result type is the type of that IU.
class IURef : public Expression {
   /// The referenced IU (stored as a plain pointer)
   const IU* iu;

   public:
   /// Constructor
   IURef(const IU* iu);

   /// Get the IU
   const IU* getIU() const { return iu; }

   /// Generate SQL
   void generate(SQLWriter& out) override;
   /// Generate SQL in a form that is suitable as operand. Forwards to generate(), so unlike the base class implementation no parentheses are added
   void generateOperand(SQLWriter& out) override { generate(out); }
};
value; 66 | /// NULL? 67 | bool null; 68 | 69 | public: 70 | /// Constructor for non-null values 71 | ConstExpression(std::string value, Type type) : Expression(type), value(std::move(value)), null(false) {} 72 | /// Constructor for NULL values 73 | ConstExpression(std::nullptr_t, Type type) : Expression(type), null(true) {} 74 | 75 | /// Generate SQL 76 | void generate(SQLWriter& out) override; 77 | /// Generate SQL in a form that is suitable as operand 78 | void generateOperand(SQLWriter& out) override { generate(out); } 79 | }; 80 | //--------------------------------------------------------------------------- 81 | /// A cast expression 82 | class CastExpression : public Expression { 83 | /// The input 84 | std::unique_ptr input; 85 | 86 | public: 87 | /// Constructor 88 | CastExpression(std::unique_ptr input, Type type) : Expression(type), input(move(input)) {} 89 | 90 | /// Generate SQL 91 | void generate(SQLWriter& out) override; 92 | }; 93 | //--------------------------------------------------------------------------- 94 | /// A comparison expression 95 | class ComparisonExpression : public Expression { 96 | public: 97 | /// Possible modes 98 | enum Mode { 99 | Equal, 100 | NotEqual, 101 | Is, 102 | IsNot, 103 | Less, 104 | LessOrEqual, 105 | Greater, 106 | GreaterOrEqual, 107 | Like 108 | }; 109 | /// The input 110 | std::unique_ptr left, right; 111 | /// The mode 112 | Mode mode; 113 | /// The collation 114 | Collate collate; 115 | 116 | public: 117 | /// Constructor 118 | ComparisonExpression(std::unique_ptr left, std::unique_ptr right, Mode mode, Collate collate); 119 | 120 | /// Generate SQL 121 | void generate(SQLWriter& out) override; 122 | }; 123 | //--------------------------------------------------------------------------- 124 | /// A between expression 125 | class BetweenExpression : public Expression { 126 | public: 127 | /// The input 128 | std::unique_ptr base, lower, upper; 129 | /// The collation 130 | Collate collate; 131 | 132 | public: 133 
| /// Constructor 134 | BetweenExpression(std::unique_ptr base, std::unique_ptr lower, std::unique_ptr upper, Collate collate); 135 | 136 | /// Generate SQL 137 | void generate(SQLWriter& out) override; 138 | }; 139 | //--------------------------------------------------------------------------- 140 | /// An in expression 141 | class InExpression : public Expression { 142 | public: 143 | /// The input 144 | std::unique_ptr probe; 145 | /// The values to check against 146 | std::vector> values; 147 | /// The collation 148 | Collate collate; 149 | 150 | public: 151 | /// Constructor 152 | InExpression(std::unique_ptr probe, std::vector> values, Collate collate); 153 | 154 | /// Generate SQL 155 | void generate(SQLWriter& out) override; 156 | }; 157 | //--------------------------------------------------------------------------- 158 | /// A binary expression 159 | class BinaryExpression : public Expression { 160 | public: 161 | /// Possible operations 162 | enum Operation { 163 | Plus, 164 | Minus, 165 | Mul, 166 | Div, 167 | Mod, 168 | Power, 169 | Concat, 170 | And, 171 | Or 172 | }; 173 | /// The input 174 | std::unique_ptr left, right; 175 | /// The mode 176 | Operation op; 177 | 178 | public: 179 | /// Constructor 180 | BinaryExpression(std::unique_ptr left, std::unique_ptr right, Type resultType, Operation op); 181 | 182 | /// Generate SQL 183 | void generate(SQLWriter& out) override; 184 | }; 185 | //--------------------------------------------------------------------------- 186 | /// An unary expression 187 | class UnaryExpression : public Expression { 188 | public: 189 | /// Possible operations 190 | enum Operation { 191 | Plus, 192 | Minus, 193 | Not 194 | }; 195 | /// The input 196 | std::unique_ptr input; 197 | /// The mode 198 | Operation op; 199 | 200 | public: 201 | /// Constructor 202 | UnaryExpression(std::unique_ptr input, Type resultType, Operation op); 203 | 204 | /// Generate SQL 205 | void generate(SQLWriter& out) override; 206 | }; 207 | 
//--------------------------------------------------------------------------- 208 | /// An extract expression (year, month or day of the input) 209 | class ExtractExpression : public Expression { 210 | public: 211 | /// Possible parts 212 | enum Part { 213 | Year, 214 | Month, 215 | Day 216 | }; 217 | /// The input 218 | std::unique_ptr input; 219 | /// The part to extract 220 | Part part; 221 | 222 | public: 223 | /// Constructor 224 | ExtractExpression(std::unique_ptr input, Part part); 225 | 226 | /// Generate SQL 227 | void generate(SQLWriter& out) override; 228 | }; 229 | //--------------------------------------------------------------------------- 230 | /// A substring expression 231 | class SubstrExpression : public Expression { 232 | public: 233 | /// The source value, the start (from) and the length (len) 234 | std::unique_ptr value, from, len; 235 | 236 | public: 237 | /// Constructor 238 | SubstrExpression(std::unique_ptr value, std::unique_ptr from, std::unique_ptr len); 239 | 240 | /// Generate SQL 241 | void generate(SQLWriter& out) override; 242 | }; 243 | //--------------------------------------------------------------------------- 244 | /// A simple case expression 245 | class SimpleCaseExpression : public Expression { 246 | public: 247 | using Cases = std::vector, std::unique_ptr>>; // NOTE(review): template arguments are missing in this listing, presumably a list of (case, result) pairs - confirm 248 | 249 | /// The value to search 250 | std::unique_ptr value; 251 | /// The cases 252 | Cases cases; 253 | /// The default result 254 | std::unique_ptr defaultValue; 255 | 256 | public: 257 | /// Constructor 258 | SimpleCaseExpression(std::unique_ptr value, Cases cases, std::unique_ptr defaultValue); 259 | 260 | /// Generate SQL 261 | void generate(SQLWriter& out) override; 262 | }; 263 | //--------------------------------------------------------------------------- 264 | /// A searched case expression 265 | class SearchedCaseExpression : public Expression { 266 | public: 267 | using Cases = SimpleCaseExpression::Cases; 268 | 269 | /// The cases 270 | Cases cases; 271 | /// The default result 272 | std::unique_ptr defaultValue; 273 | 274 | public: 275 | 
/// Constructor 276 | SearchedCaseExpression(Cases cases, std::unique_ptr defaultValue); 277 | 278 | /// Generate SQL 279 | void generate(SQLWriter& out) override; 280 | }; 281 | //--------------------------------------------------------------------------- 282 | /// Helper for aggregation steps: the types shared by Aggregate, GroupBy and Window 283 | struct AggregationLike { 284 | /// A regular computation: an expression and the IU that names its result 285 | struct Entry { 286 | /// The expression 287 | std::unique_ptr value; 288 | /// The result IU 289 | std::unique_ptr iu; 290 | }; 291 | /// Known aggregation functions 292 | enum class Op { 293 | CountStar, 294 | Count, 295 | CountDistinct, 296 | Sum, 297 | SumDistinct, 298 | Min, 299 | Max, 300 | Avg, 301 | AvgDistinct 302 | }; 303 | /// Known window functions; the leading entries repeat Op in the same order 304 | enum class WindowOp { 305 | CountStar, 306 | Count, 307 | CountDistinct, 308 | Sum, 309 | SumDistinct, 310 | Min, 311 | Max, 312 | Avg, 313 | AvgDistinct, 314 | RowNumber, 315 | Rank, 316 | DenseRank, 317 | NTile, 318 | Lead, 319 | Lag, 320 | FirstValue, 321 | LastValue 322 | }; 323 | static_assert(static_cast(Op::AvgDistinct) == static_cast(WindowOp::AvgDistinct)); // Window::generate casts Aggregation::op and switches over WindowOp labels, so the shared entries must have identical values 324 | 325 | /// An aggregation 326 | struct Aggregation { 327 | /// The expression 328 | std::unique_ptr value; 329 | /// The result IU 330 | std::unique_ptr iu; 331 | /// The operation 332 | Op op; 333 | /// Additional parameters (Window::generate emits them as further call arguments) 334 | std::vector> parameters{}; 335 | }; 336 | }; 337 | //--------------------------------------------------------------------------- 338 | /// An aggregate expression 339 | class Aggregate : public Expression, public AggregationLike { 340 | private: 341 | /// The input 342 | std::unique_ptr input; 343 | /// The aggregates 344 | std::vector aggregates; 345 | /// The final result computation 346 | std::unique_ptr computation; 347 | 348 | public: 349 | /// Constructor 350 | Aggregate(std::unique_ptr input, std::vector aggregates, std::unique_ptr computation); 351 | 352 | /// Generate SQL 353 | void generate(SQLWriter& out) override; 354 | }; 355 | 
//--------------------------------------------------------------------------- 356 | /// A foreign call expression 357 | struct ForeignCall : public Expression { 358 | /// Type of the generated call 359 | enum class CallType { Function, LeftAssocOperator, RightAssocOperator }; 360 | static constexpr CallType defaultType() { return CallType::Function; } // plain function calls are the default 361 | 362 | private: 363 | /// The name of the declared function 364 | std::string name; 365 | /// The function call arguments 366 | std::vector> arguments; 367 | /// The call type 368 | CallType callType; 369 | 370 | public: 371 | /// Constructor 372 | ForeignCall(std::string name, Type returnType, std::vector> arguments, CallType callType); 373 | 374 | /// Generate SQL 375 | void generate(SQLWriter& out) override; 376 | }; 377 | //--------------------------------------------------------------------------- 378 | } 379 | } 380 | //--------------------------------------------------------------------------- 381 | #endif 382 | -------------------------------------------------------------------------------- /algebra/Operator.cpp: -------------------------------------------------------------------------------- 1 | #include "algebra/Operator.hpp" 2 | #include "sql/SQLWriter.hpp" 3 | //--------------------------------------------------------------------------- 4 | // (c) 2023 Thomas Neumann 5 | //--------------------------------------------------------------------------- 6 | using namespace std; 7 | //--------------------------------------------------------------------------- 8 | namespace saneql::algebra { 9 | //--------------------------------------------------------------------------- 10 | Operator::~Operator() 11 | // Destructor 12 | { 13 | } 14 | //--------------------------------------------------------------------------- 15 | TableScan::TableScan(string name, vector columns) 16 | : name(move(name)), columns(move(columns)) 17 | // Constructor, takes over the table name and the column list 18 | { 19 | } 20 | 
//--------------------------------------------------------------------------- 21 | void TableScan::generate(SQLWriter& out) 22 | // Generate SQL: (select COLUMN as IU, ... from TABLE) 23 | { 24 | out.write("(select "); 25 | bool first = true; 26 | for (auto& c : columns) { 27 | if (first) 28 | first = false; 29 | else 30 | out.write(", "); 31 | out.writeIdentifier(c.name); 32 | out.write(" as "); 33 | out.writeIU(c.iu.get()); 34 | } 35 | out.write(" from "); 36 | out.writeIdentifier(name); 37 | out.write(")"); 38 | } 39 | //--------------------------------------------------------------------------- 40 | Select::Select(unique_ptr input, unique_ptr condition) 41 | : input(move(input)), condition(move(condition)) 42 | // Constructor 43 | { 44 | } 45 | //--------------------------------------------------------------------------- 46 | void Select::generate(SQLWriter& out) 47 | // Generate SQL: (select * from INPUT s where CONDITION) 48 | { 49 | out.write("(select * from "); 50 | input->generate(out); 51 | out.write(" s where "); 52 | condition->generate(out); 53 | out.write(")"); 54 | } 55 | //--------------------------------------------------------------------------- 56 | Map::Map(unique_ptr input, vector computations) 57 | : input(move(input)), computations(move(computations)) 58 | // Constructor 59 | { 60 | } 61 | //--------------------------------------------------------------------------- 62 | void Map::generate(SQLWriter& out) 63 | // Generate SQL: (select *, VALUE as IU, ... from INPUT s) 64 | { 65 | out.write("(select *"); 66 | for (auto& c : computations) { 67 | out.write(", "); 68 | c.value->generate(out); 69 | out.write(" as "); 70 | out.writeIU(c.iu.get()); 71 | } 72 | out.write(" from "); 73 | input->generate(out); 74 | out.write(" s)"); 75 | } 76 | //--------------------------------------------------------------------------- 77 | SetOperation::SetOperation(unique_ptr left, unique_ptr right, vector> leftColumns, vector> rightColumns, vector> resultColumns, Op op) 78 | : left(move(left)), right(move(right)), leftColumns(move(leftColumns)), rightColumns(move(rightColumns)), 
resultColumns(move(resultColumns)), op(op) 79 | // Constructor 80 | { 81 | } 82 | //--------------------------------------------------------------------------- 83 | void SetOperation::generate(SQLWriter& out) 84 | // Generate SQL: both inputs are reduced to their column lists, combined with the set operator, and the result is named through the alias s(...) 85 | { 86 | auto dumpColumns = [&out](const vector>& columns) { 87 | if (columns.empty()) { 88 | out.write("1"); // empty column list: select the constant 1 instead 89 | } else { 90 | bool first = true; 91 | for (auto& c : columns) { 92 | if (first) 93 | first = false; 94 | else 95 | out.write(", "); 96 | c->generate(out); 97 | } 98 | } 99 | }; 100 | out.write("(select * from ((select "); 101 | dumpColumns(leftColumns); 102 | out.write(" from "); 103 | left->generate(out); 104 | out.write(" l) "); 105 | switch (op) { 106 | case Op::Union: out.write("union"); break; 107 | case Op::UnionAll: out.write("union all"); break; 108 | case Op::Except: out.write("except"); break; 109 | case Op::ExceptAll: out.write("except all"); break; 110 | case Op::Intersect: out.write("intersect"); break; 111 | case Op::IntersectAll: out.write("intersect all"); break; 112 | } 113 | out.write(" (select "); 114 | dumpColumns(rightColumns); 115 | out.write(" from "); 116 | right->generate(out); 117 | out.write(" r)) s"); 118 | if (!resultColumns.empty()) { 119 | out.write("("); 120 | bool first = true; 121 | for (auto& c : resultColumns) { 122 | if (first) 123 | first = false; 124 | else 125 | out.write(", "); 126 | out.writeIU(c.get()); 127 | } 128 | out.write(")"); 129 | } 130 | out.write(")"); 131 | } 132 | //--------------------------------------------------------------------------- 133 | Join::Join(unique_ptr left, unique_ptr right, unique_ptr condition, JoinType joinType) 134 | : left(move(left)), right(move(right)), condition(move(condition)), joinType(joinType) 135 | // Constructor 136 | { 137 | } 138 | //--------------------------------------------------------------------------- 139 | void Join::generate(SQLWriter& out) 140 | // Generate SQL; the left input is aliased as l and the right input as r 141 | { 142 | switch (joinType) { 143 | case JoinType::Inner: 144 
| out.write("(select * from "); 145 | left->generate(out); 146 | out.write(" l inner join "); 147 | right->generate(out); 148 | out.write(" r on "); 149 | condition->generate(out); 150 | out.write(")"); 151 | break; 152 | case JoinType::LeftOuter: 153 | out.write("(select * from "); 154 | left->generate(out); 155 | out.write(" l left outer join "); 156 | right->generate(out); 157 | out.write(" r on "); 158 | condition->generate(out); 159 | out.write(")"); 160 | break; 161 | case JoinType::RightOuter: 162 | out.write("(select * from "); 163 | left->generate(out); 164 | out.write(" l right outer join "); 165 | right->generate(out); 166 | out.write(" r on "); 167 | condition->generate(out); 168 | out.write(")"); 169 | break; 170 | case JoinType::FullOuter: 171 | out.write("(select * from "); 172 | left->generate(out); 173 | out.write(" l full outer join "); 174 | right->generate(out); 175 | out.write(" r on "); 176 | condition->generate(out); 177 | out.write(")"); 178 | break; 179 | case JoinType::LeftSemi: // semi join: keep the left rows that have a match on the right 180 | out.write("(select * from "); 181 | left->generate(out); 182 | out.write(" l where exists(select * from "); 183 | right->generate(out); 184 | out.write(" r where "); 185 | condition->generate(out); 186 | out.write("))"); 187 | break; 188 | case JoinType::RightSemi: // same with swapped roles, the right rows are kept 189 | out.write("(select * from "); 190 | right->generate(out); 191 | out.write(" r where exists(select * from "); 192 | left->generate(out); 193 | out.write(" l where "); 194 | condition->generate(out); 195 | out.write("))"); 196 | break; 197 | case JoinType::LeftAnti: // anti join: keep the left rows without a match on the right 198 | out.write("(select * from "); 199 | left->generate(out); 200 | out.write(" l where not exists(select * from "); 201 | right->generate(out); 202 | out.write(" r where "); 203 | condition->generate(out); 204 | out.write("))"); 205 | break; 206 | case JoinType::RightAnti: // same with swapped roles, the right rows are kept 207 | out.write("(select * from "); 208 | right->generate(out); 209 | out.write(" r where not exists(select * from "); 210 | left->generate(out); 211 | 
out.write(" l where "); 212 | condition->generate(out); 213 | out.write("))"); 214 | break; 215 | } 216 | } 217 | //--------------------------------------------------------------------------- 218 | GroupBy::GroupBy(unique_ptr input, vector groupBy, vector aggregates) 219 | : input(move(input)), groupBy(move(groupBy)), aggregates(move(aggregates)) 220 | // Constructor 221 | { 222 | } 223 | //--------------------------------------------------------------------------- 224 | void GroupBy::generate(SQLWriter& out) 225 | // Generate SQL: the group by expressions come first in the select list, followed by the aggregates 226 | { 227 | out.write("(select "); 228 | bool first = true; 229 | for (auto& g : groupBy) { 230 | if (first) 231 | first = false; 232 | else 233 | out.write(", "); 234 | g.value->generate(out); 235 | out.write(" as "); 236 | out.writeIU(g.iu.get()); 237 | } 238 | for (auto& a : aggregates) { 239 | if (first) 240 | first = false; 241 | else 242 | out.write(", "); 243 | switch (a.op) { 244 | case Op::CountStar: out.write("count(*)"); break; 245 | case Op::Count: out.write("count("); break; 246 | case Op::CountDistinct: out.write("count(distinct "); break; 247 | case Op::Sum: out.write("sum("); break; 248 | case Op::SumDistinct: out.write("sum(distinct "); break; 249 | case Op::Avg: out.write("avg("); break; 250 | case Op::AvgDistinct: out.write("avg(distinct "); break; 251 | case Op::Min: out.write("min("); break; 252 | case Op::Max: out.write("max("); break; 253 | } 254 | if (a.op != Op::CountStar) { // count(*) was written completely above, all other calls still need the argument and the closing parenthesis 255 | a.value->generate(out); 256 | out.write(")"); 257 | } 258 | out.write(" as "); 259 | out.writeIU(a.iu.get()); 260 | } 261 | out.write(" from "); 262 | input->generate(out); 263 | out.write(" s group by "); 264 | if (groupBy.empty()) { 265 | out.write("true"); // no group by expressions: group by a constant 266 | } else { 267 | for (unsigned index = 0, limit = groupBy.size(); index < limit; ++index) { 268 | if (index) out.write(", "); 269 | out.write(to_string(index + 1)); // refer to the group by expressions by their 1-based position in the select list 270 | } 271 | } 272 | out.write(")"); 273 | } 274 | 
//--------------------------------------------------------------------------- 275 | Sort::Sort(unique_ptr input, vector order, optional limit, optional offset) 276 | : input(move(input)), order(move(order)), limit(limit), offset(offset) 277 | // Constructor 278 | { 279 | } 280 | //--------------------------------------------------------------------------- 281 | void Sort::generate(SQLWriter& out) 282 | // Generate SQL: order by, limit and offset are only emitted when they are present 283 | { 284 | out.write("(select * from "); 285 | input->generate(out); 286 | out.write(" s"); 287 | if (!order.empty()) { 288 | out.write(" order by "); 289 | bool first = true; 290 | for (auto& o : order) { 291 | if (first) 292 | first = false; 293 | else 294 | out.write(", "); 295 | o.value->generate(out); 296 | if (o.collate != Collate{}) out.write(" collate TODO"); // TODO: emit the real collation, for now only a placeholder is written 297 | if (o.descending) out.write(" desc"); 298 | } 299 | } 300 | if (limit.has_value()) { 301 | out.write(" limit "); 302 | out.write(to_string(*limit)); 303 | } 304 | if (offset.has_value()) { 305 | out.write(" offset "); 306 | out.write(to_string(*offset)); 307 | } 308 | out.write(")"); 309 | } 310 | //--------------------------------------------------------------------------- 311 | Window::Window(unique_ptr input, vector aggregates, vector> partitionBy, vector orderBy) 312 | : input(move(input)), aggregates(move(aggregates)), partitionBy(move(partitionBy)), orderBy(move(orderBy)) 313 | // Constructor 314 | { 315 | } 316 | //--------------------------------------------------------------------------- 317 | void Window::generate(SQLWriter& out) 318 | // Generate SQL: every aggregate is followed by the same over (...) clause 319 | { 320 | auto aggr = [&out](const char* name, const Aggregation& a, bool distinct = false) { // writes name([distinct ]value, parameters...) 321 | out.write(name); 322 | out.write("("); 323 | if (distinct) out.write("distinct "); 324 | a.value->generate(out); 325 | for (auto& p : a.parameters) { 326 | out.write(", "); 327 | p->generate(out); 328 | } 329 | out.write(")"); 330 | }; 331 | out.write("(select *"); 332 | for (auto& a : aggregates) { 333 | 
out.write(", "); 334 | switch (static_cast(a.op)) { // a.op is declared with the aggregation enum, the labels below use the Op alias of Window 335 | case Op::CountStar: out.write("count(*)"); break; 336 | case Op::Count: aggr("count", a); break; 337 | case Op::CountDistinct: aggr("count", a, true); break; 338 | case Op::Sum: aggr("sum", a); break; 339 | case Op::SumDistinct: aggr("sum", a, true); break; 340 | case Op::Avg: aggr("avg", a); break; 341 | case Op::AvgDistinct: aggr("avg", a, true); break; 342 | case Op::Min: aggr("min", a); break; 343 | case Op::Max: aggr("max", a); break; 344 | case Op::RowNumber: out.write("row_number()"); break; 345 | case Op::Rank: aggr("rank", a); break; 346 | case Op::DenseRank: aggr("dense_rank", a); break; 347 | case Op::NTile: aggr("ntile", a); break; 348 | case Op::Lead: aggr("lead", a); break; 349 | case Op::Lag: aggr("lag", a); break; 350 | case Op::FirstValue: aggr("first_value", a); break; 351 | case Op::LastValue: aggr("last_value", a); break; 352 | } 353 | out.write(" over ("); 354 | if (!partitionBy.empty()) { 355 | out.write("partition by "); 356 | bool first = true; 357 | for (auto& p : partitionBy) { 358 | if (first) 359 | first = false; 360 | else 361 | out.write(", "); 362 | p->generate(out); 363 | } 364 | } 365 | if (!orderBy.empty()) { 366 | if (!partitionBy.empty()) out.write(" "); 367 | out.write("order by "); 368 | bool first = true; 369 | for (auto& o : orderBy) { 370 | if (first) 371 | first = false; 372 | else 373 | out.write(", "); 374 | o.value->generate(out); 375 | if (o.collate != Collate{}) out.write(" collate TODO"); // TODO: emit the real collation, for now only a placeholder is written 376 | if (o.descending) out.write(" desc"); 377 | } 378 | } 379 | out.write(") as "); 380 | out.writeIU(a.iu.get()); 381 | } 382 | out.write(" from "); 383 | input->generate(out); 384 | out.write(" s)"); 385 | } 386 | //--------------------------------------------------------------------------- 387 | InlineTable::InlineTable(vector> columns, vector> values, unsigned rowCount) 388 | : columns(move(columns)), values(move(values)), rowCount(move(rowCount)) 389 | // 
Constructor 390 | { 391 | } 392 | //--------------------------------------------------------------------------- 393 | void InlineTable::generate(SQLWriter& out) 394 | // Generate SQL: the values are read row by row, a table without rows is written as one row of NULLs followed by limit 0 395 | { 396 | out.write("(select * from (values"); 397 | if (rowCount) { 398 | for (unsigned index = 0; index != rowCount; ++index) { 399 | if (index) out.write(","); 400 | if (!columns.empty()) { 401 | out.write("("); 402 | for (unsigned index2 = 0, limit2 = columns.size(); index2 != limit2; ++index2) { 403 | if (index2) out.write(", "); 404 | values[index * limit2 + index2]->generate(out); 405 | } 406 | out.write(")"); 407 | } else { 408 | // PostgreSQL does not support empty tuples in values, add a dummy value 409 | out.write("(NULL)"); 410 | } 411 | } 412 | } else { 413 | if (!columns.empty()) { 414 | out.write("("); 415 | for (unsigned index2 = 0, limit2 = columns.size(); index2 != limit2; ++index2) { 416 | if (index2) out.write(", "); 417 | out.write("NULL"); 418 | } 419 | out.write(")"); 420 | } else { 421 | // PostgreSQL does not support empty tuples in values, add a dummy value 422 | out.write("(NULL)"); 423 | } 424 | } 425 | out.write(") s("); 426 | bool first = true; 427 | for (auto& c : columns) { 428 | if (first) 429 | first = false; 430 | else 431 | out.write(", "); 432 | out.writeIU(c.get()); 433 | } 434 | out.write(")"); 435 | if (!rowCount) out.write(" limit 0"); 436 | out.write(")"); 437 | } 438 | //--------------------------------------------------------------------------- 439 | } 440 | //--------------------------------------------------------------------------- 441 | -------------------------------------------------------------------------------- /algebra/Operator.hpp: -------------------------------------------------------------------------------- 1 | #ifndef H_saneql_Operator 2 | #define H_saneql_Operator 3 | //--------------------------------------------------------------------------- 4 | #include "algebra/Expression.hpp" 5 | #include "infra/Schema.hpp" 6 | 
#include 7 | #include 8 | //--------------------------------------------------------------------------- 9 | // SaneQL 10 | // (c) 2023 Thomas Neumann 11 | // SPDX-License-Identifier: BSD-3-Clause 12 | //--------------------------------------------------------------------------- 13 | namespace saneql { 14 | //--------------------------------------------------------------------------- 15 | class SQLWriter; 16 | //--------------------------------------------------------------------------- 17 | namespace algebra { 18 | //--------------------------------------------------------------------------- 19 | /// An information unit (IU): a typed handle for a column, written to SQL via SQLWriter::writeIU 20 | class IU { 21 | /// The type 22 | Type type; 23 | 24 | public: 25 | /// Constructor 26 | explicit IU(Type type) : type(type) {} 27 | 28 | /// Get the type 29 | const Type& getType() const { return type; } 30 | }; 31 | //--------------------------------------------------------------------------- 32 | /// Base class for operators; every operator writes its own SQL through generate 33 | class Operator { 34 | public: 35 | /// Destructor 36 | virtual ~Operator(); 37 | 38 | /// Generate SQL 39 | virtual void generate(SQLWriter& out) = 0; 40 | }; 41 | //--------------------------------------------------------------------------- 42 | /// A table scan operator 43 | class TableScan : public Operator { 44 | public: 45 | /// A column entry 46 | struct Column { 47 | /// The name 48 | std::string name; 49 | /// The IU 50 | std::unique_ptr iu; 51 | }; 52 | 53 | private: 54 | /// The table name 55 | std::string name; 56 | /// The columns 57 | std::vector columns; 58 | 59 | public: 60 | /// Constructor 61 | TableScan(std::string name, std::vector columns); 62 | 63 | /// Generate SQL 64 | void generate(SQLWriter& out) override; 65 | }; 66 | //--------------------------------------------------------------------------- 67 | /// A select operator 68 | class Select : public Operator { 69 | /// The input 70 | std::unique_ptr input; 71 | /// The filter condition 72 | std::unique_ptr condition; 73 | 74 | public: 75 | /// 
Constructor 76 | Select(std::unique_ptr input, std::unique_ptr condition); 77 | 78 | /// Generate SQL 79 | void generate(SQLWriter& out) override; 80 | }; 81 | //--------------------------------------------------------------------------- 82 | /// A map operator 83 | class Map : public Operator { 84 | public: 85 | using Entry = AggregationLike::Entry; 86 | 87 | private: 88 | /// The input 89 | std::unique_ptr input; 90 | /// The computations 91 | std::vector computations; 92 | 93 | public: 94 | /// Constructor 95 | Map(std::unique_ptr input, std::vector computations); 96 | 97 | /// Generate SQL 98 | void generate(SQLWriter& out) override; 99 | }; 100 | //--------------------------------------------------------------------------- 101 | /// A set operation operator 102 | class SetOperation : public Operator { 103 | public: 104 | /// Operation types 105 | enum class Op { 106 | Union, 107 | UnionAll, 108 | Except, 109 | ExceptAll, 110 | Intersect, 111 | IntersectAll 112 | }; 113 | 114 | private: 115 | /// The inputs 116 | std::unique_ptr left, right; 117 | /// The input columns 118 | std::vector> leftColumns, rightColumns; 119 | /// The result columns 120 | std::vector> resultColumns; 121 | /// The operation 122 | Op op; 123 | 124 | public: 125 | /// Constructor 126 | SetOperation(std::unique_ptr left, std::unique_ptr right, std::vector> leftColumns, std::vector> rightColumns, std::vector> resultColumns, Op op); 127 | 128 | /// Generate SQL 129 | void generate(SQLWriter& out) override; 130 | }; 131 | //--------------------------------------------------------------------------- 132 | /// A join operator 133 | class Join : public Operator { 134 | public: 135 | /// Join types 136 | enum class JoinType { 137 | Inner, 138 | LeftOuter, 139 | RightOuter, 140 | FullOuter, 141 | LeftSemi, 142 | RightSemi, 143 | LeftAnti, 144 | RightAnti 145 | }; 146 | 147 | private: 148 | /// The inputs 149 | std::unique_ptr left, right; 150 | /// The join condition 151 | std::unique_ptr condition; 
152 | /// The join type 153 | JoinType joinType; 154 | 155 | public: 156 | /// Constructor 157 | Join(std::unique_ptr left, std::unique_ptr right, std::unique_ptr condition, JoinType joinType); 158 | 159 | /// Generate SQL 160 | void generate(SQLWriter& out) override; 161 | }; 162 | //--------------------------------------------------------------------------- 163 | /// A group by operator 164 | class GroupBy : public Operator, public AggregationLike { 165 | private: 166 | /// The input 167 | std::unique_ptr input; 168 | /// The group by expressions 169 | std::vector groupBy; 170 | /// The aggregates 171 | std::vector aggregates; 172 | 173 | public: 174 | /// Constructor 175 | GroupBy(std::unique_ptr input, std::vector groupBy, std::vector aggregates); 176 | 177 | /// Generate SQL 178 | void generate(SQLWriter& out) override; 179 | }; 180 | //--------------------------------------------------------------------------- 181 | /// A sort operator 182 | class Sort : public Operator { 183 | public: 184 | struct Entry { // a single sort criterion 185 | /// The value to order by 186 | std::unique_ptr value; 187 | /// The collation 188 | Collate collate; 189 | /// Descending? 
190 | bool descending; 191 | }; 192 | 193 | /// The input 194 | std::unique_ptr input; 195 | /// The order 196 | std::vector order; 197 | /// The optional limit and offset of the result 198 | std::optional limit, offset; 199 | 200 | public: 201 | /// Constructor 202 | Sort(std::unique_ptr input, std::vector order, std::optional limit, std::optional offset); 203 | 204 | /// Generate SQL 205 | void generate(SQLWriter& out) override; 206 | }; 207 | //--------------------------------------------------------------------------- 208 | /// A window operator 209 | class Window : public Operator, public AggregationLike { 210 | public: 211 | using Op = WindowOp; // inside Window, Op names the extended list of window functions 212 | 213 | private: 214 | /// The input 215 | std::unique_ptr input; 216 | /// The aggregates 217 | std::vector aggregates; 218 | /// The partition by expressions 219 | std::vector> partitionBy; 220 | /// The order by expressions 221 | std::vector orderBy; 222 | 223 | public: 224 | /// Constructor 225 | Window(std::unique_ptr input, std::vector aggregates, std::vector> partitionBy, std::vector orderBy); 226 | 227 | /// Generate SQL 228 | void generate(SQLWriter& out) override; 229 | }; 230 | //--------------------------------------------------------------------------- 231 | /// An inline table definition 232 | class InlineTable : public Operator { 233 | public: 234 | /// The columns 235 | std::vector> columns; 236 | /// The values, stored row by row (generate reads rowCount * columns.size() entries) 237 | std::vector> values; 238 | /// The row count 239 | unsigned rowCount; 240 | 241 | public: 242 | /// Constructor 243 | InlineTable(std::vector> columns, std::vector> values, unsigned rowCount); 244 | 245 | /// Generate SQL 246 | void generate(SQLWriter& out) override; 247 | }; 248 | //--------------------------------------------------------------------------- 249 | } 250 | } 251 | //--------------------------------------------------------------------------- 252 | #endif 253 | -------------------------------------------------------------------------------- /examples/dialects/sqlite.sane: 
-------------------------------------------------------------------------------- 1 | let date(spec, modifier expression := '+0 seconds') := foreigncall('date', date, {spec, modifier}), 2 | let concat(string1, string2) := foreigncall('||', text, {string1, string2}), -- NOTE(review): examples/features/foreigncall.sane passes type := operator for '||'; confirm that the default function call form is intended here 3 | -------------------------------------------------------------------------------- /examples/features/foreigncall.sane: -------------------------------------------------------------------------------- 1 | -- Outputs SQLite-compatible SQL 2 | let date(spec, modifier expression := '+0 seconds') := foreigncall('date', date, {spec, modifier}), 3 | let concat(string1, string2, string3 := "") := foreigncall('||', text, {string1, string2, string3}, type := operator), 4 | orders 5 | .filter(o_orderdate < date('1995-03-15', '+10 days')) 6 | .map({txt := concat(o_orderstatus, ' comment: ', o_comment)}) 7 | .orderby({o_orderdate.desc()}, limit:=10) 8 | .project({o_orderkey, o_orderdate, txt}) 9 | -------------------------------------------------------------------------------- /examples/features/gensym.sane: -------------------------------------------------------------------------------- 1 | let semijoin(preserve table, probe table, p expression, x symbol := gensym(x), y symbol :=gensym(y)) := -- semi join built from other operators: number the preserved rows, join, keep only the preserved side, remove duplicates, drop the row number 2 | preserve 3 | .window({x:=row_number()}) 4 | .alias(y) 5 | .join(probe, p) 6 | .project({y}) 7 | .distinct() 8 | .projectout({x}), 9 | semijoin(nation, region.filter(r_name='ASIA'), n_regionkey=r_regionkey) 10 | 11 | -------------------------------------------------------------------------------- /examples/features/isidentical.sane: -------------------------------------------------------------------------------- 1 | let isidentical(t1 table, t2 table) := -- true when neither except-all difference contains a row 2 | t1.except(t2, all:=true).union(t2.except(t1, all:=true)).aggregate(count())=0, 3 | isidentical(nation.filter(n_nationkey<100), nation) 4 | 5 | -------------------------------------------------------------------------------- /examples/features/table.sane: 
-------------------------------------------------------------------------------- 1 | table({{a:=1,b:=2},{3,4}}) 2 | 3 | -------------------------------------------------------------------------------- /examples/features/window.sane: -------------------------------------------------------------------------------- 1 | nation 2 | .window({ r_order := row_number(), 3 | tile := ntile(2), 4 | neighbour1 := lag(n_name), 5 | neighbour2 := lead(n_name::text, offset := 2, default := '-') }, 6 | orderby := n_name, 7 | partitionby := n_regionkey) 8 | .join(region, n_regionkey = r_regionkey) 9 | .project({ r_name, r_order, n_name, neighbour1, neighbour2, tile }) 10 | -------------------------------------------------------------------------------- /examples/tpch-sqlite/q1.sane: -------------------------------------------------------------------------------- 1 | let date(spec, modifier) := foreigncall('date', date, {spec, modifier}), -- date(spec, modifier) is forwarded to the foreign function 'date' 2 | lineitem 3 | .filter(l_shipdate <= date('1998-12-01', '-90 days')) 4 | .groupby({l_returnflag, l_linestatus}, 5 | {sum_qty:=sum(l_quantity), 6 | sum_base_price:=sum(l_extendedprice), 7 | sum_disc_price:=sum(l_extendedprice * (1 - l_discount)), 8 | sum_charge:=sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)), 9 | avg_qty:=avg(l_quantity), 10 | avg_price:=avg(l_extendedprice), 11 | avg_disc:=avg(l_discount), 12 | count_order:=count() 13 | }) 14 | .orderby({l_returnflag, l_linestatus}) 15 | -------------------------------------------------------------------------------- /examples/tpch-sqlite/q10.sane: -------------------------------------------------------------------------------- 1 | let base := '1993-10-01', 2 | let basedate(add := '+0 seconds') := foreigncall('date', date, {base, add}), 3 | orders 4 | .filter(o_orderdate >= basedate() && o_orderdate < basedate(add := '+3 months')) 5 | .join(customer, c_custkey=o_custkey) 6 | .join(lineitem.filter(l_returnflag='R'), l_orderkey=o_orderkey) 7 | .join(nation, c_nationkey=n_nationkey) 8 | 
.groupby({c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment}, {revenue:=sum(l_extendedprice * (1 - l_discount))}) 9 | .orderby({revenue.desc()}, limit:=20) 10 | 11 | -------------------------------------------------------------------------------- /examples/tpch-sqlite/q11.sane: -------------------------------------------------------------------------------- 1 | let partsupp_germany := partsupp -- used twice below: for the value per part and for the overall total 2 | .join(supplier, ps_suppkey=s_suppkey) 3 | .join(nation.filter(n_name='GERMANY'), s_nationkey=n_nationkey), 4 | partsupp_germany 5 | .groupby(ps_partkey, {value:=sum(ps_supplycost * ps_availqty)}) 6 | .filter(value>partsupp_germany.aggregate(sum(ps_supplycost*ps_availqty))*0.0001) 7 | .orderby(value.desc()) 8 | 9 | -------------------------------------------------------------------------------- /examples/tpch-sqlite/q12.sane: -------------------------------------------------------------------------------- 1 | let base := '1994-01-01', 2 | let basedate(add := '+0 seconds') := foreigncall('date', date, {base, add}), 3 | lineitem 4 | .filter(l_commitdate < l_receiptdate 5 | && l_shipdate < l_commitdate 6 | && l_receiptdate >= basedate() 7 | && l_receiptdate < basedate(add := '+1 year') && l_shipmode.in({'MAIL', 'SHIP'})) 8 | .join(orders, o_orderkey=l_orderkey) 9 | .groupby(l_shipmode, {high_line_count:=sum(case({o_orderpriority = '1-URGENT' || o_orderpriority = '2-HIGH' => 1}, else:=0)), 10 | low_line_count:=sum(case({o_orderpriority <> '1-URGENT' && o_orderpriority <> '2-HIGH' => 1}, else:=0))}) 11 | .orderby(l_shipmode) 12 | 13 | -------------------------------------------------------------------------------- /examples/tpch-sqlite/q13.sane: -------------------------------------------------------------------------------- 1 | customer 2 | .join(orders.filter(!o_comment.like('%special%requests%')), c_custkey=o_custkey, type:=leftouter) 3 | .groupby({c_custkey}, {c_count:=count(o_orderkey)}) 4 | .groupby({c_count}, {custdist:=count()}) 5 | 
.orderby({custdist.desc(), c_count.desc()}) 6 | 7 | -------------------------------------------------------------------------------- /examples/tpch-sqlite/q14.sane: -------------------------------------------------------------------------------- 1 | let base:='1995-09-01', 2 | let basedate(add := '+0 seconds') := foreigncall('date', date, {base, add}), 3 | lineitem 4 | .filter(l_shipdate >= basedate() && l_shipdate < basedate(add := '+1 month')) 5 | .join(part, l_partkey=p_partkey) 6 | .aggregate(100.00*sum(case({p_type.like('PROMO%') => l_extendedprice * (1 - l_discount)}, else:=0)) / sum(l_extendedprice * (1 - l_discount))) 7 | 8 | -------------------------------------------------------------------------------- /examples/tpch-sqlite/q15.sane: -------------------------------------------------------------------------------- 1 | let base := '1996-01-01', 2 | let basedate(add := '+0 seconds') := foreigncall('date', date, {base, add}), 3 | let revenue:= 4 | lineitem 5 | .filter(l_shipdate >= basedate() && l_shipdate < basedate('+3 months')) 6 | .groupby(l_suppkey, {total_revenue:=sum(l_extendedprice * (1 - l_discount))}) 7 | .project({supplier_no:=l_suppkey, total_revenue}), 8 | supplier 9 | .join(revenue, s_suppkey = supplier_no) 10 | .filter(total_revenue=revenue.aggregate(max(total_revenue))) 11 | .orderby({s_suppkey}) 12 | .project({s_suppkey, s_name, s_address, s_phone, total_revenue}) 13 | 14 | -------------------------------------------------------------------------------- /examples/tpch-sqlite/q16.sane: -------------------------------------------------------------------------------- 1 | part 2 | .filter(p_brand <> 'Brand#45' && !p_type.like('MEDIUM POLISHED%') && p_size.in({49, 14, 23, 45, 19, 3, 36, 9})) 3 | .join(partsupp, p_partkey=ps_partkey) 4 | .join(supplier.filter(s_comment.like('%Customer%Complaints%')), ps_suppkey=s_suppkey, type:=leftanti) 5 | .groupby({p_brand, p_type, p_size}, {supplier_cnt:=count(ps_suppkey, distinct:=true)}) 6 | 
.orderby({supplier_cnt.desc(), p_brand, p_type, p_size}) 7 | 8 | -------------------------------------------------------------------------------- /examples/tpch-sqlite/q17.sane: -------------------------------------------------------------------------------- 1 | let avg_for_part(p_partkey) := -- 0.2 times the average quantity of the line items of the given part 2 | lineitem.filter(l_partkey=p_partkey).aggregate(0.2*avg(l_quantity)), 3 | part 4 | .filter(p_brand = 'Brand#23' && p_container = 'MED BOX') 5 | .join(lineitem, p_partkey=l_partkey) 6 | .filter(l_quantity < avg_for_part(p_partkey)) 7 | 8 | 9 | -------------------------------------------------------------------------------- /examples/tpch-sqlite/q18.sane: -------------------------------------------------------------------------------- 1 | customer 2 | .join(orders, c_custkey=o_custkey) 3 | .join(lineitem.groupby({l_orderkey}, {s:=sum(l_quantity)}).filter(s>300), o_orderkey=l_orderkey, type:=leftsemi) 4 | .join(lineitem, o_orderkey=l_orderkey) 5 | .groupby({c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice}, {s:=sum(l_quantity)}) 6 | .orderby({o_totalprice.desc(), o_orderdate}, limit:=100) 7 | 8 | -------------------------------------------------------------------------------- /examples/tpch-sqlite/q19.sane: -------------------------------------------------------------------------------- 1 | lineitem 2 | .filter(l_shipmode.in({'AIR', 'AIR REG'}) && l_shipinstruct = 'DELIVER IN PERSON') 3 | .join(part, p_partkey=l_partkey) 4 | .filter( 5 | (p_brand = 'Brand#12' && p_container.in({'SM CASE', 'SM BOX', 'SM PACK', 'SM PKG'}) && l_quantity.between(1,1+10) && p_size.between(1,5)) 6 | || (p_brand = 'Brand#23' && p_container.in({'MED BAG', 'MED BOX', 'MED PKG', 'MED PACK'}) && l_quantity.between(10,10+10) && p_size.between(1,10)) 7 | || (p_brand = 'Brand#34' && p_container.in({'LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'}) && l_quantity.between(20,20+10) && p_size.between(1,15))) 8 | .aggregate(sum(l_extendedprice* (1 - l_discount))) 9 | 10 | 
-------------------------------------------------------------------------------- /examples/tpch-sqlite/q2.sane: -------------------------------------------------------------------------------- 1 | let min_supplycost_for_part(p_partkey) := 2 | partsupp 3 | .filter(ps_partkey = p_partkey) 4 | .join(supplier, s_suppkey=ps_suppkey) 5 | .join(nation, s_nationkey=n_nationkey) 6 | .join(region.filter(r_name='EUROPE'), n_regionkey=r_regionkey).aggregate(min(ps_supplycost)), 7 | part 8 | .filter(condition:=p_size = 15 && p_type.like('%BRASS')) 9 | .join(partsupp, p_partkey = ps_partkey) 10 | .join(supplier, s_suppkey = ps_suppkey) 11 | .join(nation, s_nationkey = n_nationkey) 12 | .join(region.filter(r_name='EUROPE'), n_regionkey=r_regionkey) 13 | .filter(ps_supplycost = min_supplycost_for_part(p_partkey)) 14 | .orderby({s_acctbal.desc(), n_name, s_name, p_partkey}, limit:=100) 15 | .project({s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment}) 16 | 17 | -------------------------------------------------------------------------------- /examples/tpch-sqlite/q20.sane: -------------------------------------------------------------------------------- 1 | let base := '1994-01-01', 2 | let basedate(add := '+0 seconds') := foreigncall('date', date, {base, add}), 3 | let qty_per_ps(ps_partkey, ps_suppkey) := 4 | lineitem 5 | .filter(l_partkey = ps_partkey && l_suppkey = ps_suppkey && l_shipdate >= basedate() && l_shipdate < basedate('+1 year')) 6 | .aggregate(sum(l_quantity)), 7 | let avail := 8 | partsupp 9 | .join(part.filter(p_name.like('forest%')), ps_partkey=p_partkey, type:=leftsemi) 10 | .filter(ps_availqty > 0.5*qty_per_ps(ps_partkey, ps_suppkey)) 11 | .project(ps_suppkey), 12 | supplier 13 | .join(nation.filter(n_name='CANADA'), s_nationkey=n_nationkey) 14 | .join(avail, s_suppkey=ps_suppkey, type:=leftsemi) 15 | .orderby({s_name}) 16 | .project({s_name, s_address}) 17 | 18 | 
-------------------------------------------------------------------------------- /examples/tpch-sqlite/q21.sane: -------------------------------------------------------------------------------- 1 | supplier 2 | .join(lineitem.filter(l_receiptdate>l_commitdate).as(l1), s_suppkey=l1.l_suppkey) 3 | .join(orders.filter(o_orderstatus = 'F'), o_orderkey = l1.l_orderkey) 4 | .join(nation.filter(n_name = 'SAUDI ARABIA'), s_nationkey = n_nationkey) 5 | .join(lineitem.as(l2), l2.l_orderkey = l1.l_orderkey && l2.l_suppkey <> l1.l_suppkey, type:=leftsemi) 6 | .join(lineitem.as(l3), l3.l_orderkey = l1.l_orderkey && l3.l_suppkey <> l1.l_suppkey && l3.l_receiptdate > l3.l_commitdate, type:=leftanti) 7 | .groupby({s_name}, {numwait:=count()}) 8 | .orderby({numwait.desc(), s_name}, limit:=100) 9 | 10 | -------------------------------------------------------------------------------- /examples/tpch-sqlite/q22.sane: -------------------------------------------------------------------------------- 1 | let substr(str,from,len) := foreigncall('substr', text, {str,from,len}), 2 | let avg_for_selected := 3 | customer 4 | .filter(c_acctbal > 0.00 && substr(c_phone,1,2).in({'13', '31', '23', '29', '30', '18', '17'})) 5 | .aggregate(avg(c_acctbal)), 6 | customer 7 | .map({cntrycode:=substr(c_phone,1,2)}) 8 | .filter(cntrycode.in({'13', '31', '23', '29', '30', '18', '17'}) && c_acctbal > avg_for_selected) 9 | .join(orders, o_custkey=c_custkey, type:=leftanti) 10 | .groupby({cntrycode}, {numcust:=count(), totacctbal:=sum(c_acctbal)}) 11 | .orderby({cntrycode}) 12 | 13 | -------------------------------------------------------------------------------- /examples/tpch-sqlite/q3.sane: -------------------------------------------------------------------------------- 1 | let date(spec) := foreigncall('date', date, {spec}), 2 | customer 3 | .filter(c_mktsegment = 'BUILDING') 4 | .join(orders.filter(o_orderdate < date('1995-03-15')), c_custkey = o_custkey) 5 | .join(lineitem.filter(l_shipdate > 
date('1995-03-15')), l_orderkey = o_orderkey) 6 | .groupby({l_orderkey,o_orderdate,o_shippriority},{revenue:=sum(l_extendedprice * (1 - l_discount))}) 7 | .orderby({revenue.desc(), o_orderdate}, limit:=10) 8 | .project({l_orderkey, revenue, o_orderdate, o_shippriority}) 9 | -------------------------------------------------------------------------------- /examples/tpch-sqlite/q4.sane: -------------------------------------------------------------------------------- 1 | let date(spec, modifier := '+0 seconds') := foreigncall('date', date, {spec, modifier}), 2 | orders 3 | .filter(o_orderdate >= date('1993-07-01') && o_orderdate < date('1993-07-01', '+3 months')) 4 | .join(lineitem.filter(l_commitdate < l_receiptdate), l_orderkey = o_orderkey, type:=exists) 5 | .groupby({o_orderpriority}, {order_count:=count()}) 6 | .orderby(o_orderpriority) 7 | -------------------------------------------------------------------------------- /examples/tpch-sqlite/q5.sane: -------------------------------------------------------------------------------- 1 | let date(spec, modifier := '+0 seconds') := foreigncall('date', date, {spec, modifier}), 2 | customer 3 | .join(orders.filter(o_orderdate >= date('1994-01-01') && o_orderdate < date('1994-01-01', '+1 year')), c_custkey=o_custkey) 4 | .join(lineitem, l_orderkey=o_orderkey) 5 | .join(supplier, l_suppkey=s_suppkey) 6 | .join(nation, s_nationkey=n_nationkey) 7 | .join(region.filter(r_name='ASIA'), n_regionkey=r_regionkey) 8 | .groupby({n_name}, {revenue:=sum(l_extendedprice * (1 - l_discount))}) 9 | .orderby({revenue.desc()}) 10 | .project({n_name, revenue}) 11 | 12 | -------------------------------------------------------------------------------- /examples/tpch-sqlite/q6.sane: -------------------------------------------------------------------------------- 1 | let date(spec, modifier := '+0 seconds') := foreigncall('date', date, {spec, modifier}), 2 | lineitem 3 | .filter(l_shipdate >= date('1994-01-01') && l_shipdate < 
date('1994-01-01', '+1 year') && l_discount.between(0.06 - 0.01, 0.06 + 0.01) && l_quantity<24) 4 | .aggregate(sum(l_extendedprice * l_discount)) 5 | -------------------------------------------------------------------------------- /examples/tpch-sqlite/q7.sane: -------------------------------------------------------------------------------- 1 | let date(spec, modifier := '+0 seconds') := foreigncall('date', date, {spec, modifier}), 2 | let extractYear(date) := foreigncall('strftime', text, {'%Y', date})::integer, 3 | supplier 4 | .join(lineitem.filter(l_shipdate.between(date('1995-01-01'), date('1996-12-31'))), s_suppkey=l_suppkey) 5 | .join(orders, o_orderkey=l_orderkey) 6 | .join(customer, c_custkey=o_custkey) 7 | .join(nation.as(n1), s_nationkey=n1.n_nationkey) 8 | .join(nation.as(n2), c_nationkey=n2.n_nationkey) 9 | .filter((n1.n_name = 'FRANCE' && n2.n_name = 'GERMANY') || (n1.n_name = 'GERMANY' && n2.n_name = 'FRANCE')) 10 | .map({supp_nation:=n1.n_name, cust_nation:=n2.n_name, l_year:=extractYear(l_shipdate), volume:=l_extendedprice * (1 - l_discount)}) 11 | .groupby({supp_nation, cust_nation, l_year}, {revenue:=sum(volume)}) 12 | .orderby({supp_nation, cust_nation, l_year}) 13 | 14 | -------------------------------------------------------------------------------- /examples/tpch-sqlite/q8.sane: -------------------------------------------------------------------------------- 1 | let date(spec) := foreigncall('date', date, {spec}), 2 | let extractYear(date) := foreigncall('strftime', text, {'%Y', date})::integer, 3 | part 4 | .filter(p_type = 'ECONOMY ANODIZED STEEL') 5 | .join(lineitem, p_partkey=l_partkey) 6 | .join(supplier, s_suppkey=l_suppkey) 7 | .join(orders.filter(o_orderdate.between(date('1995-01-01'), date('1996-12-31'))), l_orderkey=o_orderkey) 8 | .join(customer, o_custkey=c_custkey) 9 | .join(nation.as(n1), c_nationkey=n1.n_nationkey) 10 | .join(nation.as(n2), s_nationkey=n2.n_nationkey) 11 | .join(region.filter(r_name='AMERICA'), 
n1.n_regionkey=r_regionkey) 12 | .map({o_year:=extractYear(o_orderdate), volume:=l_extendedprice * (1 - l_discount), nation:=n2.n_name}) 13 | .groupby({o_year}, {mkt_share:=sum(case({nation='BRAZIL' => volume}, else:=0))/sum(volume)}) 14 | .orderby({o_year}) 15 | 16 | -------------------------------------------------------------------------------- /examples/tpch-sqlite/q9.sane: -------------------------------------------------------------------------------- 1 | let extractYear(date) := foreigncall('strftime', text, {'%Y', date})::integer, 2 | part 3 | .filter(p_name.like('%green%')) 4 | .join(lineitem, p_partkey=l_partkey) 5 | .join(supplier, s_suppkey=l_suppkey) 6 | .join(partsupp, ps_suppkey=l_suppkey && ps_partkey=l_partkey) 7 | .join(orders, o_orderkey=l_orderkey) 8 | .join(nation, s_nationkey=n_nationkey) 9 | .map({nation:=n_name, o_year:=extractYear(o_orderdate), amount:=l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity}) 10 | .groupby({nation, o_year}, {sum_profit:=sum(amount)}) 11 | .orderby({nation, o_year.desc()}) 12 | 13 | -------------------------------------------------------------------------------- /examples/tpch/q1.sane: -------------------------------------------------------------------------------- 1 | lineitem 2 | .filter(l_shipdate <= '1998-12-01'::date - '90 days'::interval) 3 | .groupby({l_returnflag, l_linestatus}, 4 | {sum_qty:=sum(l_quantity), 5 | sum_base_price:=sum(l_extendedprice), 6 | sum_disc_price:=sum(l_extendedprice * (1 - l_discount)), 7 | sum_charge:=sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)), 8 | avg_qty:=avg(l_quantity), 9 | avg_price:=avg(l_extendedprice), 10 | avg_disc:=avg(l_discount), 11 | count_order:=count() 12 | }) 13 | .orderby({l_returnflag, l_linestatus}) 14 | -------------------------------------------------------------------------------- /examples/tpch/q10.sane: -------------------------------------------------------------------------------- 1 | let base := '1993-10-01'::date, 2 | orders 3 | 
.filter(o_orderdate >= base && o_orderdate < base + '3 month'::interval) 4 | .join(customer, c_custkey=o_custkey) 5 | .join(lineitem.filter(l_returnflag='R'), l_orderkey=o_orderkey) 6 | .join(nation, c_nationkey=n_nationkey) 7 | .groupby({c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment}, {revenue:=sum(l_extendedprice * (1 - l_discount))}) 8 | .orderby({revenue.desc()}, limit:=20) 9 | 10 | -------------------------------------------------------------------------------- /examples/tpch/q11.sane: -------------------------------------------------------------------------------- 1 | let partsupp_germany := partsupp 2 | .join(supplier, ps_suppkey=s_suppkey) 3 | .join(nation.filter(n_name='GERMANY'), s_nationkey=n_nationkey), 4 | partsupp_germany 5 | .groupby(ps_partkey, {value:=sum(ps_supplycost * ps_availqty)}) 6 | .filter(value>partsupp_germany.aggregate(sum(ps_supplycost*ps_availqty))*0.0001) 7 | .orderby(value.desc()) 8 | 9 | -------------------------------------------------------------------------------- /examples/tpch/q12.sane: -------------------------------------------------------------------------------- 1 | let base := '1994-01-01'::date, 2 | lineitem 3 | .filter(l_commitdate < l_receiptdate && l_shipdate < l_commitdate && l_receiptdate >= base && l_receiptdate < base + '1 year'::interval && l_shipmode.in({'MAIL', 'SHIP'})) 4 | .join(orders, o_orderkey=l_orderkey) 5 | .groupby(l_shipmode, {high_line_count:=sum(case({o_orderpriority = '1-URGENT' || o_orderpriority = '2-HIGH' => 1}, else:=0)), low_line_count:=sum(case({o_orderpriority <> '1-URGENT' && o_orderpriority <> '2-HIGH' => 1}, else:=0))}) 6 | .orderby(l_shipmode) 7 | 8 | -------------------------------------------------------------------------------- /examples/tpch/q13.sane: -------------------------------------------------------------------------------- 1 | customer 2 | .join(orders.filter(!o_comment.like('%special%requests%')), c_custkey=o_custkey, type:=leftouter) 3 | 
.groupby({c_custkey}, {c_count:=count(o_orderkey)}) 4 | .groupby({c_count}, {custdist:=count()}) 5 | .orderby({custdist.desc(), c_count.desc()}) 6 | 7 | -------------------------------------------------------------------------------- /examples/tpch/q14.sane: -------------------------------------------------------------------------------- 1 | let base:='1995-09-01'::date, 2 | lineitem 3 | .filter(l_shipdate >= base && l_shipdate < base + '1 month'::interval) 4 | .join(part, l_partkey=p_partkey) 5 | .aggregate(100.00*sum(case({p_type.like('PROMO%') => l_extendedprice * (1 - l_discount)}, else:=0)) / sum(l_extendedprice * (1 - l_discount))) 6 | 7 | -------------------------------------------------------------------------------- /examples/tpch/q15.sane: -------------------------------------------------------------------------------- 1 | let base := '1996-01-01'::date, 2 | let revenue:= 3 | lineitem 4 | .filter(l_shipdate >= base && l_shipdate < base + '3 month'::interval) 5 | .groupby(l_suppkey, {total_revenue:=sum(l_extendedprice * (1 - l_discount))}) 6 | .project({supplier_no:=l_suppkey, total_revenue}), 7 | supplier 8 | .join(revenue, s_suppkey = supplier_no) 9 | .filter(total_revenue=revenue.aggregate(max(total_revenue))) 10 | .orderby({s_suppkey}) 11 | .project({s_suppkey, s_name, s_address, s_phone, total_revenue}) 12 | 13 | -------------------------------------------------------------------------------- /examples/tpch/q16.sane: -------------------------------------------------------------------------------- 1 | part 2 | .filter(p_brand <> 'Brand#45' && !p_type.like('MEDIUM POLISHED%') && p_size.in({49, 14, 23, 45, 19, 3, 36, 9})) 3 | .join(partsupp, p_partkey=ps_partkey) 4 | .join(supplier.filter(s_comment.like('%Customer%Complaints%')), ps_suppkey=s_suppkey, type:=leftanti) 5 | .groupby({p_brand, p_type, p_size}, {supplier_cnt:=count(ps_suppkey, distinct:=true)}) 6 | .orderby({supplier_cnt.desc(), p_brand, p_type, p_size}) 7 | 8 | 
-------------------------------------------------------------------------------- /examples/tpch/q17.sane: -------------------------------------------------------------------------------- 1 | let avg_for_part(p_partkey) := 2 | lineitem.filter(l_partkey=p_partkey).aggregate(0.2*avg(l_quantity)), 3 | part 4 | .filter(p_brand = 'Brand#23' && p_container = 'MED BOX') 5 | .join(lineitem, p_partkey=l_partkey) 6 | .filter(l_quantity < avg_for_part(p_partkey)) 7 | 8 | 9 | -------------------------------------------------------------------------------- /examples/tpch/q18.sane: -------------------------------------------------------------------------------- 1 | customer 2 | .join(orders, c_custkey=o_custkey) 3 | .join(lineitem.groupby({l_orderkey}, {s:=sum(l_quantity)}).filter(s>300), o_orderkey=l_orderkey, type:=leftsemi) 4 | .join(lineitem, o_orderkey=l_orderkey) 5 | .groupby({c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice}, {s:=sum(l_quantity)}) 6 | .orderby({o_totalprice.desc(), o_orderdate}, limit:=100) 7 | 8 | -------------------------------------------------------------------------------- /examples/tpch/q19.sane: -------------------------------------------------------------------------------- 1 | lineitem 2 | .filter(l_shipmode.in({'AIR', 'AIR REG'}) && l_shipinstruct = 'DELIVER IN PERSON') 3 | .join(part, p_partkey=l_partkey) 4 | .filter( 5 | (p_brand = 'Brand#12' && p_container.in({'SM CASE', 'SM BOX', 'SM PACK', 'SM PKG'}) && l_quantity.between(1,1+10) && p_size.between(1,5)) 6 | || (p_brand = 'Brand#23' && p_container.in({'MED BAG', 'MED BOX', 'MED PKG', 'MED PACK'}) && l_quantity.between(10,10+10) && p_size.between(1,10)) 7 | || (p_brand = 'Brand#34' && p_container.in({'LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'}) && l_quantity.between(20,20+10) && p_size.between(1,15))) 8 | .aggregate(sum(l_extendedprice* (1 - l_discount))) 9 | 10 | -------------------------------------------------------------------------------- /examples/tpch/q2.sane: 
-------------------------------------------------------------------------------- 1 | let min_supplycost_for_part(p_partkey) := 2 | partsupp 3 | .filter(ps_partkey = p_partkey) 4 | .join(supplier, s_suppkey=ps_suppkey) 5 | .join(nation, s_nationkey=n_nationkey) 6 | .join(region.filter(r_name='EUROPE'), n_regionkey=r_regionkey).aggregate(min(ps_supplycost)), 7 | part 8 | .filter(condition:=p_size = 15 && p_type.like('%BRASS')) 9 | .join(partsupp, p_partkey = ps_partkey) 10 | .join(supplier, s_suppkey = ps_suppkey) 11 | .join(nation, s_nationkey = n_nationkey) 12 | .join(region.filter(r_name='EUROPE'), n_regionkey=r_regionkey) 13 | .filter(ps_supplycost = min_supplycost_for_part(p_partkey)) 14 | .orderby({s_acctbal.desc(), n_name, s_name, p_partkey}, limit:=100) 15 | .project({s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment}) 16 | 17 | -------------------------------------------------------------------------------- /examples/tpch/q20.sane: -------------------------------------------------------------------------------- 1 | let base := '1994-01-01'::date, 2 | let qty_per_ps(ps_partkey, ps_suppkey) := 3 | lineitem 4 | .filter(l_partkey = ps_partkey && l_suppkey = ps_suppkey && l_shipdate >= base && l_shipdate < base + '1 year'::interval) 5 | .aggregate(sum(l_quantity)), 6 | let avail := 7 | partsupp 8 | .join(part.filter(p_name.like('forest%')), ps_partkey=p_partkey, type:=leftsemi) 9 | .filter(ps_availqty > 0.5*qty_per_ps(ps_partkey, ps_suppkey)) 10 | .project(ps_suppkey), 11 | supplier 12 | .join(nation.filter(n_name='CANADA'), s_nationkey=n_nationkey) 13 | .join(avail, s_suppkey=ps_suppkey, type:=leftsemi) 14 | .orderby({s_name}) 15 | .project({s_name, s_address}) 16 | 17 | -------------------------------------------------------------------------------- /examples/tpch/q21.sane: -------------------------------------------------------------------------------- 1 | supplier 2 | .join(lineitem.filter(l_receiptdate>l_commitdate).as(l1), 
s_suppkey=l1.l_suppkey) 3 | .join(orders.filter(o_orderstatus = 'F'), o_orderkey = l1.l_orderkey) 4 | .join(nation.filter(n_name = 'SAUDI ARABIA'), s_nationkey = n_nationkey) 5 | .join(lineitem.as(l2), l2.l_orderkey = l1.l_orderkey && l2.l_suppkey <> l1.l_suppkey, type:=leftsemi) 6 | .join(lineitem.as(l3), l3.l_orderkey = l1.l_orderkey && l3.l_suppkey <> l1.l_suppkey && l3.l_receiptdate > l3.l_commitdate, type:=leftanti) 7 | .groupby({s_name}, {numwait:=count()}) 8 | .orderby({numwait.desc(), s_name}, limit:=100) 9 | 10 | -------------------------------------------------------------------------------- /examples/tpch/q22.sane: -------------------------------------------------------------------------------- 1 | let avg_for_selected := 2 | customer 3 | .filter(c_acctbal > 0.00 && c_phone.substr(1,2).in({'13', '31', '23', '29', '30', '18', '17'})) 4 | .aggregate(avg(c_acctbal)), 5 | customer 6 | .map({cntrycode:=c_phone.substr(1,2)}) 7 | .filter(cntrycode.in({'13', '31', '23', '29', '30', '18', '17'}) && c_acctbal > avg_for_selected) 8 | .join(orders, o_custkey=c_custkey, type:=leftanti) 9 | .groupby({cntrycode}, {numcust:=count(), totacctbal:=sum(c_acctbal)}) 10 | .orderby({cntrycode}) 11 | 12 | -------------------------------------------------------------------------------- /examples/tpch/q3.sane: -------------------------------------------------------------------------------- 1 | customer 2 | .filter(c_mktsegment = 'BUILDING') 3 | .join(orders.filter(o_orderdate < '1995-03-15'::date), c_custkey = o_custkey) 4 | .join(lineitem.filter(l_shipdate > '1995-03-15'::date), l_orderkey = o_orderkey) 5 | .groupby({l_orderkey,o_orderdate,o_shippriority},{revenue:=sum(l_extendedprice * (1 - l_discount))}) 6 | .orderby({revenue.desc(), o_orderdate}, limit:=10) 7 | .project({l_orderkey, revenue, o_orderdate, o_shippriority}) 8 | -------------------------------------------------------------------------------- /examples/tpch/q4.sane: 
-------------------------------------------------------------------------------- 1 | orders 2 | .filter(o_orderdate >= '1993-07-01'::date && o_orderdate < '1993-07-01'::date + '3 month'::interval) 3 | .join(lineitem.filter(l_commitdate < l_receiptdate), l_orderkey = o_orderkey, type:=exists) 4 | .groupby({o_orderpriority}, {order_count:=count()}) 5 | .orderby(o_orderpriority) 6 | -------------------------------------------------------------------------------- /examples/tpch/q5.sane: -------------------------------------------------------------------------------- 1 | -- gives the wrong result? 2 | customer 3 | .join(orders.filter(o_orderdate >= '1994-01-01'::date && o_orderdate < '1994-01-01'::date + '1 year'::interval), c_custkey=o_custkey) 4 | .join(lineitem, l_orderkey=o_orderkey) 5 | .join(supplier, l_suppkey=s_suppkey) 6 | .join(nation, s_nationkey=n_nationkey) 7 | .join(region.filter(r_name='ASIA'), n_regionkey=r_regionkey) 8 | .groupby({n_name}, {revenue:=sum(l_extendedprice * (1 - l_discount))}) 9 | .orderby({revenue.desc()}) 10 | .project({n_name, revenue}) 11 | 12 | -------------------------------------------------------------------------------- /examples/tpch/q6.sane: -------------------------------------------------------------------------------- 1 | lineitem 2 | .filter(l_shipdate >= '1994-01-01'::date && l_shipdate < '1994-01-01'::date + '1 year'::interval && l_discount.between(0.06 - 0.01, 0.06 + 0.01) && l_quantity<24) 3 | .aggregate(sum(l_extendedprice * l_discount)) 4 | -------------------------------------------------------------------------------- /examples/tpch/q7.sane: -------------------------------------------------------------------------------- 1 | supplier 2 | .join(lineitem.filter(l_shipdate.between('1995-01-01'::date, '1996-12-31'::date)), s_suppkey=l_suppkey) 3 | .join(orders, o_orderkey=l_orderkey) 4 | .join(customer, c_custkey=o_custkey) 5 | .join(nation.as(n1), s_nationkey=n1.n_nationkey) 6 | .join(nation.as(n2), 
c_nationkey=n2.n_nationkey) 7 | .filter((n1.n_name = 'FRANCE' && n2.n_name = 'GERMANY') || (n1.n_name = 'GERMANY' && n2.n_name = 'FRANCE')) 8 | .map({supp_nation:=n1.n_name, cust_nation:=n2.n_name, l_year:=l_shipdate.extract(year), volume:=l_extendedprice * (1 - l_discount)}) 9 | .groupby({supp_nation, cust_nation, l_year}, {revenue:=sum(volume)}) 10 | .orderby({supp_nation, cust_nation, l_year}) 11 | 12 | -------------------------------------------------------------------------------- /examples/tpch/q8.sane: -------------------------------------------------------------------------------- 1 | part 2 | .filter(p_type = 'ECONOMY ANODIZED STEEL') 3 | .join(lineitem, p_partkey=l_partkey) 4 | .join(supplier, s_suppkey=l_suppkey) 5 | .join(orders.filter(o_orderdate.between('1995-01-01'::date, '1996-12-31'::date)), l_orderkey=o_orderkey) 6 | .join(customer, o_custkey=c_custkey) 7 | .join(nation.as(n1), c_nationkey=n1.n_nationkey) 8 | .join(nation.as(n2), s_nationkey=n2.n_nationkey) 9 | .join(region.filter(r_name='AMERICA'), n1.n_regionkey=r_regionkey) 10 | .map({o_year:=o_orderdate.extract(year), volume:=l_extendedprice * (1 - l_discount), nation:=n2.n_name}) 11 | .groupby({o_year}, {mkt_share:=sum(case({nation='BRAZIL' => volume}, else:=0))/sum(volume)}) 12 | .orderby({o_year}) 13 | 14 | -------------------------------------------------------------------------------- /examples/tpch/q9.sane: -------------------------------------------------------------------------------- 1 | part 2 | .filter(p_name.like('%green%')) 3 | .join(lineitem, p_partkey=l_partkey) 4 | .join(supplier, s_suppkey=l_suppkey) 5 | .join(partsupp, ps_suppkey=l_suppkey && ps_partkey=l_partkey) 6 | .join(orders, o_orderkey=l_orderkey) 7 | .join(nation, s_nationkey=n_nationkey) 8 | .map({nation:=n_name, o_year:=o_orderdate.extract(year), amount:=l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity}) 9 | .groupby({nation, o_year}, {sum_profit:=sum(amount)}) 10 | .orderby({nation, o_year.desc()}) 
11 | 12 | -------------------------------------------------------------------------------- /infra/Schema.cpp: -------------------------------------------------------------------------------- 1 | #include "infra/Schema.hpp" 2 | //--------------------------------------------------------------------------- 3 | // (c) 2023 Thomas Neumann 4 | //--------------------------------------------------------------------------- 5 | using namespace std; 6 | //--------------------------------------------------------------------------- 7 | namespace saneql { 8 | //--------------------------------------------------------------------------- 9 | string Type::getName() const 10 | // Get the name (for error reporting) 11 | { 12 | switch (tag) { 13 | case Unknown: return "unknown"; 14 | case Bool: return "boolean"; 15 | case Integer: return "integer"; 16 | case Decimal: return "decimal"; 17 | case Char: return "char"; 18 | case Varchar: return "varchar"; 19 | case Text: return "text"; 20 | case Date: return "date"; 21 | case Interval: return "interval"; 22 | } 23 | __builtin_unreachable(); 24 | } 25 | //--------------------------------------------------------------------------- 26 | void Schema::createTable(std::string name, std::initializer_list<Column> columns) 27 | // Create a table 28 | { 29 | auto& t = tables[name]; 30 | t.columns.assign(columns.begin(), columns.end()); 31 | } 32 | //--------------------------------------------------------------------------- 33 | void Schema::createTPCH() 34 | // Create the TPC-H schema for experiments 35 | { 36 | createTable("part", {{"p_partkey", Type::getInteger()}, {"p_name", Type::getVarchar(55)}, {"p_mfgr", Type::getChar(25)}, {"p_brand", Type::getChar(10)}, {"p_type", Type::getVarchar(25)}, {"p_size", Type::getInteger()}, {"p_container", Type::getChar(10)}, {"p_retailprice", Type::getDecimal(12, 2)}, {"p_comment", Type::getVarchar(23)}}); 37 | createTable("region", {{"r_regionkey", Type::getInteger()}, {"r_name", Type::getChar(25)}, {"r_comment", 
Type::getVarchar(152)}}); 38 | createTable("nation", {{"n_nationkey", Type::getInteger()}, {"n_name", Type::getChar(25)}, {"n_regionkey", Type::getInteger()}, {"n_comment", Type::getVarchar(152)}}); 39 | createTable("supplier", {{"s_suppkey", Type::getInteger()}, {"s_name", Type::getChar(25)}, {"s_address", Type::getVarchar(40)}, {"s_nationkey", Type::getInteger()}, {"s_phone", Type::getChar(15)}, {"s_acctbal", Type::getDecimal(12, 2)}, {"s_comment", Type::getVarchar(101)}}); 40 | createTable("partsupp", {{"ps_partkey", Type::getInteger()}, {"ps_suppkey", Type::getInteger()}, {"ps_availqty", Type::getInteger()}, {"ps_supplycost", Type::getDecimal(12, 2)}, {"ps_comment", Type::getVarchar(199)}}); 41 | createTable("customer", {{"c_custkey", Type::getInteger()}, {"c_name", Type::getVarchar(25)}, {"c_address", Type::getVarchar(40)}, {"c_nationkey", Type::getInteger()}, {"c_phone", Type::getChar(15)}, {"c_acctbal", Type::getDecimal(12, 2)}, {"c_mktsegment", Type::getChar(10)}, {"c_comment", Type::getVarchar(117)}}); 42 | createTable("orders", {{"o_orderkey", Type::getInteger()}, {"o_custkey", Type::getInteger()}, {"o_orderstatus", Type::getChar(1)}, {"o_totalprice", Type::getDecimal(12, 2)}, {"o_orderdate", Type::getDate()}, {"o_orderpriority", Type::getChar(15)}, {"o_clerk", Type::getChar(15)}, {"o_shippriority", Type::getInteger()}, {"o_comment", Type::getVarchar(79)}}); 43 | createTable("lineitem", {{"l_orderkey", Type::getInteger()}, {"l_partkey", Type::getInteger()}, {"l_suppkey", Type::getInteger()}, {"l_linenumber", Type::getInteger()}, {"l_quantity", Type::getDecimal(12, 2)}, {"l_extendedprice", Type::getDecimal(12, 2)}, {"l_discount", Type::getDecimal(12, 2)}, {"l_tax", Type::getDecimal(12, 2)}, {"l_returnflag", Type::getChar(1)}, {"l_linestatus", Type::getChar(1)}, {"l_shipdate", Type::getDate()}, {"l_commitdate", Type::getDate()}, {"l_receiptdate", Type::getDate()}, {"l_shipinstruct", Type::getChar(25)}, {"l_shipmode", Type::getChar(10)}, {"l_comment", 
Type::getVarchar(44)}}); 44 | } 45 | //--------------------------------------------------------------------------- 46 | void Schema::populateSchema() 47 | // Create initial schema objects 48 | { 49 | // For now we hard-code TPC-H for experiments 50 | createTPCH(); 51 | } 52 | //--------------------------------------------------------------------------- 53 | const Schema::Table* Schema::lookupTable(const std::string& name) const 54 | // Check if a table exists in the schema 55 | { 56 | auto iter = tables.find(name); 57 | if (iter != tables.end()) 58 | return &(iter->second); 59 | return nullptr; 60 | } 61 | //--------------------------------------------------------------------------- 62 | } 63 | //--------------------------------------------------------------------------- 64 | -------------------------------------------------------------------------------- /infra/Schema.hpp: -------------------------------------------------------------------------------- 1 | #ifndef H_saneql_Schema 2 | #define H_saneql_Schema 3 | //--------------------------------------------------------------------------- 4 | #include <string> 5 | #include <unordered_map> 6 | #include <vector> 7 | //--------------------------------------------------------------------------- 8 | // SaneQL 9 | // (c) 2023 Thomas Neumann 10 | // SPDX-License-Identifier: BSD-3-Clause 11 | //--------------------------------------------------------------------------- 12 | namespace saneql { 13 | //--------------------------------------------------------------------------- 14 | /// An SQL data type 15 | class Type { 16 | public: 17 | /// Known types 18 | enum Tag { 19 | Unknown, 20 | Bool, 21 | Integer, 22 | Decimal, 23 | Char, 24 | Varchar, 25 | Text, 26 | Date, 27 | Interval 28 | }; 29 | 30 | private: 31 | /// The type 32 | Tag tag; 33 | /// The modifier 34 | unsigned modifier; 35 | 36 | /// Constructor 37 | constexpr Type(Tag tag, unsigned modifier) : tag(tag), modifier(modifier) {} 38 | 39 | public: 40 | /// Get the type tag 41 | constexpr Tag 
getType() const { return tag; } 42 | 43 | /// Get the name (for error reporting) 44 | std::string getName() const; 45 | 46 | /// Is the type nullable? 47 | constexpr bool isNullable() const { return modifier & 1; } 48 | /// Make the type nullable 49 | constexpr Type asNullable() const { return Type(tag, modifier | 1); } 50 | /// Change the nullability 51 | constexpr Type withNullable(bool nullable) const { return Type(tag, ((modifier >> 1) << 1) | nullable); } 52 | 53 | /// Get the precision of decimal types 54 | constexpr unsigned getPrecision() const { return modifier >> 9; } 55 | /// Get the scale of decimal types 56 | constexpr unsigned getScale() const { return (modifier >> 1) & 0xFF; } 57 | /// Get the maximum length of text types 58 | constexpr unsigned getLength() const { return (modifier >> 1); } 59 | 60 | /// Comparison 61 | bool operator==(const Type& o) const { return ((tag == o.tag) && (modifier == o.modifier)); } 62 | /// Comparison 63 | bool operator!=(const Type& o) const { return ((tag != o.tag) || (modifier != o.modifier)); } 64 | 65 | /// Create an unknown type 66 | static constexpr Type getUnknown() { return Type(Unknown, 0); } 67 | /// Create a boolean type 68 | static constexpr Type getBool() { return Type(Bool, 0); } 69 | /// Create an integer type 70 | static constexpr Type getInteger() { return Type(Integer, 0); } 71 | /// Create a decimal type 72 | static constexpr Type getDecimal(unsigned precision, unsigned scale) { return Type(Decimal, (precision << 9) | (scale << 1)); } 73 | /// Create a char type 74 | static constexpr Type getChar(unsigned maxLen) { return Type(Char, maxLen << 1); } 75 | /// Create a varchar type 76 | static constexpr Type getVarchar(unsigned maxLen) { return Type(Varchar, maxLen << 1); } 77 | /// Create a text type 78 | static constexpr Type getText() { return Type(Text, 0); } 79 | /// Create a date type 80 | static constexpr Type getDate() { return Type(Date, 0); } 81 | /// Create an interval type 82 | static 
constexpr Type getInterval() { return Type(Interval, 0); } 83 | }; 84 | //--------------------------------------------------------------------------- 85 | /// Access to the database schema 86 | class Schema { 87 | public: 88 | /// A column definition 89 | struct Column { 90 | /// The name 91 | std::string name; 92 | /// The type 93 | Type type; 94 | }; 95 | /// A table definition 96 | struct Table { 97 | /// The columns 98 | std::vector columns; 99 | }; 100 | 101 | private: 102 | /// The tables 103 | std::unordered_map tables; 104 | 105 | /// Create a table 106 | void createTable(std::string name, std::initializer_list columns); 107 | /// Create the TPC-H schema 108 | void createTPCH(); 109 | 110 | public: 111 | /// Create some test schema for experiments 112 | void populateSchema(); 113 | 114 | /// Check if a table exists in the schema 115 | const Table* lookupTable(const std::string& name) const; 116 | }; 117 | //--------------------------------------------------------------------------- 118 | } 119 | //--------------------------------------------------------------------------- 120 | #endif 121 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include "algebra/Operator.hpp" 2 | #include "infra/Schema.hpp" 3 | #include "parser/ASTBase.hpp" 4 | #include "parser/SaneQLLexer.hpp" 5 | #include "parser/SaneQLParser.hpp" 6 | #include "semana/SemanticAnalysis.hpp" 7 | #include "sql/SQLWriter.hpp" 8 | #include 9 | #include 10 | #include 11 | //--------------------------------------------------------------------------- 12 | using namespace std; 13 | using namespace saneql; 14 | //--------------------------------------------------------------------------- 15 | // (c) 2023 Thomas Neumann 16 | //--------------------------------------------------------------------------- 17 | static string readFiles(unsigned count, char* files[]) { 18 | 
ostringstream output; 19 | for (unsigned i = 0; i != count; i++) { 20 | ifstream in(files[i]); 21 | if (!in.is_open()) { 22 | cerr << "unable to read " << files[i] << endl; 23 | exit(1); 24 | } 25 | output << in.rdbuf(); 26 | output << "\n"; 27 | } 28 | return output.str(); 29 | } 30 | //--------------------------------------------------------------------------- 31 | int main(int argc, char* argv[]) { 32 | if (argc < 2) { 33 | cerr << "usage: " << argv[0] << " file..." << endl; 34 | return 1; 35 | } 36 | 37 | Schema schema; 38 | schema.populateSchema(); 39 | 40 | string query = readFiles(argc - 1, argv + 1); 41 | ASTContainer container; 42 | ast::AST* tree = nullptr; 43 | try { 44 | tree = SaneQLParser::parse(container, query); 45 | } catch (const exception& e) { 46 | cerr << e.what() << endl; 47 | return 1; 48 | } 49 | 50 | SemanticAnalysis semana(schema); 51 | try { 52 | auto res = semana.analyzeQuery(tree); 53 | SQLWriter sql; 54 | if (res.isScalar()) { 55 | sql.write("select "); 56 | res.scalar()->generate(sql); 57 | } else { 58 | algebra::Sort* sort = nullptr; 59 | auto tree = res.table().get(); 60 | if (auto s = dynamic_cast(tree)) { 61 | sort = s; 62 | tree = sort->input.get(); 63 | } 64 | sql.write("select "); 65 | bool first = true; 66 | for (auto& c : res.getBinding().getColumns()) { 67 | if (first) 68 | first = false; 69 | else 70 | sql.write(", "); 71 | sql.writeIU(c.iu); 72 | sql.write(" as "); 73 | sql.writeIdentifier(c.name); 74 | } 75 | sql.write(" from "); 76 | tree->generate(sql); 77 | sql.write(" s"); 78 | if (sort) { 79 | if (!sort->order.empty()) { 80 | sql.write(" order by "); 81 | bool first = true; 82 | for (auto& o : sort->order) { 83 | if (first) 84 | first = false; 85 | else 86 | sql.write(", "); 87 | o.value->generate(sql); 88 | if (o.collate != Collate{}) sql.write(" collate TODO"); // TODO 89 | if (o.descending) sql.write(" desc"); 90 | } 91 | } 92 | if (sort->limit.has_value()) { 93 | sql.write(" limit "); 94 | 
sql.write(to_string(*(sort->limit))); 95 | } 96 | if (sort->offset.has_value()) { 97 | sql.write(" offset "); 98 | sql.write(to_string(*(sort->offset))); 99 | } 100 | } 101 | } 102 | cout << sql.getResult() << endl; 103 | } catch (const exception& e) { 104 | cerr << e.what() << endl; 105 | return 1; 106 | } 107 | 108 | return 0; 109 | } 110 | -------------------------------------------------------------------------------- /makeutil/patchbison: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | bison="$1" 4 | srcfile="$2" 5 | dstfile="$3" 6 | "$bison" -o$dstfile $srcfile -Wcounterexamples 7 | sed 's/private:/public:/' $dstfile > $dstfile.tmp 8 | mv $dstfile.tmp $dstfile 9 | -------------------------------------------------------------------------------- /parser/ASTBase.cpp: -------------------------------------------------------------------------------- 1 | #include "parser/ASTBase.hpp" 2 | #include "parser/SaneQLLexer.hpp" 3 | #include 4 | #include 5 | #include 6 | //--------------------------------------------------------------------------- 7 | // SaneQL 8 | // (c) 2023 Thomas Neumann 9 | // SPDX-License-Identifier: BSD-3-Clause 10 | //--------------------------------------------------------------------------- 11 | using namespace std; 12 | //--------------------------------------------------------------------------- 13 | namespace saneql { 14 | //--------------------------------------------------------------------------- 15 | namespace { 16 | struct NodeTemplate : ASTBase { 17 | ASTBase* entries[]; 18 | NodeTemplate(string_view content, unsigned descriptor) : ASTBase(content, descriptor) {} 19 | }; 20 | struct ListTemplate : ASTBase { 21 | ASTBase *head, *tail, *end; 22 | ListTemplate(string_view content, unsigned descriptor, ASTBase* head, ASTBase* tail, ASTBase* end) : ASTBase(content, descriptor), head(head), tail(tail), end(end) {} 23 | }; 24 | struct Hash { 25 | static uint64_t hash(const void* data, 
uint64_t len, uint64_t seed); 26 | }; 27 | uint64_t Hash::hash(const void* data, uint64_t len, uint64_t seed) { 28 | uint64_t result = 0xcbf29ce484222325 + seed; 29 | for (auto iter = static_cast(data), limit = iter + len; iter != limit; ++iter) { 30 | result ^= static_cast(*iter); 31 | result *= 0x100000001b3; 32 | } 33 | return result; 34 | } 35 | } 36 | //--------------------------------------------------------------------------- 37 | void ASTBase::computeHash() 38 | // Compute the hash value 39 | { 40 | if (!getRawType()) { 41 | hash = Hash::hash(content.data(), content.size(), 0); 42 | } else { 43 | unsigned count = getRawEntryCount(); 44 | vector hashes(count + 1); 45 | hashes[0] = descriptor; 46 | for (unsigned index = 0; index != count; ++index) { 47 | auto e = static_cast(this)->entries[index]; 48 | hashes[index + 1] = e ? e->getHash() : 0; 49 | } 50 | hash = Hash::hash(hashes.data(), (count + 1) * sizeof(uint64_t), 0); 51 | } 52 | } 53 | //--------------------------------------------------------------------------- 54 | string ASTBase::getTokenValue(unsigned encoding) const 55 | // Get the value of a token 56 | { 57 | return SaneQLLexer::TokenInfo{content, static_cast(encoding)}.asString(); 58 | } 59 | //--------------------------------------------------------------------------- 60 | bool ASTBase::isEquivalent(const ASTBase* other) const 61 | // Check for equivalence 62 | { 63 | // Trivial checks first 64 | if (this == other) return true; 65 | if ((!other) || (descriptor != other->descriptor) || (hash != other->hash)) return false; 66 | 67 | // A token? 
68 | if (!getRawType()) 69 | return content == other->content; 70 | 71 | // Check the whole tree 72 | vector> todo; 73 | todo.emplace_back(this, other); 74 | while (!todo.empty()) { 75 | auto current = todo.back(); 76 | todo.pop_back(); 77 | 78 | // Trivial checks 79 | if (current.first == current.second) continue; 80 | if ((!current.first) || (!current.second) || (current.first->descriptor != current.second->descriptor) || (current.first->hash != current.second->hash)) return false; 81 | 82 | // A token? 83 | if (!current.first->getRawType()) { 84 | if (current.first->content != current.second->content) return false; 85 | continue; 86 | } 87 | 88 | // Recurse 89 | unsigned entryCount = current.first->descriptor & ((1u << entryCountBits) - 1); 90 | auto a = static_cast(current.first), b = static_cast(current.second); 91 | for (unsigned index = 0; index != entryCount; ++index) 92 | todo.emplace_back(a->entries[index], b->entries[index]); 93 | } 94 | return true; 95 | } 96 | //--------------------------------------------------------------------------- 97 | ASTContainer::ASTContainer() 98 | // Constructor 99 | { 100 | } 101 | //--------------------------------------------------------------------------- 102 | ASTContainer::~ASTContainer() 103 | // Destructor 104 | { 105 | while (chunks) { 106 | auto next = chunks->next; 107 | delete[] reinterpret_cast(chunks); 108 | chunks = next; 109 | } 110 | } 111 | //--------------------------------------------------------------------------- 112 | void ASTContainer::allocateNewChunk(size_t size) 113 | // Allocate a new chunk 114 | { 115 | unsigned newSize = sizeof(Chunk) + size; 116 | auto* newChunk = reinterpret_cast(new char[newSize]); 117 | lastSize = newSize; 118 | totalSize += newSize; 119 | newChunk->next = chunks; 120 | freeBegin = newChunk->data; 121 | freeEnd = freeBegin + size; 122 | chunks = newChunk; 123 | } 124 | //--------------------------------------------------------------------------- 125 | void* 
ASTContainer::allocateRaw(unsigned size) 126 | // Allocate a node 127 | { 128 | // Do we need more space? 129 | if ((freeEnd - freeBegin) < size) [[unlikely]] { 130 | auto targetSize = max(size, max(lastSize, totalSize / 4)); 131 | auto lowerBound = max(size, 1024); 132 | auto upperBound = max(size, (10u << 20) - sizeof(Chunk)); 133 | allocateNewChunk(clamp(targetSize, lowerBound, upperBound)); 134 | } 135 | 136 | auto result = freeBegin; 137 | freeBegin += size; 138 | return result; 139 | } 140 | //--------------------------------------------------------------------------- 141 | ASTBase* ASTContainer::allocateAST(string_view view, unsigned descriptor, initializer_list entries) 142 | // Allocate an AST node 143 | { 144 | unsigned size = sizeof(NodeTemplate) + entries.size() * sizeof(ASTBase*); 145 | auto result = new (allocateRaw(size)) NodeTemplate(view, descriptor); 146 | unsigned slot = 0; 147 | for (auto i : entries) 148 | result->entries[slot++] = i; 149 | result->computeHash(); 150 | return result; 151 | } 152 | //--------------------------------------------------------------------------- 153 | ASTBase* ASTContainer::allocateASTImpl(string_view view, unsigned descriptor, ...) 154 | // Allocate an AST node. 
C-style interface to reduce compile time 155 | { 156 | va_list ap; 157 | va_start(ap, descriptor); 158 | unsigned count = descriptor & ((1u << ASTBase::entryCountBits) - 1); 159 | unsigned size = sizeof(NodeTemplate) + count * sizeof(ASTBase*); 160 | auto result = new (allocateRaw(size)) NodeTemplate(view, descriptor); 161 | for (unsigned slot = 0; slot != count; ++slot) 162 | result->entries[slot] = va_arg(ap, ASTBase*); 163 | va_end(ap); 164 | result->computeHash(); 165 | return result; 166 | } 167 | //--------------------------------------------------------------------------- 168 | ASTBase* ASTContainer::createList(string_view view, unsigned descriptor, ASTBase* head) 169 | // Create a new list 170 | { 171 | auto res = new (allocateRaw(sizeof(ListTemplate))) ListTemplate(view, descriptor, head, nullptr, nullptr); 172 | res->computeHash(); 173 | return res; 174 | } 175 | //--------------------------------------------------------------------------- 176 | ASTBase* ASTContainer::appendList(string_view view, unsigned descriptor, ASTBase* head, ASTBase* tail) 177 | // Append a list result 178 | { 179 | // Append to an empty list? 180 | if (!head) { 181 | auto res = new (allocateRaw(sizeof(ListTemplate))) ListTemplate(view, descriptor, tail, nullptr, nullptr); 182 | res->computeHash(); 183 | return res; 184 | } 185 | 186 | // No, examine the list 187 | auto list = static_cast(head); 188 | auto existingTail = list; 189 | uint64_t hash = list->getHash(); 190 | if (!existingTail->end) { 191 | while (existingTail->tail) { 192 | existingTail = static_cast(existingTail->tail); 193 | uint64_t hashPart = existingTail->head ? 
//---------------------------------------------------------------------------
class ASTContainer;
//---------------------------------------------------------------------------
/// Base class for all ast nodes. A node consists of the source text it covers,
/// a descriptor that packs type, sub type, and entry count, and a hash value
// NOTE(review): template parameter lists and cast targets were missing in this
// listing and have been restored from their use — confirm against the repository
class ASTBase {
   private:
   /// The number of bits to use to represent the entry count
   static constexpr unsigned entryCountBits = 10;
   /// The number of bits to use for the sub type
   static constexpr unsigned subTypeBits = 10;

   /// The content
   std::string_view content;
   /// The descriptor
   uintptr_t descriptor;
   /// The hash value
   uint64_t hash;

   protected:
   /// Get the value of a token
   std::string getTokenValue(unsigned encoding) const;

   friend class ASTContainer;

   public:
   /// Constructor. The hash is initialized to 0, use computeHash to set it
   ASTBase(std::string_view content, uintptr_t descriptor) : content(content), descriptor(descriptor), hash(0) {}

   /// Build a descriptor. Layout from the lowest bit: entry count, sub type, type
   template <class A, class B>
   static constexpr uintptr_t buildDescriptor(A type, B subType, uintptr_t entryCount) {
      return entryCount | (static_cast<uintptr_t>(subType) << entryCountBits) | (static_cast<uintptr_t>(type) << (entryCountBits + subTypeBits));
   }

   /// Compute the hash value. Is usually called automatically in the derived constructors, but is needed for manual node constructions
   void computeHash();

   /// Get the raw type
   unsigned getRawType() const { return descriptor >> (entryCountBits + subTypeBits); }
   /// Get the raw subtype
   unsigned getRawSubType() const { return (descriptor >> entryCountBits) & ((1u << subTypeBits) - 1); }
   /// Get the raw entry count
   unsigned getRawEntryCount() const { return descriptor & ((1u << entryCountBits) - 1); }
   /// Get the hash value
   uint64_t getHash() const { return hash; }

   /// Get the content
   std::string_view getContent() const { return content; }
   /// Get the begin of the content. Uses data() because the iterator type of
   /// string_view is implementation-defined and not necessarily a pointer
   const char* begin() const { return content.data(); }
   /// Get the end of the content
   const char* end() const { return content.data() + content.size(); }

   /// Check for equivalence
   bool isEquivalent(const ASTBase* other) const;
};
//---------------------------------------------------------------------------
/// A container for AST nodes. Nodes are carved out of larger memory chunks
/// that are released together when the container is destroyed
class ASTContainer {
   private:
   /// A memory chunk
   struct Chunk {
      /// The next chunk
      Chunk* next;
      /// The data
      char data[];
   };
   /// The chunks
   Chunk* chunks = nullptr;
   /// The free memory
   char *freeBegin = nullptr, *freeEnd = nullptr;
   /// The result
   ASTBase* result = nullptr;
   /// The last allocated size
   size_t lastSize = 0;
   /// The total allocated size
   size_t totalSize = 0;

   ASTContainer(const ASTContainer&) = delete;
   void operator=(const ASTContainer&) = delete;

   /// Allocate a new chunk
   void allocateNewChunk(size_t size);

   public:
   /// Constructor
   ASTContainer();
   /// Destructor
   ~ASTContainer();

   /// Set the result
   void setResult(ASTBase* ast) { result = ast; }
   /// Get the result. The caller has to know the concrete node type
   template <class T>
   T* getResult() const { return static_cast<T*>(result); }

   /// Allocate memory
   void* allocateRaw(unsigned size);
   /// Allocate an AST node
   ASTBase* allocateAST(std::string_view view, unsigned descriptor, std::initializer_list<ASTBase*> entries);
   /// Allocate an AST node. C-style variadic function to reduce compile time
   ASTBase* allocateASTImpl(std::string_view view, unsigned descriptor, ...);
   /// Create a new list
   ASTBase* createList(std::string_view view, unsigned descriptor, ASTBase* head);
   /// Append a list result
   ASTBase* appendList(std::string_view view, unsigned descriptor, ASTBase* head, ASTBase* tail);
};
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
/// A keyword category
enum class KeywordCategory : unsigned { RESERVED_KEYWORD,
                                        UNRESERVED_KEYWORD,
                                        TYPE_FUNC_NAME_KEYWORD,
                                        COL_NAME_KEYWORD,
                                        PSEUDO_KEYWORD };
//---------------------------------------------------------------------------
/// A keyword description: the token the lexer reports for the keyword and its category
struct KeywordInfo {
   SaneQLLexer::Token token;
   KeywordCategory category;
};
//---------------------------------------------------------------------------
/// Hash functor for strings that ignores the case of ASCII letters: 'A'-'Z'
/// are folded to lower case before being mixed in. Uses the same multiply/xor
/// scheme and constants as the hash in ASTBase.cpp
struct HashCaseInsensitive {
   uint64_t operator()(string_view v) const {
      uint64_t result = 0xcbf29ce484222325;
      for (unsigned c : v) {
         result ^= ((c >= 'A') && (c <= 'Z')) ? (c + 'a' - 'A') : c;
         result *= 0x100000001b3;
      }
      return result;
   }
};
//---------------------------------------------------------------------------
static auto getKeywordsTable()
// Build the entries for the keywords lookup table. The keywords come from
// parser/Keywords.hpp, which is included twice with different definitions of
// the KEYWORD macro
{
   using enum KeywordCategory;
   // First expansion: every KEYWORD entry becomes "+1", the sum is the number of keywords
   constexpr unsigned numKeywords =
#define KEYWORD(A, B, C) +1
#include "parser/Keywords.hpp"
#undef KEYWORD
      ;

   struct RawKeyword {
      string_view keyword;
      SaneQLLexer::Token token;
      KeywordCategory category;
   };
   // Second expansion: every KEYWORD entry becomes one initializer
   static RawKeyword rawKeywords[] = {
#define KEYWORD(A, B, C) {A##sv, SaneQLLexer::Token::B, C},
#include "parser/Keywords.hpp"
#undef KEYWORD
   };

   // NOTE(review): the template arguments of unordered_map are missing in this
   // listing and cannot be recovered from here (presumably string_view to
   // KeywordInfo with HashCaseInsensitive) — restore from the repository
   unordered_map res;
   for (unsigned reader = 0; reader != numKeywords; ++reader) {
      // Pseudo keywords are not entered into the table
      if (rawKeywords[reader].category == PSEUDO_KEYWORD)
         continue;
      res[rawKeywords[reader].keyword] = {rawKeywords[reader].token, rawKeywords[reader].category};
   }
   return res;
}
//---------------------------------------------------------------------------
/// The keyword table, built once during static initialization
static const auto keywordsHashTable = getKeywordsTable();
//---------------------------------------------------------------------------
static bool isWS(unsigned c)
// Recognize whitespace characters (including Unicode whitespace). The argument
// is compared against code point values, the list also contains the zero-width
// characters 0x200B-0x200F and the byte order mark 0xFEFF
{
   switch (c) {
      case 0x09:
      case 0x0A:
      case 0x0B:
      case 0x0C:
      case 0x0D:
      case 0x20:
      case 0x00A0:
      case 0x2000:
      case 0x2001:
      case 0x2002:
      case 0x2003:
      case 0x2004:
      case 0x2005:
      case 0x2006:
      case 0x2007:
      case 0x2008:
      case 0x2009:
      case 0x200A:
      case 0x200B:
      case 0x200C:
      case 0x200D:
      case 0x200E:
      case 0x200F:
      case 0x3000:
      case 0x2028:
      case 0x2029:
      case 0xFEFF: return true;
   }
   return false;
}
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
static const char* skipWS(const char* current, const char* end, bool* nl = nullptr)
// Skip whitespaces and comments. Returns the position of the first character
// that is neither, or end. If nl is given it is set to true if a newline or a
// comment was skipped and to false otherwise
{
   if (nl) *nl = false;
   while (current < end) {
      unsigned c = *(current++);
      switch (c) {
         case 0x09:
         case 0x0B:
         case 0x0C:
         case 0x20: continue;
         case 0x0A:
         case 0x0D:
            if (nl) *nl = true;
            continue;
         case '-':
            // Only -- starts a comment, a single - is regular input
            if (!((current < end) && ((*current) == '-')))
               return current - 1;
            ++current;
            // Skip until the end of the line
            while (true) {
               if (current >= end)
                  return end;
               char next = *(current++);
               if ((next == 0x0A) || (next == 0x0D))
                  break;
            }
            if (nl) *nl = true; // (SQL Standard 5.2, Syntax Rule 10, comments are supposed to be treated like newlines)
            continue;
         case '/': {
            // Only /* starts a comment, a single / is regular input
            if (!((current < end) && ((*current) == '*')))
               return current - 1;
            ++current;
            // Skip until the closing */. The * must be part of the comment body,
            // the * of the opening /* does not count. Otherwise "/*/" would be
            // accepted as a complete comment
            const char* commentBody = current;
            while (true) {
               if (current >= end)
                  return end;
               if (*(current++) == '/')
                  if (((current - commentBody) >= 2) && (current[-2] == '*'))
                     break;
            }
            if (nl) *nl = true; // (SQL Standard 5.2, Syntax Rule 10, comments are supposed to be treated like newlines)
            continue;
         }
         default:
            // TODO accept Unicode whitespaces
            return current - 1;
      }
   }
   return end;
}
//---------------------------------------------------------------------------
static const char* findNextLiteralFragment(const char* current, const char* end)
// SQL allows literals to continue if separated by newline. Comments are
// supposed to be treated as newlines. Returns the position behind the opening
// quote of the next fragment, or nullptr if the literal does not continue
{
   bool nl = false;
   current = skipWS(current, end, &nl);

   // we can only continue if we had a newline in between (SQL Standard 5.3, Syntax Rule 7)
   if (nl && (current < end) && (current[0] == '\''))
      return current + 1;
   else
      return nullptr;
}
//---------------------------------------------------------------------------
Comments are supposed to be treated as newlines 162 | { 163 | bool nl = false; 164 | current = skipWS(current, end, &nl); 165 | 166 | // we can only continue if we had a newline in between (SQL Standard 5.3, Syntax Rule 7) 167 | if (nl && (current < end) && (current[0] == '\'')) 168 | return current + 1; 169 | else 170 | return nullptr; 171 | } 172 | //--------------------------------------------------------------------------- 173 | } 174 | //--------------------------------------------------------------------------- 175 | SaneQLLexer::SaneQLLexer(string_view input, Token forcedToken) 176 | : input(input), current(input.begin()), forcedToken(forcedToken) 177 | // Constructor 178 | { 179 | } 180 | //--------------------------------------------------------------------------- 181 | SaneQLLexer::Token SaneQLLexer::next(TokenInfo& info) 182 | // Get the next token 183 | { 184 | // Handle forced tokens 185 | if (forcedToken != Token::Eof) { 186 | info.content = ""sv; 187 | info.encoding = TokenInfo::Encoding::Raw; 188 | auto result = forcedToken; 189 | forcedToken = Token::Eof; 190 | return result; 191 | } 192 | 193 | auto result = nextImpl(info); 194 | return result; 195 | } 196 | //--------------------------------------------------------------------------- 197 | unsigned SaneQLLexer::nextChar() 198 | // Get the next character 199 | { 200 | // Check of of input 201 | if (current == input.end()) 202 | return 0; 203 | 204 | // Handle ASCII 205 | unsigned c = (*current) & 0xFF; 206 | if (!(c & 0x80)) { 207 | current += !!c; 208 | return c; 209 | } 210 | 211 | // TODO unicode support 212 | current += !!c; 213 | return c; 214 | } 215 | //--------------------------------------------------------------------------- 216 | unsigned SaneQLLexer::peekChar() 217 | // Retrieve the next character without consuming it. Does not interpret Unicode characters 218 | { 219 | return (current != input.end()) ? 
(*current) : 0; 220 | } 221 | //--------------------------------------------------------------------------- 222 | SaneQLLexer::Token SaneQLLexer::nextImpl(TokenInfo& info) 223 | // Get the next token 224 | { 225 | while (true) { 226 | // Many tokens are single-character anyway, prepare for handling these directly 227 | auto old = current; 228 | unsigned c = nextChar(); 229 | info.content = string_view(old, current - old); 230 | info.encoding = TokenInfo::Encoding::Raw; 231 | 232 | // Handle EOF 233 | if (!c) 234 | return (current == input.end()) ? Token::Eof : Token::Error; 235 | 236 | // Handle Unicode characters 237 | if (__builtin_expect(c >= 0x80, 0)) { 238 | // Recognize Unicode whitespace characters 239 | if (isWS(c)) 240 | continue; 241 | return lexIdentifier(info); 242 | } 243 | 244 | // Handle ASCII characters 245 | switch (c) { 246 | case 0x09: 247 | case 0x0A: 248 | case 0x0B: 249 | case 0x0C: 250 | case 0x0D: 251 | case 0x20: continue; 252 | case '!': 253 | case '#': return lexOperator(info); 254 | case '$': 255 | c = peekChar(); 256 | if ((c >= '0') && (c <= '9')) 257 | return lexParameter(info); 258 | [[fallthrough]]; 259 | case '%': 260 | case '&': return lexOperator(info); 261 | case '(': return Token::LParen; 262 | case ')': return Token::RParen; 263 | case '*': 264 | case '+': return lexOperator(info); 265 | case ',': return Token::Comma; 266 | case '-': 267 | // -- start a single-line comment 268 | if (peekChar() == '-') { 269 | while (current != input.end()) { 270 | c = *(current++); 271 | if ((c == '\n') || (c == '\r')) 272 | break; 273 | } 274 | continue; 275 | } 276 | return lexOperator(info); 277 | case '.': 278 | // Recognize .. 
as DotDot and .[0-9] as number 279 | c = peekChar(); 280 | if (c == '.') { 281 | ++current; 282 | info.content = string_view(old, current - old); 283 | return Token::DotDot; 284 | } 285 | if ((c >= '0') && (c <= '9')) 286 | return lexNumber(info); 287 | return Token::Dot; 288 | case '/': 289 | // /* starts a multi-line comment 290 | if (peekChar() == '*') { 291 | ++current; 292 | unsigned c2 = 0; 293 | while (true) { 294 | c = nextChar(); 295 | if (!c) { 296 | ++current; 297 | info.content = string_view(old, current - old); 298 | return Token::UnterminatedMultilineComment; 299 | } 300 | if ((c2 == '*') && (c == '/')) 301 | break; 302 | c2 = c; 303 | } 304 | continue; 305 | } 306 | return lexOperator(info); 307 | case '0': 308 | case '1': 309 | case '2': 310 | case '3': 311 | case '4': 312 | case '5': 313 | case '6': 314 | case '7': 315 | case '8': 316 | case '9': return lexNumber(info); 317 | case ':': 318 | c = peekChar(); 319 | if ((c == ':') || (c == '=')) { 320 | ++current; 321 | info.content = string_view(old, current - old); 322 | return (c == ':') ? 
Token::ColonColon : Token::ColonEquals; 323 | } 324 | return Token::Colon; 325 | case ';': return Token::SemiColon; 326 | case '<': 327 | case '=': 328 | case '>': 329 | case '?': 330 | case '@': return lexOperator(info); 331 | case '[': return Token::LSquare; 332 | case '\"': return lexIdentifierLiteral(info); 333 | case '\'': return lexStringLiteral(info); 334 | case '\\': return lexClientCommand(info); 335 | case ']': return Token::RSquare; 336 | case '^': 337 | case '`': return lexOperator(info); 338 | case '{': return Token::LCurly; 339 | case '|': return lexOperator(info); 340 | case '}': return Token::RCurly; 341 | case '~': return lexOperator(info); 342 | default: 343 | if (((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')) || (c == '_')) 344 | return lexIdentifier(info); 345 | return Token::Error; 346 | } 347 | } 348 | } 349 | //--------------------------------------------------------------------------- 350 | SaneQLLexer::Token SaneQLLexer::lexClientCommand(TokenInfo& info) 351 | // Lex a client command 352 | { 353 | unsigned c = *(current++); 354 | info.content = {info.content.begin(), 2}; 355 | if (!(((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')) || ((c >= '0') && (c <= '9')) || (c == '?') || (c == '_'))) 356 | return Token::Error; 357 | 358 | while (true) { 359 | c = peekChar(); 360 | if (((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')) || ((c >= '0') && (c <= '9')) || (c == '?') || (c == '_')) { 361 | ++current; 362 | continue; 363 | } 364 | info.content = string_view(info.content.begin(), current - info.content.begin()); 365 | return Token::ClientCommand; 366 | } 367 | } 368 | //--------------------------------------------------------------------------- 369 | SaneQLLexer::Token SaneQLLexer::lexIdentifier(TokenInfo& info) 370 | // Lex an identifier 371 | { 372 | // Consume the identifier body 373 | while (true) { 374 | // Accept literal characters 375 | unsigned c = peekChar(); 376 | if (((c >= 'A') && (c <= 'Z')) || ((c >= 'a') 
&& (c <= 'z')) || ((c >= '0') && (c <= '9')) || (c == '_')) { 377 | ++current; 378 | continue; 379 | } 380 | 381 | // Every other non-Unicode character terminates the identifier 382 | if (!(c & 0x80)) 383 | break; 384 | 385 | // Accept non-whitespace Unicode characters 386 | auto old = current; 387 | c = nextChar(); 388 | if (isWS(c)) { 389 | current = old; 390 | break; 391 | } 392 | } 393 | info.content = string_view(info.content.begin(), current - info.content.begin()); 394 | info.encoding = TokenInfo::Encoding::Identifier; 395 | 396 | // Our identifier could be a keyword, check that 397 | auto keywordInfo = keywordsHashTable.find(info.content); 398 | if (keywordInfo != keywordsHashTable.end()) 399 | return keywordInfo->second.token; 400 | return Token::Identifier; 401 | } 402 | //--------------------------------------------------------------------------- 403 | SaneQLLexer::Token SaneQLLexer::lexIdentifierLiteral(TokenInfo& info) 404 | // Lex an identifier literal 405 | { 406 | for (auto limit = input.end(); current < limit;) { 407 | if ((*(current++)) == '\"') { 408 | // Recognize escaped quotes 409 | if ((current < limit) && ((*current) == '\"')) { 410 | ++current; 411 | continue; 412 | } 413 | 414 | info.content = string_view(info.content.begin(), current - info.content.begin()); 415 | info.encoding = TokenInfo::Encoding::IdentifierLiteral; 416 | return Token::Identifier; 417 | } 418 | } 419 | return Token::UnterminatedLiteral; 420 | } 421 | //--------------------------------------------------------------------------- 422 | SaneQLLexer::Token SaneQLLexer::lexNumber(TokenInfo& info) 423 | // Lex a number 424 | { 425 | auto begin = info.content.begin(); 426 | auto limit = input.end(); 427 | 428 | // The integer part 429 | if (begin[0] != '.') { 430 | while (true) { 431 | if (current == limit) { 432 | info.content = string_view(begin, current - begin); 433 | return Token::Integer; 434 | } 435 | char c = *(current++); 436 | if ((c >= '0') && (c <= '9')) 437 | 
continue; 438 | if ((c == '.') || (c == 'E') || (c == 'e')) 439 | break; 440 | --current; 441 | info.content = string_view(begin, current - begin); 442 | return Token::Integer; 443 | } 444 | } 445 | 446 | // The fractional part 447 | if (current[-1] == '.') { 448 | while (true) { 449 | if (current == limit) { 450 | info.content = string_view(begin, current - begin); 451 | return Token::Float; 452 | } 453 | char c = *(current++); 454 | if ((c >= '0') && (c <= '9')) 455 | continue; 456 | if ((c == 'E') || (c == 'e')) 457 | break; 458 | --current; 459 | info.content = string_view(begin, current - begin); 460 | return Token::Float; 461 | } 462 | } 463 | 464 | // The exponent part 465 | auto beginExponent = current - 1; 466 | unsigned c = peekChar(); 467 | if ((c == '+') || (c == '-')) { 468 | ++current; 469 | c = peekChar(); 470 | } 471 | if (!((c >= '0') && (c <= '9'))) { 472 | current = beginExponent; 473 | info.content = string_view(begin, current - begin); 474 | return Token::Float; 475 | } 476 | while (true) { 477 | if (current == limit) { 478 | info.content = string_view(begin, current - begin); 479 | return Token::Float; 480 | } 481 | char c = *(current++); 482 | if ((c >= '0') && (c <= '9')) 483 | continue; 484 | --current; 485 | info.content = string_view(begin, current - begin); 486 | return Token::Float; 487 | } 488 | } 489 | //--------------------------------------------------------------------------- 490 | SaneQLLexer::Token SaneQLLexer::lexOperator(TokenInfo& info) 491 | // Lex an operator 492 | { 493 | // Find the end of the operator string 494 | bool specialChars = false; 495 | for (auto limit = input.end(); current < limit; ++current) { 496 | switch (*current) { 497 | case '*': 498 | case '+': 499 | case '<': 500 | case '=': 501 | case '>': continue; 502 | case '!': 503 | case '#': 504 | case '%': 505 | case '&': 506 | case '?': 507 | case '@': 508 | case '^': 509 | case '`': 510 | case '|': 511 | case '~': specialChars = true; continue; 512 | case 
'-': 513 | if (((current + 1) < limit) && (current[1] == '-')) break; 514 | continue; // Stop for "--" comments 515 | case '/': 516 | if (((current + 1) < limit) && (current[1] == '*')) break; 517 | continue; // Stop for "/*" comments 518 | } 519 | break; 520 | } 521 | 522 | // Drop trailing + and - for regular operators 523 | auto begin = info.content.begin(); 524 | if (!specialChars) { 525 | while (true) { 526 | if (((current - begin) <= 1) || ((current[-1] != '+') && (current[-1] != '-'))) 527 | break; 528 | --current; 529 | } 530 | } 531 | info.content = string_view(begin, current - begin); 532 | 533 | // Recognize some named operators 534 | if ((current - begin) == 1) { 535 | switch (begin[0]) { 536 | case '%': return Token::Percent; 537 | case '*': return Token::Asterisk; 538 | case '+': return Token::Plus; 539 | case '-': return Token::Minus; 540 | case '/': return Token::Solidus; 541 | case '<': return Token::Less; 542 | case '=': return Token::Equals; 543 | case '>': return Token::Greater; 544 | case '^': return Token::Circumflex; 545 | case '!': return Token::Exclamation; 546 | } 547 | } else if ((current - begin) == 2) { 548 | char c = begin[0]; 549 | if (c == '<') { 550 | char c = begin[1]; 551 | if (c == '>') 552 | return Token::NotEquals; 553 | if (c == '=') 554 | return Token::LessEquals; 555 | } else if (c == '>') { 556 | char c = begin[1]; 557 | if (c == '=') 558 | return Token::GreaterEquals; 559 | } else if (c == '!') { 560 | char c = begin[1]; 561 | if (c == '=') 562 | return Token::NotEquals; 563 | } else if (c == '=') { 564 | char c = begin[1]; 565 | if (c == '>') 566 | return Token::EqualsGreater; 567 | } else if (c == '&') { 568 | char c = begin[1]; 569 | if (c == '&') 570 | return Token::AND; 571 | } else if (c == '|') { 572 | char c = begin[1]; 573 | if (c == '|') 574 | return Token::OR; 575 | } 576 | } 577 | return Token::Operator; 578 | } 579 | //--------------------------------------------------------------------------- 580 | 
SaneQLLexer::Token SaneQLLexer::lexParameter(TokenInfo& info) 581 | // Lex a parameter 582 | { 583 | while (true) { 584 | unsigned c = peekChar(); 585 | if ((c >= '0') && (c <= '9')) 586 | ++current; 587 | else 588 | break; 589 | } 590 | 591 | info.content = string_view(info.content.begin(), current - info.content.begin()); 592 | info.encoding = TokenInfo::Encoding::Parameter; 593 | return Token::Parameter; 594 | } 595 | //--------------------------------------------------------------------------- 596 | SaneQLLexer::Token SaneQLLexer::lexStringLiteral(TokenInfo& info) 597 | // Lex a string literal 598 | { 599 | for (auto limit = input.end(); current < limit;) { 600 | char c = *(current++); 601 | if (c == '\'') { 602 | // Escaped quote? 603 | if ((current < limit) && ((*current) == '\'')) { 604 | ++current; 605 | continue; 606 | } 607 | 608 | // End of literal? 609 | auto next = findNextLiteralFragment(current, limit); 610 | if (!next) { 611 | info.content = string_view(info.content.begin(), current - info.content.begin()); 612 | info.encoding = TokenInfo::Encoding::StringLiteral; 613 | return Token::String; 614 | } 615 | current = next; 616 | } 617 | } 618 | return Token::UnterminatedLiteral; 619 | } 620 | //--------------------------------------------------------------------------- 621 | string SaneQLLexer::TokenInfo::asString() const 622 | // Get the content converted into a regular string 623 | { 624 | switch (encoding) { 625 | case Encoding::Raw: 626 | return string(content.begin(), content.end()); 627 | case Encoding::Identifier: { 628 | string result; 629 | result.reserve(content.size()); 630 | for (char c : content) { 631 | if ((c >= 'A') && (c <= 'Z')) 632 | c += 'a' - 'A'; 633 | result += c; 634 | } 635 | return result; 636 | } 637 | case Encoding::IdentifierLiteral: { 638 | string result; 639 | for (auto iter = content.begin() + 1, limit = content.end() - 1; iter < limit; ++iter) { 640 | char c = *iter; 641 | // Handle quotes within the literal 642 | if 
(c == '\"') 643 | ++iter; // skip the double quote 644 | result += c; 645 | } 646 | return result; 647 | } 648 | case Encoding::Parameter: 649 | return string(content.begin() + 1, content.end()); 650 | case Encoding::StringLiteral: { 651 | string result; 652 | for (auto iter = content.begin() + 1, limit = content.end() - 1; iter < limit; ++iter) { 653 | assert(iter); 654 | char c = *iter; 655 | // Handle quotes within the literal 656 | if (c == '\'') { 657 | if (iter[1] == '\'') { 658 | ++iter; 659 | } else { 660 | // Must have another fragment 661 | iter = findNextLiteralFragment(iter + 1, limit) - 1; 662 | continue; 663 | } 664 | } 665 | result += c; 666 | } 667 | return result; 668 | } 669 | } 670 | __builtin_unreachable(); 671 | } 672 | //--------------------------------------------------------------------------- 673 | bool SaneQLLexer::isKeyword(string_view symbol) 674 | // Check if a symbol is a keyword 675 | { 676 | return keywordsHashTable.find(symbol) != keywordsHashTable.end(); 677 | } 678 | //--------------------------------------------------------------------------- 679 | static void appendSymbol(string& message, const char* symbol) 680 | // Append a symbol, stripping quotes if needed 681 | { 682 | unsigned len = strlen(symbol); 683 | if (len && (symbol[len - 1] == '"') && (symbol[0] != '"')) --len; 684 | message.append(symbol, len); 685 | } 686 | //--------------------------------------------------------------------------- 687 | void SaneQLLexer::reportSyntaxError(unsigned column, const char* gotSymbol, const std::vector& expectedSymbols) 688 | // Report a syntax error in the current input 689 | { 690 | // Bounds check the column 691 | if (column > 0) --column; 692 | if (column > input.size()) column = input.size(); 693 | 694 | // Provide a query extract 695 | constexpr unsigned contextSize = 25; 696 | string message = "syntax error near '"; 697 | if (column > contextSize) { 698 | message.append("...", 3); 699 | message.append(input.begin() + column - 
contextSize + 3, contextSize - 3); 700 | } else { 701 | message.append(input.begin(), column); 702 | } 703 | if (column) 704 | message.append(" ", 1); 705 | message.append("*error*"); 706 | if (column < input.size()) 707 | message.append(" ", 1); 708 | if ((input.size() - column) > contextSize) { 709 | message.append(input.begin() + column, contextSize - 3); 710 | message.append("...", 3); 711 | } else { 712 | message.append(input.begin() + column, input.size() - column); 713 | } 714 | message.append("'", 1); 715 | 716 | // Show the current symbol 717 | if (gotSymbol) { 718 | message.append(", got ", 6); 719 | appendSymbol(message, gotSymbol); 720 | } 721 | 722 | // Make suggestions 723 | if (!expectedSymbols.empty()) { 724 | message.append(", expected ", 11); 725 | bool first = true; 726 | for (auto s : expectedSymbols) { 727 | if (first) 728 | first = false; 729 | else 730 | message.append(" or ", 4); 731 | appendSymbol(message, s); 732 | } 733 | } 734 | message.append("."); 735 | 736 | // And throw 737 | throw runtime_error(move(message)); 738 | } 739 | //--------------------------------------------------------------------------- 740 | } 741 | //--------------------------------------------------------------------------- 742 | -------------------------------------------------------------------------------- /parser/SaneQLLexer.hpp: -------------------------------------------------------------------------------- 1 | #ifndef H_saneql_SaneQLLexer 2 | #define H_saneql_SaneQLLexer 3 | //--------------------------------------------------------------------------- 4 | #include 5 | #include 6 | //--------------------------------------------------------------------------- 7 | // SaneQL 8 | // (c) 2023 Thomas Neumann 9 | // SPDX-License-Identifier: BSD-3-Clause 10 | //--------------------------------------------------------------------------- 11 | namespace saneql { 12 | //--------------------------------------------------------------------------- 13 | /// A lexer for SaneQL 
statements 14 | class SaneQLLexer { 15 | public: 16 | /// A token 17 | enum class Token : unsigned { 18 | Eof, 19 | Error, 20 | Asterisk, 21 | Circumflex, 22 | ClientCommand, 23 | Colon, 24 | ColonColon, 25 | ColonEquals, 26 | Comma, 27 | Dot, 28 | DotDot, 29 | Equals, 30 | EqualsGreater, 31 | Exclamation, 32 | Float, 33 | Greater, 34 | GreaterEquals, 35 | Identifier, 36 | Integer, 37 | LCurly, 38 | LParen, 39 | LSquare, 40 | Less, 41 | LessEquals, 42 | Minus, 43 | NotEquals, 44 | Operator, 45 | Percent, 46 | Parameter, 47 | Plus, 48 | RCurly, 49 | RParen, 50 | RSquare, 51 | SemiColon, 52 | Solidus, 53 | String, 54 | UnterminatedLiteral, 55 | UnterminatedMultilineComment, 56 | #define KEYWORD(A, B, C) B, 57 | #include "parser/Keywords.hpp" 58 | #undef KEYWORD 59 | }; 60 | /// The content of a token 61 | struct TokenInfo { 62 | /// Possible token encodings 63 | enum class Encoding : unsigned { 64 | Raw, 65 | Identifier, 66 | IdentifierLiteral, 67 | Parameter, 68 | StringLiteral, 69 | }; 70 | 71 | /// The content 72 | std::string_view content; 73 | /// The encoding 74 | Encoding encoding; 75 | 76 | /// Get the content converted into a regular string 77 | std::string asString() const; 78 | }; 79 | 80 | private: 81 | /// Get the next character 82 | unsigned nextChar(); 83 | /// Retrieve the next character without consuming it. 
Does not interpret unicode characters 84 | inline unsigned peekChar(); 85 | 86 | /// Get the next token 87 | Token nextImpl(TokenInfo& info); 88 | 89 | /// Lex a client command 90 | Token lexClientCommand(TokenInfo& info); 91 | /// Lex an identifier 92 | Token lexIdentifier(TokenInfo& info); 93 | /// Lex an identifier literal 94 | Token lexIdentifierLiteral(TokenInfo& info); 95 | /// Lex a number 96 | Token lexNumber(TokenInfo& info); 97 | /// Lex an operator 98 | Token lexOperator(TokenInfo& info); 99 | /// Lex a parameter 100 | Token lexParameter(TokenInfo& info); 101 | /// Lex a string literal 102 | Token lexStringLiteral(TokenInfo& info); 103 | 104 | private: 105 | /// The input 106 | std::string_view input; 107 | /// The current position 108 | std::string_view::iterator current; 109 | /// A forced token (if any) 110 | Token forcedToken; 111 | 112 | public: 113 | /// Constructor 114 | explicit SaneQLLexer(std::string_view input, Token forcedToken = Token::Eof); 115 | 116 | /// Access the full text 117 | std::string_view getFullText() const { return input; } 118 | /// Get the current position 119 | std::string_view::iterator savePosition() const { return current; } 120 | /// Go back to a previously saved position 121 | void restorePosition(std::string_view::iterator p) { current = p; } 122 | 123 | /// Get the next token 124 | Token next(TokenInfo& info); 125 | 126 | /// Get the column of a certain position 127 | unsigned getColumnId(const char* pos) const { return pos - input.begin() + 1; } 128 | /// Get a content range from bounds 129 | std::string_view getRangeFromBounds(unsigned beginColumn, unsigned endColumn) const { return std::string_view{input.begin() + beginColumn - 1, endColumn - beginColumn}; } 130 | /// Check if a symbol is a keyword 131 | static bool isKeyword(std::string_view symbol); 132 | 133 | /// Report a syntax error in the current input 134 | void reportSyntaxError(unsigned column, const char* gotSymbol, const std::vector& expectedSymbols); 
135 | }; 136 | //--------------------------------------------------------------------------- 137 | } 138 | //--------------------------------------------------------------------------- 139 | #endif 140 | -------------------------------------------------------------------------------- /parser/SaneQLParser.hpp: -------------------------------------------------------------------------------- 1 | #ifndef H_saneql_SaneQLParser 2 | #define H_saneql_SaneQLParser 3 | //--------------------------------------------------------------------------- 4 | #include 5 | //--------------------------------------------------------------------------- 6 | // SaneQL 7 | // (c) 2023 Thomas Neumann 8 | // SPDX-License-Identifier: BSD-3-Clause 9 | //--------------------------------------------------------------------------- 10 | namespace saneql { 11 | //--------------------------------------------------------------------------- 12 | class ASTContainer; 13 | //--------------------------------------------------------------------------- 14 | namespace ast { 15 | class AST; 16 | } 17 | //--------------------------------------------------------------------------- 18 | /// The interface to the generated SaneQL parser 19 | class SaneQLParser { 20 | public: 21 | /// Parse the input 22 | static ast::AST* parse(ASTContainer& container, std::string_view input); 23 | }; 24 | //--------------------------------------------------------------------------- 25 | } 26 | //--------------------------------------------------------------------------- 27 | #endif 28 | -------------------------------------------------------------------------------- /parser/astspec: -------------------------------------------------------------------------------- 1 | Access : base part; 2 | BinaryExpression :: Plus Minus Mul Div Mod Pow Less Greater Equals NotEquals LessOrEqual GreaterOrEqual And Or : left right; 3 | Call : func args; 4 | Cast : value type; 5 | DefineFunction : name args body; 6 | FuncArg :: Flat List : name value; 7 
| FuncArgNamed :: Flat List Case : name value; 8 | LetArg : name type value; 9 | LetEntry : name args body; 10 | List : head tail; 11 | Literal :: Integer Float String True False Null : arg; 12 | QueryBody : lets body; 13 | Type :: Simple SubTypes Parameter : name arg; 14 | TypeArg : name value; 15 | UnaryExpression :: Plus Minus Not : value; 16 | -------------------------------------------------------------------------------- /parser/saneql.ypp: -------------------------------------------------------------------------------- 1 | %skeleton "lalr1.cc" 2 | %require "3.0" 3 | %define api.namespace {saneql::saneqlparser} 4 | %define api.value.type {saneql::ast::AST*} 5 | %locations 6 | %define parse.error verbose 7 | 8 | %lex-param { saneql::ASTContainer& container } 9 | %lex-param { saneql::SaneQLLexer& lexer } 10 | %parse-param { saneql::ASTContainer& container } 11 | %parse-param { saneql::SaneQLLexer& lexer } 12 | 13 | %code requires{ 14 | #include "parser/AST.hpp" 15 | #include "parser/SaneQLLexer.hpp" 16 | #include "parser/SaneQLParser.hpp" 17 | 18 | #ifdef __clang__ 19 | #pragma clang diagnostic push 20 | #pragma clang diagnostic ignored "-Wunused-but-set-variable" 21 | #endif 22 | 23 | // LCOV_EXCL_START 24 | } 25 | %code{ 26 | static int yylex(saneql::saneqlparser::parser::semantic_type* yylval,saneql::saneqlparser::parser::location_type* yyloc,saneql::ASTContainer& container,saneql::SaneQLLexer& lexer); 27 | } 28 | 29 | 30 | // Lexer tokens 31 | %token EndOfFile 0 "end-of-file" 32 | %token Error 33 | %token IDENT "identifier" 34 | %token FCONST "floatliteral" 35 | %token SCONST "stringliteral" 36 | %token BCONST "bitstringliteral" 37 | %token XCONST "hexstringliteral" 38 | %token Op "" 39 | %token ICONST "intliteral" 40 | %token PARAM "'$?'" 41 | %token TYPECAST "'::'" 42 | %token DOT_DOT "'..'" 43 | %token COLON_EQUALS "':='" 44 | %token EQUALS_GREATER "'=>'" 45 | %token LESS_EQUALS "'<='" 46 | %token GREATER_EQUALS "'>='" 47 | %token NOT_EQUALS "'<>'" 48 | 
%token MODE_EXPRESSION 49 | %token MODE_TYPE 50 | %token AND "'&&'" 51 | %token OR "'||'" 52 | %token NOT "'!'" 53 | 54 | // Keywords 55 | %token LET "let" 56 | %token DEFUN "defun" 57 | %token NULL_P "null" 58 | %token TRUE_P "true" 59 | %token FALSE_P "false" 60 | %token TABLE "table" 61 | 62 | // Precedence 63 | %left OR 64 | %left AND 65 | %right NOT 66 | %nonassoc IS 67 | %nonassoc '<' '>' '=' LESS_EQUALS GREATER_EQUALS NOT_EQUALS 68 | %nonassoc ESCAPE 69 | %left POSTFIXOP 70 | %nonassoc IDENT NULL_P 71 | %left Op OPERATOR 72 | %left '+' '-' 73 | %left '*' '/' '%' 74 | %left '^' 75 | %right UMINUS 76 | %left '[' ']' 77 | %left '(' ')' 78 | %left TYPECAST 79 | %left '.' 80 | 81 | %% 82 | 83 | //--------------------------------------------------------------------------- 84 | // Query 85 | 86 | 87 | Query: 88 | DefineFunction { raw container.setResult($1); } 89 | | QueryBody { raw container.setResult($1); } 90 | ; 91 | 92 | DefineFunction: 93 | DEFUN Identifier '(' ')' QueryBody { ast DefineFunction name=2 body=5 } 94 | | DEFUN Identifier '(' LetArgsList ')' QueryBody { ast DefineFunction name=2 args=4 body=6 } 95 | ; 96 | 97 | QueryBody: 98 | LetList Expression { ast QueryBody lets=1 body=2 } 99 | ; 100 | 101 | LetList: 102 | LetList LetEntry { list List head=1 tail=2 } 103 | | %empty { null } 104 | ; 105 | 106 | LetEntry: 107 | LET Identifier LetArgs COLON_EQUALS Expression ',' { ast LetEntry name=2 args=3 body=5 } 108 | ; 109 | 110 | LetArgs: 111 | '(' LetArgsList ')' { return 2 } 112 | | %empty { null } 113 | ; 114 | 115 | LetArgsList: 116 | LetArgsList ',' LetArg { list List head=1 tail=3 } 117 | | LetArg { list List head=1 } 118 | ; 119 | 120 | LetArg: 121 | Identifier { ast LetArg name=1 } 122 | | Identifier Type { ast LetArg name=1 type=2 } 123 | | Identifier COLON_EQUALS Expression { ast LetArg name=1 value=3 } 124 | | Identifier Type COLON_EQUALS Expression { ast LetArg name=1 type=2 value=4 } 125 | ; 126 | 127 | Identifier: 128 | IDENT 129 | | 
Softkeywords 130 | ; 131 | 132 | Softkeywords: 133 | TABLE 134 | ; 135 | 136 | //--------------------------------------------------------------------------- 137 | // types 138 | 139 | Type: 140 | Identifier { ast Type Simple name=1 } 141 | | Identifier '{' TypeList '}' { ast Type SubTypes name=1 arg=3 } 142 | | Identifier '{' TypeArgList '}' { ast Type Parameter name=1 arg=3 } 143 | ; 144 | 145 | TypeList: 146 | Type { list List head=1 } 147 | | TypeList ',' Type { list List head=1 tail=3 } 148 | ; 149 | 150 | TypeArgList: 151 | TypeArg { list List head=1 } 152 | | TypeArgList ',' TypeArg { list List head=1 tail=3 } 153 | ; 154 | 155 | TypeArg: 156 | Iconst { ast TypeArg value=1 } 157 | | Identifier COLON_EQUALS Iconst { ast TypeArg name=1 value=3 } 158 | ; 159 | 160 | Iconst: 161 | ICONST 162 | ; 163 | 164 | 165 | //--------------------------------------------------------------------------- 166 | // expressions 167 | 168 | Expression: 169 | a_expr 170 | ; 171 | 172 | a_expr: 173 | Identifier { return 1 } 174 | | AexprConst { return 1 } 175 | | '(' a_expr ')' { return 2 } 176 | | a_expr TYPECAST Type { ast Cast value=1 type=3 } 177 | | '+' a_expr %prec UMINUS { ast UnaryExpression Plus value=2 } 178 | | '-' a_expr %prec UMINUS { ast UnaryExpression Minus value=2 } 179 | | a_expr '+' a_expr { ast BinaryExpression Plus left=1 right=3 } 180 | | a_expr '-' a_expr { ast BinaryExpression Minus left=1 right=3 } 181 | | a_expr '*' a_expr { ast BinaryExpression Mul left=1 right=3 } 182 | | a_expr '/' a_expr { ast BinaryExpression Div left=1 right=3 } 183 | | a_expr '%' a_expr { ast BinaryExpression Mod left=1 right=3 } 184 | | a_expr '^' a_expr { ast BinaryExpression Pow left=1 right=3 } 185 | | a_expr '<' a_expr { ast BinaryExpression Less left=1 right=3 } 186 | | a_expr '>' a_expr { ast BinaryExpression Greater left=1 right=3 } 187 | | a_expr '=' a_expr { ast BinaryExpression Equals left=1 right=3 } 188 | | a_expr LESS_EQUALS a_expr { ast BinaryExpression LessOrEqual 
left=1 right=3 } 189 | | a_expr GREATER_EQUALS a_expr { ast BinaryExpression GreaterOrEqual left=1 right=3 } 190 | | a_expr NOT_EQUALS a_expr { ast BinaryExpression NotEquals left=1 right=3 } 191 | | a_expr AND a_expr { ast BinaryExpression And left=1 right=3 } 192 | | a_expr OR a_expr { ast BinaryExpression Or left=1 right=3 } 193 | | NOT a_expr { ast UnaryExpression Not value=2 } 194 | | a_expr '.' Identifier { ast Access base=1 part=3 } 195 | | a_expr '(' ')' { ast Call func=1 } 196 | | a_expr '(' FuncArgs ')' { ast Call func=1 args=3 } 197 | ; 198 | 199 | AexprConst: 200 | Iconst { ast Literal Integer arg=1 } 201 | | FCONST { ast Literal Float arg=1 } 202 | | Sconst { ast Literal String arg=1 } 203 | | TRUE_P { ast Literal True } 204 | | FALSE_P { ast Literal False } 205 | | NULL_P { ast Literal Null } 206 | ; 207 | 208 | Sconst: 209 | SCONST 210 | ; 211 | 212 | FuncArgs: 213 | FuncArgs ',' FuncArg { list List head=1 tail=3 } 214 | | FuncArg { list List head=1 } 215 | ; 216 | 217 | FuncArg: 218 | Identifier COLON_EQUALS Expression { ast FuncArg Flat name=1 value=3 } 219 | | Identifier COLON_EQUALS '{' FuncArgList '}' { ast FuncArg List name=1 value=4 } 220 | | Expression { ast FuncArg Flat value=1 } 221 | | '{' FuncArgList '}' { ast FuncArg List value=2 } 222 | ; 223 | 224 | FuncArgList: 225 | FuncArgListNe 226 | | %empty { null } 227 | ; 228 | 229 | FuncArgListNe: 230 | FuncArgListNe ',' FuncArgNamed { list List head=1 tail=3 } 231 | | FuncArgNamed { list List head=1 } 232 | ; 233 | 234 | FuncArgNamed: 235 | Expression { ast FuncArgNamed Flat value=1 } 236 | | Identifier COLON_EQUALS Expression { ast FuncArgNamed Flat name=1 value=3 } 237 | | Expression EQUALS_GREATER Expression { ast FuncArgNamed Case name=1 value=3 } 238 | | '{' FuncArgList '}' { ast FuncArgNamed List value=2 } 239 | | Identifier COLON_EQUALS '{' FuncArgList '}' { ast FuncArgNamed List name=1 value=4 } 240 | ; 241 | 242 | %% 243 | 244 | namespace saneql { namespace saneqlparser { 245 | 246 | 
void parser::error(const location_type&,const std::string&) { 247 | // unreachable 248 | } 249 | 250 | class reportingparser final : public parser { 251 | 252 | public: 253 | reportingparser(saneql::ASTContainer& container,saneql::SaneQLLexer& lexer) : parser(container,lexer) {} 254 | 255 | #define N_TOKENS_VAR YYNTOKENS 256 | std::string yysyntax_error_ (const context& yyctx) const override 257 | { 258 | auto yystate = yyctx.yyparser_.yystack_[0].state; 259 | auto& yyla = yyctx.lookahead(); 260 | const char* gotSymbol=nullptr; 261 | std::vector expectedSymbols; 262 | #define ERROR_TOKEN_VAR symbol_kind::S_YYerror 263 | if (!yyla.empty ()) { 264 | const int firstKeyword = yytranslate_(saneqlparser::parser::token::LET); 265 | const int identifier = yytranslate_(saneqlparser::parser::token::IDENT); 266 | int yytoken = yyla.type_get (); 267 | gotSymbol = yytname_[yytoken]; 268 | if ((*gotSymbol=='"')&&(yytoken') A(GreaterEquals,GREATER_EQUALS) A(Identifier,IDENT) \ 318 | A(Integer,ICONST) B(LCurly,'{') B(LParen,'(') B(LSquare,'[') B(Less,'<') A(LessEquals,LESS_EQUALS) B(Minus,'-') A(NotEquals,NOT_EQUALS) A(Operator,Op) B(Percent,'%') \ 319 | A(Parameter,PARAM) B(Plus,'+') B(RCurly,'}') B(RParen,')') B(RSquare,']') B(SemiColon,';') B(Solidus,'/') A(String,SCONST) A(UnterminatedLiteral,Error) A(UnterminatedMultilineComment,Error) 320 | 321 | // The mapping table from SaneQLLexer tokens to bison tokens 322 | static constexpr saneql::saneqlparser::parser::token::yytokentype tokenTable[]={ 323 | #define A(X,Y) saneql::saneqlparser::parser::token::Y, 324 | #define B(X,Y) static_cast(Y), 325 | SQLLEXER_TOKENS(A,B) 326 | #undef A 327 | #undef B 328 | 329 | #define KEYWORD(A,B,C) saneql::saneqlparser::parser::token::B, 330 | #include "parser/Keywords.hpp" 331 | #undef KEYWORD 332 | }; 333 | 334 | // LCOV_EXCL_STOP 335 | 336 | // Verify that the mapping table is correct 337 | #define A(X,Y) 
static_assert(tokenTable[static_cast(saneql::SaneQLLexer::Token::X)]==saneql::saneqlparser::parser::token::Y,"incorrect token mapping"); 338 | #define B(X,Y) static_assert(tokenTable[static_cast(saneql::SaneQLLexer::Token::X)]==static_cast(Y),"incorrect token mapping"); 339 | SQLLEXER_TOKENS(A,B) 340 | #undef A 341 | #undef B 342 | 343 | #define KEYWORD(A,B,C) static_assert(tokenTable[static_cast(saneql::SaneQLLexer::Token::B)]==saneql::saneqlparser::parser::token::B,"incorrect token mapping"); 344 | #include "parser/Keywords.hpp" 345 | #undef KEYWORD 346 | 347 | // Lexer interface 348 | static int yylex(saneql::saneqlparser::parser::semantic_type* yylval,saneql::saneqlparser::parser::location_type* yyloc,saneql::ASTContainer& container,saneql::SaneQLLexer& lexer) 349 | { 350 | // Get the next token 351 | saneql::SaneQLLexer::TokenInfo info; 352 | auto token=lexer.next(info); 353 | 354 | // Construct a token node 355 | *yylval=new (container.allocateRaw(sizeof(saneql::ast::Token))) saneql::ast::Token(info); 356 | 357 | // Construct a location 358 | #define POSITION_COUNTER_TYPE saneql::saneqlparser::position::counter_type 359 | *yyloc=saneql::saneqlparser::parser::location_type{saneql::saneqlparser::position{nullptr,1,static_cast(lexer.getColumnId(info.content.begin()))},saneql::saneqlparser::position{nullptr,1,static_cast(lexer.getColumnId(info.content.end()))}}; 360 | #undef POSITION_COUNTER_TYPE 361 | 362 | // And return the mapped token 363 | return tokenTable[static_cast(token)]; 364 | } 365 | 366 | namespace saneql { 367 | 368 | ast::AST* SaneQLParser::parse(ASTContainer& container,std::string_view input) 369 | { 370 | SaneQLLexer lexer(input); 371 | saneqlparser::reportingparser parser(container,lexer); 372 | if (parser.parse()!=0) return nullptr; 373 | return container.getResult(); 374 | } 375 | 376 | } 377 | 378 | #ifdef __clang__ 379 | #pragma clang diagnostic pop 380 | #endif 381 | 
-------------------------------------------------------------------------------- /semana/Functions.cpp: -------------------------------------------------------------------------------- 1 | #include "semana/Functions.hpp" 2 | //--------------------------------------------------------------------------- 3 | // (c) 2023 Thomas Neumann 4 | //--------------------------------------------------------------------------- 5 | using namespace std; 6 | //--------------------------------------------------------------------------- 7 | namespace saneql { 8 | //--------------------------------------------------------------------------- 9 | namespace { 10 | //--------------------------------------------------------------------------- 11 | using Builtin = Functions::Builtin; 12 | using TypeCategory = Functions::TypeCategory; 13 | //--------------------------------------------------------------------------- 14 | /// Functions that are defined on all scalar types 15 | static const Functions scalarFunctions(nullptr, 16 | { 17 | // list of functions 18 | {"asc", {Builtin::Asc, {}}}, // change order to ascending 19 | {"desc", {Builtin::Desc, {}}}, // change order to descending 20 | {"collate", {Builtin::Desc, {{"collate", TypeCategory::Symbol}}}}, // change collate 21 | {"is", {Builtin::Is, {{"other", TypeCategory::Scalar}}}}, // check for equality handling NULL 22 | {"between", {Builtin::Between, {{"lower", TypeCategory::Scalar}, {"upper", TypeCategory::Scalar}}}}, // range check 23 | {"in", {Builtin::In, {{"values", TypeCategory::ExpressionList}}}} // check against a list of values 24 | }); 25 | //--------------------------------------------------------------------------- 26 | /// Functions that are defined on text types 27 | static const Functions textFunctions(&scalarFunctions, 28 | { 29 | // list of functions 30 | {"like", {Builtin::Like, {{"pattern", Type::getText()}}}}, // a like predicate 31 | {"substr", {Builtin::Substr, {{"from", Type::getInteger(), true}, {"for", 
Type::getInteger(), true}}}} // a like predicate 32 | }); 33 | //--------------------------------------------------------------------------- 34 | /// Functions that are defined on date values 35 | static const Functions dateFunctions(&scalarFunctions, 36 | { 37 | // list of functions 38 | {"extract", {Builtin::Extract, {{"part", TypeCategory::Symbol}}}} // extract part of a date 39 | }); 40 | //--------------------------------------------------------------------------- 41 | } 42 | //--------------------------------------------------------------------------- 43 | /// Functions that are defined on tables 44 | const Functions Functions::table(nullptr, 45 | { 46 | // list of functions 47 | {"filter", {Builtin::Filter, {{"condition", TypeCategory::Expression}}}}, // filter tuples 48 | {"join", {Builtin::Join, {{"table", TypeCategory::Table}, {"on", TypeCategory::Expression}, {"type", TypeCategory::Symbol, true}}}}, // join tables 49 | {"groupby", {Builtin::GroupBy, {{"groups", TypeCategory::ExpressionList}, {"aggregates", TypeCategory::ExpressionList, true}, {"type", TypeCategory::Symbol, true}, {"sets", TypeCategory::ExpressionList, true}}}}, // aggregate 50 | {"aggregate", {Builtin::Aggregate, {{"aggregate", TypeCategory::Expression}}}}, // aggregate to scalar 51 | {"distinct", {Builtin::Distinct, {}}}, // remove duplicates 52 | {"orderby", {Builtin::OrderBy, {{"expressions", TypeCategory::ExpressionList}, {"limit", TypeCategory::Expression, true}, {"offset", TypeCategory::Expression, true}}}}, // order entries 53 | {"map", {Builtin::Map, {{"expressions", TypeCategory::ExpressionList}}}}, // compute new columns 54 | {"project", {Builtin::Project, {{"expressions", TypeCategory::ExpressionList}}}}, // restrict the result columns 55 | {"projectout", {Builtin::ProjectOut, {{"columns", TypeCategory::ExpressionList}}}}, // restrict the result columns 56 | {"as", {Builtin::As, {{"name", TypeCategory::Symbol}}}}, // rename the scope 57 | {"alias", {Builtin::Alias, {{"name", 
TypeCategory::Symbol}}}}, // provide alias name for columns 58 | {"union", {Builtin::Union, {{"table", TypeCategory::Table}, {"all", TypeCategory::Symbol, true}}}}, // set union 59 | {"except", {Builtin::Except, {{"table", TypeCategory::Table}, {"all", TypeCategory::Symbol, true}}}}, // set except 60 | {"intersect", {Builtin::Intersect, {{"table", TypeCategory::Table}, {"all", TypeCategory::Symbol, true}}}}, // set intersect 61 | {"window", {Builtin::Window, {{"expressions", TypeCategory::ExpressionList}, {"partitionby", TypeCategory::ExpressionList, true}, {"orderby", TypeCategory::ExpressionList, true}, {"framebegin", TypeCategory::Expression, true}, {"framend", TypeCategory::Expression, true}, {"frametype", TypeCategory::Symbol, true}}}}, // window computation 62 | }); 63 | //--------------------------------------------------------------------------- 64 | /// The free functions 65 | const Functions Functions::freeFunctions(nullptr, 66 | { 67 | // list of functions 68 | {"count", {Builtin::AggCount, {{"value", TypeCategory::Expression, true}, {"distinct", TypeCategory::Symbol, true}}}}, // aggregate 69 | {"sum", {Builtin::AggSum, {{"value", TypeCategory::Expression}, {"distinct", TypeCategory::Symbol, true}}}}, // aggregate 70 | {"avg", {Builtin::AggAvg, {{"value", TypeCategory::Expression}, {"distinct", TypeCategory::Symbol, true}}}}, // aggregate 71 | {"min", {Builtin::AggMin, {{"value", TypeCategory::Expression}}}}, // aggregate 72 | {"max", {Builtin::AggMax, {{"value", TypeCategory::Expression}}}}, // aggregate 73 | {"row_number", {Builtin::WindowRowNumber, {}}}, // windw function 74 | {"rank", {Builtin::WindowRank, {{"value", TypeCategory::Expression }}}}, // windw function 75 | {"dense_rank", {Builtin::WindowDenseRank, {{"value", TypeCategory::Expression }}}}, // windw function 76 | {"ntile", {Builtin::WindowNTile, {{"n", TypeCategory::Expression }}}}, // windw function 77 | {"lead", {Builtin::WindowLead, {{"value", TypeCategory::Expression }, {"offset", 
TypeCategory::Expression, true}, {"default", TypeCategory::Expression, true}}}}, // windw function 78 | {"lag", {Builtin::WindowLag, {{"value", TypeCategory::Expression }, {"offset", TypeCategory::Expression, true}, {"default", TypeCategory::Expression, true}}}}, // windw function 79 | {"first_value", {Builtin::WindowFirstValue, {{"value", TypeCategory::Expression }}}}, // windw function 80 | {"last_value", {Builtin::WindowLastValue, {{"value", TypeCategory::Expression }}}}, // windw function 81 | {"table", {Builtin::Table, {{"values", TypeCategory::ExpressionList}}}}, // table construction 82 | {"case", {Builtin::Case, {{"cases", TypeCategory::ExpressionList}, {"else", TypeCategory::Expression, true}, {"search", TypeCategory::Scalar, true}}}}, // case expression 83 | {"gensym", {Builtin::Gensym, {{"name", TypeCategory::Symbol, true}}}}, // create a unique symbol 84 | {"foreigncall", {Builtin::ForeignCall, {{"name", Type::getText()}, {"returns", TypeCategory::Symbol}, {"arguments", TypeCategory::ExpressionList, true}, {"type", TypeCategory::Symbol, true}}}} // declare that a function with the given arguments exists 85 | }); 86 | //--------------------------------------------------------------------------- 87 | Functions::Functions(const Functions* parent, std::initializer_list> signatures) 88 | : parent(parent), functions(signatures.begin(), signatures.end()) 89 | // Constructor 90 | { 91 | } 92 | //--------------------------------------------------------------------------- 93 | Functions::~Functions() 94 | // Destructor 95 | { 96 | } 97 | //--------------------------------------------------------------------------- 98 | const Functions::Signature* Functions::lookup(const std::string& name) const 99 | // Find a function 100 | { 101 | for (auto iter = this; iter; iter = iter->parent) { 102 | auto iter2 = iter->functions.find(name); 103 | if (iter2 != iter->functions.end()) return &(iter2->second); 104 | } 105 | return nullptr; 106 | } 107 | 
//--------------------------------------------------------------------------- 108 | const Functions* Functions::getFunctions(Type type) 109 | // Find the function set associated with a scalar type 110 | { 111 | switch (type.getType()) { 112 | case Type::Unknown: return &scalarFunctions; 113 | case Type::Bool: return &scalarFunctions; 114 | case Type::Integer: return &scalarFunctions; 115 | case Type::Decimal: return &scalarFunctions; 116 | case Type::Char: return &textFunctions; 117 | case Type::Varchar: return &textFunctions; 118 | case Type::Text: return &textFunctions; 119 | case Type::Date: return &dateFunctions; 120 | case Type::Interval: return &scalarFunctions; 121 | } 122 | __builtin_unreachable(); 123 | } 124 | //--------------------------------------------------------------------------- 125 | } 126 | //--------------------------------------------------------------------------- 127 | -------------------------------------------------------------------------------- /semana/Functions.hpp: -------------------------------------------------------------------------------- 1 | #ifndef H_saneql_Functions 2 | #define H_saneql_Functions 3 | //--------------------------------------------------------------------------- 4 | #include "infra/Schema.hpp" 5 | #include 6 | //--------------------------------------------------------------------------- 7 | // SaneQL 8 | // (c) 2023 Thomas Neumann 9 | // SPDX-License-Identifier: BSD-3-Clause 10 | //--------------------------------------------------------------------------- 11 | namespace saneql { 12 | //--------------------------------------------------------------------------- 13 | /// A collection of functions, optionally chained to a parent collection 14 | class Functions { 15 | public: 16 | /// Builtins 17 | enum class Builtin { 18 | Asc, 19 | Desc, 20 | Collate, 21 | Is, 22 | Like, 23 | Substr, 24 | Between, 25 | In, 26 | Extract, 27 | Case, 28 | Filter, 29 | Join, 30 | Gensym, 31 | GroupBy, 32 | Aggregate, 33 | Distinct, 34 | OrderBy, 35 | Map, 36 | Project, 37 | ProjectOut, 38 | Union,
39 | Except, 40 | Intersect, 41 | Window, 42 | As, 43 | Alias, 44 | AggCount, 45 | AggSum, 46 | AggAvg, 47 | AggMin, 48 | AggMax, 49 | WindowRowNumber, 50 | WindowRank, 51 | WindowDenseRank, 52 | WindowNTile, 53 | WindowLead, 54 | WindowLag, 55 | WindowFirstValue, 56 | WindowLastValue, 57 | Table, 58 | ForeignCall 59 | }; 60 | /// Type category of an argument 61 | enum class TypeCategory { 62 | Scalar, 63 | Table, 64 | Expression, 65 | ExpressionList, 66 | Symbol, 67 | SymbolList 68 | }; 69 | /// An argument type 70 | struct ArgumentType { 71 | /// The type category 72 | TypeCategory category; 73 | /// The underlying type (if scalar) 74 | Type type; 75 | 76 | /// Constructor for scalar types (implicit, so a Type can be used directly as argument type) 77 | ArgumentType(Type type) : category(TypeCategory::Scalar), type(type) {} 78 | /// Constructor for other categories (the underlying type is set to unknown) 79 | ArgumentType(TypeCategory category) : category(category), type(Type::getUnknown()) {} 80 | }; 81 | /// An argument 82 | struct Argument { 83 | /// The name 84 | std::string name; 85 | /// The argument type 86 | ArgumentType type; 87 | /// Does it have a default value?
88 | bool hasDefault = false; 89 | }; 90 | /// A signature: the builtin and its formal arguments 91 | struct Signature { 92 | /// The builtin function 93 | Builtin builtin; 94 | /// The arguments 95 | std::vector arguments; 96 | }; 97 | 98 | private: 99 | /// The parent set (if any) 100 | const Functions* parent; 101 | /// All functions contained here 102 | std::unordered_map functions; 103 | 104 | public: 105 | /// Constructor 106 | Functions(const Functions* parent, std::initializer_list> signatures); 107 | /// Destructor 108 | ~Functions(); 109 | 110 | /// Find a function by name, also searching the parent sets. Returns nullptr if not found 111 | const Signature* lookup(const std::string& name) const; 112 | 113 | /// Get the functions for a given type 114 | static const Functions* getFunctions(Type type); 115 | /// The functions defined on tables 116 | static const Functions table; 117 | /// The free functions 118 | static const Functions freeFunctions; 119 | }; 120 | //--------------------------------------------------------------------------- 121 | } 122 | //--------------------------------------------------------------------------- 123 | #endif 124 | -------------------------------------------------------------------------------- /semana/SemanticAnalysis.hpp: -------------------------------------------------------------------------------- 1 | #ifndef H_saneql_SemanticAnalysis 2 | #define H_saneql_SemanticAnalysis 3 | //--------------------------------------------------------------------------- 4 | #include "infra/Schema.hpp" 5 | #include "semana/Functions.hpp" 6 | #include 7 | #include 8 | #include 9 | //--------------------------------------------------------------------------- 10 | // SaneQL 11 | // (c) 2023 Thomas Neumann 12 | // SPDX-License-Identifier: BSD-3-Clause 13 | //--------------------------------------------------------------------------- 14 | namespace saneql { 15 | //--------------------------------------------------------------------------- 16 | namespace ast { 17 | class AST; 18 | class Access; 19 | class BinaryExpression; 20 | class Call; 21 |
class Cast; 22 | class FuncArg; 23 | class LetEntry; 24 | class Literal; 25 | class Type; 26 | class UnaryExpression; 27 | } 28 | //--------------------------------------------------------------------------- 29 | namespace algebra { 30 | class Expression; 31 | class IU; 32 | class Operator; 33 | } 34 | //--------------------------------------------------------------------------- 35 | /// Collate info 36 | enum class Collate : uint8_t { 37 | None 38 | }; 39 | //--------------------------------------------------------------------------- 40 | /// Information about collation and ordering 41 | class OrderingInfo { 42 | /// The collate 43 | Collate collate = Collate::None; 44 | /// Descending? 45 | bool descending = false; 46 | 47 | /// Explicit constructor (private) 48 | constexpr OrderingInfo(Collate collate, bool descending) : collate(collate), descending(descending) {} 49 | 50 | public: 51 | /// Default constructor: no collate, ascending 52 | constexpr OrderingInfo() = default; 53 | 54 | /// Mark as ascending 55 | void markAscending() { descending = false; } 56 | /// Mark as descending 57 | void markDescending() { descending = true; } 58 | /// Is descending? 59 | bool isDescending() const { return descending; } 60 | /// Get the collate 61 | Collate getCollate() const { return collate; } 62 | /// Change the collate 63 | void setCollate(Collate newCollate) { collate = newCollate; } 64 | 65 | /// Construct the default order (no collate, ascending) 66 | static OrderingInfo defaultOrder() { return {}; } 67 | /// Lookup a collate.
Throws if not found 68 | static Collate lookupCollate(const std::string& name); 69 | }; 70 | //--------------------------------------------------------------------------- 71 | /// Semantic analysis for saneql queries 72 | class SemanticAnalysis { 73 | public: 74 | /// Binding information 75 | class BindingInfo { 76 | public: 77 | /// Helper for group by 78 | class GroupByScope; 79 | 80 | /// A column description 81 | struct Column { 82 | /// The name 83 | std::string name; 84 | /// The IU 85 | const algebra::IU* iu; 86 | }; 87 | 88 | private: 89 | /// Scope information 90 | struct Scope { 91 | /// The columns 92 | std::unordered_map columns; 93 | /// Is the scope ambiguous? 94 | bool ambiguous = false; 95 | }; 96 | /// Alias information 97 | struct Alias { 98 | /// The columns 99 | std::vector columns; 100 | /// Is the alias ambiguous? 101 | bool ambiguous = false; 102 | }; 103 | /// Argument information 104 | struct ArgumentInformation { 105 | /// The possible states: alternative 0 is invalid, 1 is a value (AST and scope), 2 is a symbol 106 | std::variant, std::string> entry; 107 | 108 | /// Is a valid entry? 109 | bool isValid() const { return entry.index() > 0; } 110 | /// Is a value argument? 111 | bool isValue() const { return entry.index() == 1; } 112 | /// Get the value reference 113 | const ast::AST* getValueRef() const { return std::get<1>(entry).first; } 114 | /// Get the value scope 115 | const BindingInfo* getValueScope() const { return std::get<1>(entry).second; } 116 | /// Is a symbol argument?
117 | bool isSymbol() const { return entry.index() == 2; } 118 | /// Get the symbol value 119 | const std::string& getSymbol() const { return std::get<2>(entry); } 120 | }; 121 | /// The well defined column order 122 | std::vector columns; 123 | /// Mapping from column name to IU 124 | std::unordered_map columnLookup; 125 | /// Scoped columns 126 | std::unordered_map scopes; 127 | /// Column aliases 128 | std::unordered_map aliases; 129 | /// The arguments 130 | std::unordered_map arguments; 131 | /// The parent scope for function calls (if any) 132 | const BindingInfo* parentScope = nullptr; 133 | /// The group by scope (if any) 134 | GroupByScope* gbs = nullptr; 135 | 136 | friend class SemanticAnalysis; 137 | 138 | public: 139 | /// Marker for ambiguous IUs 140 | static const algebra::IU* const ambiguousIU; 141 | 142 | /// Constant (empty) root scope 143 | static const BindingInfo& rootScope(); 144 | 145 | /// Access all columns 146 | const auto& getColumns() const { return columns; } 147 | /// Add a new scope, mark it as ambiguous if it already exists 148 | Scope* addScope(const std::string& name); 149 | /// Add a binding 150 | void addBinding(Scope* scope, const std::string& column, const algebra::IU* iu); 151 | 152 | /// Lookup a column by its unqualified name 153 | const algebra::IU* lookup(const std::string& name) const; 154 | /// Lookup a column by binding and column name 155 | const algebra::IU* lookup(const std::string& binding, const std::string& name) const; 156 | 157 | /// Register a value argument (AST and its scope) 158 | void registerArgument(const std::string& name, const ast::AST* ast, const BindingInfo* scope); 159 | /// Register a symbol argument 160 | void registerSymbolArgument(const std::string& name, const std::string& symbol); 161 | /// Check for an argument 162 | ArgumentInformation lookupArgument(const std::string& name) const; 163 | 164 | /// Merge after a join 165 | void join(const BindingInfo& other); 166 | 167 | /// Get the group by scope 168 | GroupByScope* getGroupByScope() const { return gbs; } 169 | };
170 | 171 | private: 172 | /// The schema 173 | const Schema& schema; 174 | 175 | /// An expression container holding either a scalar or a table result 176 | struct ExpressionResult { 177 | /// Content for scalar expressions 178 | struct ScalarInfo { 179 | /// The expression 180 | std::unique_ptr expression; 181 | /// Collation and ordering 182 | OrderingInfo ordering; 183 | }; 184 | /// Content for table expressions 185 | struct TableInfo { 186 | /// The operator 187 | std::unique_ptr op; 188 | /// The column bindings 189 | BindingInfo binding; 190 | }; 191 | std::variant content; 192 | 193 | public: 194 | /// Constructor for scalar results 195 | ExpressionResult(std::unique_ptr expression, OrderingInfo ordering); 196 | /// Constructor for table results 197 | ExpressionResult(std::unique_ptr op, BindingInfo binding); 198 | /// Move constructor 199 | ExpressionResult(ExpressionResult&&) = default; 200 | /// Destructor 201 | ~ExpressionResult(); 202 | 203 | ExpressionResult& operator=(ExpressionResult&&) = default; 204 | 205 | /// Do we have a scalar result? 206 | bool isScalar() const { return content.index() == 0; } 207 | /// Access the scalar value 208 | auto& scalar() { return std::get<0>(content).expression; } 209 | /// Access the ordering 210 | auto& accessOrdering() { return std::get<0>(content).ordering; } 211 | /// Get a copy of the ordering 212 | OrderingInfo getOrdering() const { return std::get<0>(content).ordering; } 213 | /// Do we have a table result?
214 | bool isTable() const { return content.index() == 1; } 215 | /// Access the table value 216 | auto& table() { return std::get<1>(content).op; } 217 | /// Access the binding 218 | BindingInfo& accessBinding() { return std::get<1>(content).binding; } 219 | /// Read-only access to the binding 220 | const BindingInfo& getBinding() const { return std::get<1>(content).binding; } 221 | }; 222 | /// Information about an extended type 223 | struct ExtendedType { 224 | /// The content 225 | std::variant content; 226 | 227 | /// Constructor 228 | ExtendedType(Type type) : content(type) {} 229 | 230 | /// Is a basic type? 231 | bool isBasic() const { return content.index() == 0; } 232 | /// Get the contained basic type 233 | Type getBasicType() const { return std::get<0>(content); } 234 | }; 235 | /// Information about a let 236 | struct LetInfo { 237 | /// The signature (if any) 238 | Functions::Signature signature; 239 | /// The default values (if any) 240 | std::vector defaultValues; 241 | /// The body of the let 242 | const ast::AST* body; 243 | }; 244 | 245 | /// All lets 246 | std::vector lets; 247 | /// Lookup of lets by name 248 | std::unordered_map letLookup; 249 | /// Visibility limit for lets 250 | unsigned letScopeLimit = ~0u; 251 | /// The next symbol id 252 | unsigned nextSymbolId = 1; 253 | 254 | /// Change the let scope limit while this object is alive, the old limit is restored on destruction 255 | class SetLetScopeLimit { 256 | SemanticAnalysis* semana; 257 | unsigned oldLimit; 258 | 259 | public: 260 | SetLetScopeLimit(SemanticAnalysis* semana, unsigned newLimit) : semana(semana), oldLimit(semana->letScopeLimit) { semana->letScopeLimit = newLimit; } 261 | ~SetLetScopeLimit() { semana->letScopeLimit = oldLimit; } 262 | }; 263 | 264 | public: 265 | /// Report an error 266 | [[noreturn]] void reportError(std::string message); 267 | /// Invalid AST node 268 | [[noreturn]] void invalidAST(); 269 | /// Extract a string value 270 | std::string extractString(const ast::AST* token); 271 | /// Extract a symbol name without override
capabilities 272 | std::string extractRawSymbol(const ast::AST* token); 273 | /// Extract a symbol name 274 | std::string extractSymbol(const BindingInfo& scope, const ast::AST* token); 275 | /// Parse a type string for a simple type 276 | saneql::Type parseSimpleTypeName(const std::string& name); 277 | /// Analyze a type 278 | ExtendedType analyzeType(const ast::Type& type); 279 | 280 | private: 281 | /// Recognize gensym calls. Returns an empty string otherwise 282 | std::string recognizeGensym(const ast::AST* ast); 283 | /// Analyze a literal 284 | ExpressionResult analyzeLiteral(const ast::Literal& literal); 285 | /// Analyze an access expression 286 | ExpressionResult analyzeAccess(const BindingInfo& scope, const ast::Access& ast); 287 | /// Analyze a binary expression 288 | ExpressionResult analyzeBinaryExpression(const BindingInfo& scope, const ast::BinaryExpression& ast); 289 | /// Analyze a unary expression 290 | ExpressionResult analyzeUnaryExpression(const BindingInfo& scope, const ast::UnaryExpression& ast); 291 | /// Analyze a case computation 292 | ExpressionResult analyzeCase(const BindingInfo& scope, const std::vector& args); 293 | /// Analyze a join computation 294 | ExpressionResult analyzeJoin(const BindingInfo& scope, ExpressionResult& input, const std::vector& args); 295 | /// Analyze a groupby computation 296 | ExpressionResult analyzeGroupBy(ExpressionResult& input, const std::vector& args); 297 | /// Analyze an aggregate computation 298 | ExpressionResult analyzeAggregate(ExpressionResult& input, const std::vector& args); 299 | /// Analyze a distinct computation 300 | ExpressionResult analyzeDistinct(ExpressionResult& input); 301 | /// Analyze a set computation 302 | ExpressionResult analyzeSetOperation(const BindingInfo& scope, Functions::Builtin builtin, ExpressionResult& input, const std::vector& args); 303 | /// Analyze a window computation 304 | ExpressionResult analyzeWindow(ExpressionResult& input, const std::vector& args); 305 | /// Analyze an
orderby computation 306 | ExpressionResult analyzeOrderBy(ExpressionResult& input, const std::vector& args); 307 | /// Analyze a map or project computation (the project flag selects which) 308 | ExpressionResult analyzeMap(ExpressionResult& input, const std::vector& args, bool project); 309 | /// Analyze a projectout computation 310 | ExpressionResult analyzeProjectOut(ExpressionResult& input, const std::vector& args); 311 | /// Handle a symbol argument 312 | std::string symbolArgument(const BindingInfo& scope, const std::string& funcName, const std::string& argName, const ast::FuncArg* arg); 313 | /// Handle a constant boolean argument 314 | bool constBoolArgument(const std::string& funcName, const std::string& argName, const ast::FuncArg* arg); 315 | /// Handle a constant string argument 316 | std::string constStringArgument(const std::string& funcName, const std::string& argName, const ast::FuncArg* arg); 317 | /// Handle a scalar argument 318 | ExpressionResult scalarArgument(const BindingInfo& scope, const std::string& funcName, const std::string& argName, const ast::FuncArg* arg); 319 | /// Handle a list of scalar arguments 320 | std::vector scalarArgumentList(const BindingInfo& scope, const std::string& funcName, const std::string& argName, const ast::FuncArg* arg); 321 | /// Handle a table argument 322 | ExpressionResult tableArgument(const BindingInfo& scope, const std::string& funcName, const std::string& argName, const ast::FuncArg* arg); 323 | /// Expression argument 324 | struct ExpressionArg { 325 | /// The name (if any) 326 | std::string name; 327 | /// The expression 328 | ExpressionResult value; 329 | }; 330 | /// Handle expression list arguments 331 | std::vector expressionListArgument(const BindingInfo& scope, const ast::FuncArg* arg); 332 | /// Make sure two values are comparable 333 | void enforceComparable(ExpressionResult& a, ExpressionResult& b); 334 | /// Make sure two values are comparable 335 | void enforceComparable(std::unique_ptr& sa, std::unique_ptr& sb); 336 | ///
Analyze a call expression 337 | ExpressionResult analyzeCall(const BindingInfo& scope, const ast::Call& ast); 338 | /// Analyze a cast expression 339 | ExpressionResult analyzeCast(const BindingInfo& scope, const ast::Cast& ast); 340 | /// Analyze a table construction expression 341 | ExpressionResult analyzeTableConstruction(const BindingInfo& scope, const ast::FuncArg* ast); 342 | /// Analyze a token 343 | ExpressionResult analyzeToken(const BindingInfo& scope, const ast::AST* exp); 344 | /// Analyze an expression 345 | ExpressionResult analyzeExpression(const BindingInfo& scope, const ast::AST* exp); 346 | /// Analyze a let construction 347 | void analyzeLet(const ast::LetEntry& ast); 348 | 349 | public: 350 | /// Constructor. The schema is stored by reference 351 | explicit SemanticAnalysis(Schema& schema) : schema(schema) {} 352 | 353 | /// Analyze a query 354 | ExpressionResult analyzeQuery(const ast::AST* query); 355 | }; 356 | //--------------------------------------------------------------------------- 357 | } 358 | //--------------------------------------------------------------------------- 359 | #endif 360 | -------------------------------------------------------------------------------- /sql/SQLWriter.cpp: -------------------------------------------------------------------------------- 1 | #include "sql/SQLWriter.hpp" 2 | #include "infra/Schema.hpp" 3 | //--------------------------------------------------------------------------- 4 | // (c) 2023 Thomas Neumann 5 | //--------------------------------------------------------------------------- 6 | using namespace std; 7 | //--------------------------------------------------------------------------- 8 | namespace saneql { 9 | //--------------------------------------------------------------------------- 10 | SQLWriter::SQLWriter() 11 | : target(&result) 12 | // Constructor, output initially goes to the internal result buffer 13 | { 14 | } 15 | //--------------------------------------------------------------------------- 16 | SQLWriter::~SQLWriter() 17 | // Destructor 18 | { 19 | } 20 |
//--------------------------------------------------------------------------- 21 | void SQLWriter::write(std::string_view sql) 22 | // Write a SQL fragment 23 | { 24 | auto& writer = *target; 25 | writer += sql; 26 | } 27 | //--------------------------------------------------------------------------- 28 | void SQLWriter::writeIdentifier(std::string_view identifier) 29 | // Write an identifier, quoting as needed 30 | { 31 | auto& writer = *target; 32 | writer += '"'; 33 | for (char c : identifier) { 34 | if (c == '"') { 35 | writer += "\"\""; 36 | } else { 37 | writer += c; 38 | } 39 | } 40 | writer += '"'; 41 | } 42 | //--------------------------------------------------------------------------- 43 | void SQLWriter::writeIU(const algebra::IU* iu) 44 | // Write an IU 45 | { 46 | auto& writer = *target; 47 | if (auto iter = iuNames.find(iu); iter != iuNames.end()) { 48 | writer += iter->second; 49 | } else { 50 | string name = "v_"s + to_string(iuNames.size() + 1); 51 | writer += name; 52 | iuNames[iu] = move(name); 53 | } 54 | } 55 | //--------------------------------------------------------------------------- 56 | void SQLWriter::writeString(std::string_view str) 57 | // Write a string literal 58 | { 59 | auto& writer = *target; 60 | writer += '\''; 61 | for (char c : str) { 62 | if (c == '\'') { 63 | writer += "''"; 64 | } else { 65 | writer += c; 66 | } 67 | } 68 | writer += '\''; 69 | } 70 | //--------------------------------------------------------------------------- 71 | void SQLWriter::writeType(Type type) 72 | // Write a type 73 | { 74 | auto& writer = *target; 75 | switch (type.getType()) { 76 | case Type::Unknown: writer += "unknown"; break; // this can only happen for NULL values 77 | case Type::Bool: writer += "boolean"; break; 78 | case Type::Integer: writer += "integer"; break; 79 | case Type::Decimal: writer += "decimal(" + to_string(type.getPrecision()) + "," + to_string(type.getScale()) + ")"; break; 80 | case Type::Char: writer += "char(" + 
to_string(type.getLength()) + ")"; break; 81 | case Type::Varchar: writer += "varchar(" + to_string(type.getLength()) + ")"; break; 82 | case Type::Text: writer += "text"; break; 83 | case Type::Date: writer += "date"; break; 84 | case Type::Interval: writer += "interval"; break; 85 | } 86 | } 87 | //--------------------------------------------------------------------------- 88 | } 89 | //--------------------------------------------------------------------------- 90 | -------------------------------------------------------------------------------- /sql/SQLWriter.hpp: -------------------------------------------------------------------------------- 1 | #ifndef H_saneql_SQLWriter 2 | #define H_saneql_SQLWriter 3 | //--------------------------------------------------------------------------- 4 | #include 5 | #include 6 | #include 7 | //--------------------------------------------------------------------------- 8 | // SaneQL 9 | // (c) 2023 Thomas Neumann 10 | // SPDX-License-Identifier: BSD-3-Clause 11 | //--------------------------------------------------------------------------- 12 | namespace saneql { 13 | //--------------------------------------------------------------------------- 14 | class Type; 15 | //--------------------------------------------------------------------------- 16 | namespace algebra { 17 | class IU; 18 | } 19 | //--------------------------------------------------------------------------- 20 | /// Helper class to generate SQL 21 | class SQLWriter { 22 | private: 23 | /// The result buffer 24 | std::string result; 25 | /// The current target 26 | std::string* target; 27 | /// All assigned IU names 28 | std::unordered_map iuNames; 29 | 30 | public: 31 | /// Constructor 32 | SQLWriter(); 33 | /// Destructor 34 | ~SQLWriter(); 35 | 36 | /// Write a SQL fragment 37 | void write(std::string_view sql); 38 | /// Write an identifier, quoting as needed 39 | void writeIdentifier(std::string_view identifier); 40 | /// Write an IU 41 | void writeIU(const 
algebra::IU* iu); 42 | /// Write a string literal 43 | void writeString(std::string_view str); 44 | /// Write a type 45 | void writeType(Type type); 46 | 47 | /// Get the result 48 | std::string getResult() const { return result; } 49 | }; 50 | //--------------------------------------------------------------------------- 51 | } 52 | //--------------------------------------------------------------------------- 53 | #endif 54 | --------------------------------------------------------------------------------