├── .gitignore ├── .gitmodules ├── .idea ├── editor.xml ├── inspectionProfiles │ └── Project_Default.xml ├── misc.xml ├── modules.xml ├── tails.iml ├── vcs.xml └── workspace.xml ├── CMakeLists.txt ├── README.md ├── Syntax.md ├── Tails.xcodeproj ├── project.pbxproj ├── project.xcworkspace │ ├── contents.xcworkspacedata │ └── xcshareddata │ │ ├── IDEWorkspaceChecks.plist │ │ └── WorkspaceSettings.xcsettings └── xcshareddata │ └── xcschemes │ ├── REPL.xcscheme │ └── Tests.xcscheme ├── build.sh └── src ├── compiler ├── compiler+stackcheck.hh ├── compiler.cc ├── compiler.hh ├── disassembler.hh ├── parser.cc ├── stack_effect_parser.hh ├── vocabulary.cc └── vocabulary.hh ├── core ├── core_words.cc ├── core_words.hh ├── instruction.hh ├── platform.hh ├── stack_effect.hh ├── utils.hh └── word.hh ├── io.hh ├── more_words.cc ├── more_words.hh ├── repl.cc ├── test.cc └── values ├── gc.cc ├── gc.hh ├── nan_tagged.hh ├── value.cc └── value.hh /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | tails_test 3 | tails 4 | build_cmake 5 | cmake-build-debug 6 | cmake-build-release 7 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "vendor/linenoise"] 2 | path = vendor/linenoise 3 | url = https://github.com/rain-1/linenoise-mob.git 4 | -------------------------------------------------------------------------------- /.idea/editor.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 115 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 18 | -------------------------------------------------------------------------------- /.idea/misc.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/tails.iml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 9 | 12 | 13 | 15 | 16 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 52 | 53 | 55 | 56 | 57 | 59 | 65 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 144 | 147 | 148 | 149 | 155 | 179 | 180 | 181 | 182 | 184 | 185 | 186 | 187 | 189 | 190 | 191 | 192 | 194 | 195 | 196 | 197 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 1737250555372 211 | 216 | 217 | 218 | 219 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | file://$PROJECT_DIR$/src/core/core_words.cc 231 | 205 232 | 234 | 235 | 236 | 237 | 
-------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.20) 2 | 3 | project( tails 4 | VERSION 0.1.0 5 | DESCRIPTION "Minimal Forth-like language" 6 | LANGUAGES CXX C 7 | ) 8 | 9 | 10 | set(CMAKE_CXX_STANDARD 20) 11 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 12 | set(CMAKE_C_STANDARD 11) 13 | set(CMAKE_C_STANDARD_REQUIRED ON) 14 | 15 | 16 | #### CONFIG 17 | 18 | 19 | if (NOT(CMAKE_BUILD_TYPE STREQUAL "Debug")) 20 | set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) # Enable link-time optimization 21 | endif() 22 | 23 | 24 | if (MSVC) 25 | # MSVC: 26 | add_definitions(-DWIN32_LEAN_AND_MEAN -D_WIN32_WINNT=0x0A00 -DNOMINMAX) 27 | else() 28 | # Clang & GCC: 29 | add_compile_options( 30 | -Werror 31 | -Wall 32 | -Wpedantic 33 | -Wno-gnu-case-range 34 | -Wno-vla-extension 35 | -Wno-unknown-pragmas 36 | -Wno-unknown-warning-option 37 | ) 38 | if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX) 39 | # GCC-specific: 40 | add_compile_options( 41 | -Wno-psabi # suppress annoying GCC ABI warning 42 | -Wno-sign-compare # apparently comes with `pedantic` in GCC 43 | -D_FORTIFY_SOURCE=2 # static+dynamic buffer-overflow checks 44 | ) 45 | else() 46 | # Clang-specific: 47 | add_compile_options( 48 | -Wno-nullability-extension 49 | -Wno-gnu-zero-variadic-macro-arguments 50 | -Wno-gnu-statement-expression-from-macro-expansion 51 | -Wno-ambiguous-reversed-operator 52 | ) 53 | endif() 54 | endif() 55 | 56 | add_compile_options( 57 | $<$:-fsanitize=address> 58 | $<$:-fsanitize=undefined> 59 | 60 | $<$:-Ofast> # This improves speed, at least w/Clang 16 61 | ) 62 | 63 | add_link_options( 64 | $<$:-fsanitize=address> 65 | ) 66 | 67 | 68 | add_library( libtails STATIC 69 | src/compiler/compiler.cc 70 | src/compiler/parser.cc 71 | src/compiler/vocabulary.cc 72 | src/core/core_words.cc 73 | src/values/gc.cc 74 | src/values/value.cc 75 | 
src/more_words.cc 76 | ) 77 | 78 | target_include_directories( libtails PUBLIC 79 | src/ 80 | src/compiler/ 81 | src/core/ 82 | src/values/ 83 | ) 84 | 85 | # These flags _should_ speed up the interpreter by removing extra stack manipulation instructions, 86 | # but as of Jan 2025 (M1 MacBook Pro, AppleClang 16) they seem to make no difference: 87 | #target_compile_options(libtails PRIVATE 88 | # $<$:-fomit-frame-pointer> 89 | # $<$:-fno-stack-protector> 90 | #) 91 | 92 | 93 | #### TESTS 94 | 95 | 96 | add_executable( tails_tests 97 | src/test.cc 98 | ) 99 | 100 | target_link_libraries( tails_tests 101 | libtails 102 | ) 103 | 104 | 105 | #### CLI 106 | 107 | 108 | add_executable( tails 109 | src/repl.cc 110 | vendor/linenoise/linenoise.c 111 | vendor/linenoise/utf8.c 112 | ) 113 | 114 | target_include_directories( tails PRIVATE 115 | vendor/linenoise/ 116 | ) 117 | 118 | target_link_libraries( tails 119 | libtails 120 | ) 121 | -------------------------------------------------------------------------------- /Syntax.md: -------------------------------------------------------------------------------- 1 | # Tails Syntax 2 | 3 | The syntax of Tails is quite simple: 4 | 5 | * A line of input is broken into tokens. Tokens are mostly separated by whitespace, but the quote and bracket characters that mark string, array and quotation literals are also token boundaries. 6 | * A token matching a literal _(q.v.)_ pushes that literal value on the stack. 7 | * `IF`, `THEN`, `ELSE`, `BEGIN`, `WHILE`, `REPEAT` are reserved words for control structures, as described below. 8 | * Anything else is looked up as a word (function) in the vocabulary, and evaluates that word. 9 | * If a token doesn't match anything, it's a syntax error. 10 | 11 | > NOTE: All name lookup is **case-insensitive**, at least for ASCII letters. 12 | 13 | 14 | ## 1. 
Literals 15 | 16 | | Type | Syntax | Examples | 17 | |--------|--------|----------| 18 | | Null | `NULL` | `NULL`, `null`, `Null` | 19 | | Number | Same as C | `42`, `-8`, `0xbeef`, `1.234`, `6.02e23` | 20 | | String | Double-quoted, backslash escape | `"foo"`, `""`, `"\"wow\""` | 21 | | Array | Square brackets, space-separated | `[1 2 NULL "foo"]`, `[]` | 22 | | Quotation | Curly braces; optional stack effect | `{DUP *}`, `{(# # -- #) DUP +}` | 23 | 24 | Quotations are described in more detail below, under **Defining New Words**. 25 | 26 | 27 | ## 2. Built-In Words 28 | 29 | | Name | Inputs | Outputs | Description | 30 | |--------|--------|---------|-------------| 31 | | `DROP` | a | | Remove top value from stack | 32 | | `DUP` | a | a a | Duplicates top value | 33 | | `OVER` | a b | a b a | Copies 2nd value to top | 34 | | `ROT` | a b c | b c a | Moves 3rd value to top | 35 | | `SWAP` | a b | b a | Swaps top two values | 36 | | `+` | a b | c | Addition, or string or array concatenation | 37 | | `-` | # # | # | Subtraction | 38 | | `*` | # # | # | Multiplication | 39 | | `/` | # # | # | Division | 40 | | `=` | a b | # | Outputs 1 if a=b, else 0 | 41 | | `<>` | a b | # | Outputs 1 if a≠b, else 0 | 42 | | `>` | a b | # | Outputs 1 if a>b, else 0 | 43 | | `>=` | a b | # | Outputs 1 if a≥b, else 0 | 44 | | `<` | a b | # | Outputs 1 if a\<b, else 0 | 45 | | `<=` | a b | # | Outputs 1 if a≤b, else 0 | 46 | | `0=` | a | # | Outputs 1 if a=0, else 0 | 47 | | `0<>` | a | # | Outputs 1 if a≠0, else 0 | 48 | | `0>` | a | # | Outputs 1 if a>0, else 0 | 49 | | `0<` | a | # | Outputs 1 if a\<0, else 0 | 50 | | `LENGTH`| a | # | Length of string or array | 51 | | `ABS` | # | # | Absolute value | 52 | | `MAX` | a b | max | Maximum of a, b | 53 | | `MIN` | a b | min | Minimum of a, b | 54 | | `.` | a | | Writes text representation of `a` to stdout. | 55 | | `SP.` | | | Writes a space character to stdout. | 56 | | `NL.` | | | Writes a newline to stdout. | 57 | | `NL?` | | | Writes a newline, if necessary to start a new line. | 58 | | `DEFINE`| quote name | | Registers `quote` as a new word named `name`. 
| 59 | | `CALL` | ... quote| ? | Evaluates a quotation _(can't be used directly yet)_ | 60 | 61 | 62 | ## 3. Control Structures 63 | 64 | ("Truthy" means any value but `0` or `NULL`.) 65 | 66 | | Type | Syntax | Description | 67 | |-------------|---------------|------------| 68 | | Conditional | `IF ... THEN` | `IF` pops a value; if it's truthy, evaluates the words before `THEN`. | 69 | | | `IF ... ELSE ... THEN` | `IF` pops a value; if it's truthy, evaluates the words before `ELSE`, else evaluates the words between `ELSE` and `THEN`. | 70 | | | `a {...} {...} IFELSE` | Pops 3 params. If `a` is truthy calls the first quote, else calls the second. | 71 | | Loop | `BEGIN ... WHILE ... REPEAT` | `WHILE` pops a value, jumps past `REPEAT` if it's zero/null. `REPEAT` jumps back to `BEGIN`. | 72 | | Recursion | `RECURSE` | Calls the current word recursively. | 73 | 74 | 75 | ## 4. Defining New Words 76 | 77 | A quotation is a value that's a function, i.e. a sequence of words to evaluate. The only difference between a quotation and a word is that a word is registered in a vocabulary for the parser to find. 78 | 79 | To define a word, just write a quotation literal, then the name as a string, then invoke `DEFINE`: 80 | 81 | { .... } "name" DEFINE 82 | 83 | or, with a _stack effect_ declaration: 84 | 85 | { (... -- ...) .... } "name" DEFINE 86 | 87 | 88 | ### Stack Effects 89 | 90 | Every word and quotation has a stack effect, which declares the number of input and output values, and optionally their types. The compiler uses this to ensure that the stack doesn't get unbalanced, and can't underflow or overflow, and that there aren't any type mismatches. (Yes, words with variable numbers of inputs or outputs are not allowed.) 91 | 92 | Declaring a stack effect for a quotation is optional; if you don't, the compiler will figure it out on its own by examining the stack effects of the words the quotation calls. 
But in a quotation that defines a word it's good to declare it up front; both as human readable documentation, and so that the compiler can check your work and give you an error if the actual effect doesn't match the declaration. 93 | 94 | A stack effect declaration follows the opening `{` of a quotation and consists of: 95 | 96 | 1. An open paren `(` 97 | 2. Zero or more tokens, each representing an input value on the stack, with the top of stack on the right. 98 | 3. A separator `--` 99 | 4. Zero or more tokens for output values, with top of stack on the right. 100 | 5. A close paren `)` 101 | 102 | The input/output tokens can contain ASCII letters, underscores, and these punctuation marks that denote specific types, in no particular order: 103 | 104 | | Mark | Type | 105 | |-------------|------| 106 | | `?` | Null | 107 | | `#` | Number | 108 | | `$` | String | 109 | | `[` or `]` | Array | 110 | | `{` or `}` | Quote | 111 | 112 | A token with no punctuation marks represents a value of _any type_. Otherwise it represents only the given type(s). So for example `foo` can be any type, while `[foo]#?` can only be a number, an array or null. 113 | 114 | If **an output token exactly matches an input token**, that declares that at runtime it's _exactly the same type_ as the corresponding input. So for example `SWAP` has a stack effect of `(a b -- b a)` which declares that the types in the output are the reverse of the input types. And `+` is declared `(a#$[] b#$[] -- a#$[])`, indicating that the parameters can be numbers, strings or arrays, and that the output type is the same as the first parameter's type. 
115 | -------------------------------------------------------------------------------- /Tails.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /Tails.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | IDEDidComputeMac32BitWarning 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /Tails.xcodeproj/project.xcworkspace/xcshareddata/WorkspaceSettings.xcsettings: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | IDEWorkspaceSharedSettings_AutocreateContextsIfNeeded 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /Tails.xcodeproj/xcshareddata/xcschemes/REPL.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 30 | 31 | 32 | 33 | 43 | 45 | 51 | 52 | 53 | 54 | 60 | 62 | 68 | 69 | 70 | 71 | 73 | 74 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /Tails.xcodeproj/xcshareddata/xcschemes/Tests.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 30 | 31 | 32 | 33 | 46 | 48 | 54 | 55 | 56 | 57 | 61 | 62 | 63 | 64 | 70 | 72 | 78 | 79 | 80 | 81 | 83 | 84 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash -e 2 | # Super-stupid build script. Feel free to submit a real make / CMake project :) 3 | 4 | echo "Building Tails ..." 
5 | cd src 6 | rm -f *.o 7 | 8 | CC=clang 9 | CPP=clang++ 10 | # The following lines let me force GCC, for compatibility checking. --Jens 11 | #CC=/usr/local/bin/gcc-11 12 | #CPP=/usr/local/bin/g++-11 13 | 14 | compile="$CPP -std=c++17 -I . -I core -I values -I compiler -Wall -Wno-sign-compare" 15 | 16 | # Compile core_words.cc with special flags to suppress unnecessary stack frames 17 | $compile -c -O3 -fomit-frame-pointer -fno-stack-check -fno-stack-protector \ 18 | core/core_words.cc more_words.cc 19 | 20 | $compile -c {values,compiler}/*.cc 21 | 22 | echo "Testing..." 23 | $compile *.o test.cc -o ../tails_test 24 | ../tails_test >/dev/null || ../tails_test 25 | 26 | echo "Building 'tails' REPL ..." 27 | $CC -c -I ../vendor/linenoise ../vendor/linenoise/{linenoise,utf8}.c 28 | $compile -c -O3 {values,compiler}/*.cc 29 | $compile -O3 -I ../vendor/linenoise *.o repl.cc -o ../tails 30 | rm *.o 31 | 32 | echo "Done." 33 | -------------------------------------------------------------------------------- /src/compiler/compiler+stackcheck.hh: -------------------------------------------------------------------------------- 1 | // 2 | // compiler+stackcheck.cc 3 | // 4 | // Copyright (C) 2021 Jens Alfke. All Rights Reserved. 5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 
17 | // 18 | 19 | #pragma once 20 | #include "core_words.hh" 21 | #include "utils.hh" 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | namespace tails { 28 | using namespace std; 29 | using namespace tails::core_words; 30 | 31 | 32 | #pragma mark - EFFECTSTACK 33 | 34 | 35 | /// Simulates the runtime stack at compile time, while verifying stack effects. 36 | class Compiler::EffectStack { 37 | public: 38 | // A stack item can be either a TypeSet (set of types) or a literal Value. 39 | using Item = variant; 40 | 41 | EffectStack(const StackEffect &initial) { 42 | auto inputs = initial.inputs(); 43 | for (auto i = inputs.rbegin(); i != inputs.rend(); ++i) 44 | _stack.emplace_back(*i); 45 | _maxDepth = _initialDepth = depth(); 46 | } 47 | 48 | size_t depth() const {return _stack.size();} 49 | size_t maxGrowth() const {return _maxDepth - _initialDepth;} 50 | 51 | const Item& at(size_t i) const { 52 | assert(i < _stack.size()); 53 | return _stack[_stack.size() - 1 - i]; 54 | } 55 | 56 | optional literalAt(size_t i) const { 57 | if (i < depth()) { 58 | if (auto valP = std::get_if(&at(i)); valP) 59 | return *valP; 60 | } 61 | return nullopt; 62 | } 63 | 64 | bool operator==(const EffectStack &other) const { 65 | return _stack == other._stack; 66 | } 67 | 68 | /// Adds the stack effect of calling a word. Throws an exception on failure. 
69 | void add(const Word *word, const StackEffect &effect, const char *sourceCode) { 70 | // Check that the inputs match what's on the stack: 71 | const auto nInputs = effect.inputCount(); 72 | if (nInputs > depth()) 73 | throw compile_error(format("Calling `%s` would underflow (%zu needed, %zu available)", 74 | word->name(), nInputs, depth()), 75 | sourceCode); 76 | int i; 77 | if (auto badType = typeCheck(effect.inputs(), &i); badType) 78 | throw compile_error(format("Type mismatch passing %s to `%s` (depth %i)", 79 | Value::typeName(*badType), word->name(), i), 80 | sourceCode); 81 | 82 | std::vector<Item> inputs(nInputs); // Saved copies of the inputs (index 0 = top of stack), so outputs can refer back to them. A vector, not a VLA: VLAs are a non-standard extension. 83 | for (i = 0; i < nInputs; ++i) 84 | inputs[i] = at(i); 85 | 86 | _maxDepth = max(_maxDepth, depth() + effect.max()); 87 | 88 | // Pop the inputs off the stack: 89 | _stack.resize(depth() - nInputs); 90 | 91 | // Push the outputs to the stack: 92 | for (int i = effect.outputCount() - 1; i >= 0; --i) { 93 | TypeSet ef = effect.outputs()[i]; 94 | if (auto in = ef.inputMatch(); in >= 0) 95 | _stack.emplace_back(inputs[in]); // output is declared identical to input #in: reuse that item 96 | else 97 | _stack.emplace_back(ef); 98 | } 99 | } 100 | 101 | /// Pushes a literal to the stack. 102 | void add(Value value) { 103 | _stack.emplace_back(value); 104 | _maxDepth = max(_maxDepth, depth()); 105 | } 106 | 107 | /// Inserts a type at the _bottom_ of the stack -- used if deducing the input effect. 108 | void addAtBottom(TypeSet entry) { 109 | _stack.insert(_stack.begin(), entry); 110 | _maxDepth = max(_maxDepth, depth()); 111 | } 112 | 113 | /// Merges myself with another stack -- used when two flows of control join. 
114 | void mergeWith(const EffectStack &other, const char *sourceCode) { 115 | size_t d = depth(); 116 | if (d != other.depth()) 117 | throw compile_error("Inconsistent stack depth", sourceCode); 118 | for (size_t i = 0; i < d; ++i) { 119 | Item &mine = _stack[_stack.size() - 1 - i]; 120 | const Item others = other.at(i); 121 | if (others != mine) 122 | mine = itemTypes(mine) | itemTypes(others); 123 | } 124 | } 125 | 126 | /// Checks whether the current stack matches a StackEffect's outputs. 127 | /// if `canAddOutputs` is true, extra items on the stack will be added to the effect. 128 | void checkOutputs(StackEffect &effect, bool canAddOutputs) const { 129 | const auto nOutputs = effect.outputCount(); 130 | const auto myDepth = depth(); 131 | if (nOutputs > myDepth) 132 | throw compile_error(format("Insufficient outputs: have %zu, declared %zu", 133 | myDepth, nOutputs), nullptr); 134 | // Check effect outputs against stack: 135 | int i; 136 | if (auto badType = typeCheck(effect.outputs(), &i); badType) 137 | throw compile_error(format("Output type mismatch: can't be %s (depth %d)", 138 | Value::typeName(*badType), i), nullptr); 139 | 140 | // Add extra stack items to effect, if allowed: 141 | for (i = nOutputs; i < myDepth; ++i) { 142 | if (!canAddOutputs) 143 | throw compile_error(format("Too many outputs: have %zu, declared %zu", 144 | myDepth, nOutputs), nullptr); 145 | auto entry = itemTypes(at(i)); 146 | effect.addOutputAtBottom(entry); 147 | } 148 | } 149 | 150 | private: 151 | static TypeSet itemTypes(const Item &item) { 152 | if (auto valP = std::get_if(&item); valP) 153 | return TypeSet(valP->type()); 154 | else 155 | return std::get(item); 156 | } 157 | 158 | /// Checks if the stack items all match the allowed TypesView; 159 | /// if not, returns one of the invalid types. 
160 | std::optional typeCheck(TypesView types, int *outStackIndex) const { 161 | for (int i = 0; i < types.size(); ++i) { 162 | TypeSet badTypes = itemTypes(at(i)) - types[i]; 163 | if (auto badType = badTypes.firstType(); badType) { 164 | *outStackIndex = i; 165 | return badType; 166 | } 167 | } 168 | return std::nullopt; 169 | } 170 | 171 | std::vector _stack; 172 | size_t _initialDepth = 0; 173 | size_t _maxDepth = 0; 174 | }; 175 | 176 | 177 | #pragma mark - SOURCEWORD: 178 | 179 | 180 | /// Extension of WordRef that adds private fields used by the compiler. 181 | struct Compiler::SourceWord : public Compiler::WordRef { 182 | SourceWord(const WordRef &ref, const char *source =nullptr) 183 | :WordRef(ref) 184 | ,sourceCode(source) 185 | { } 186 | 187 | void branchesTo(InstructionPos pos) { 188 | branchTo = pos; 189 | pos->isBranchDestination = true; 190 | } 191 | 192 | const char* sourceCode; // Points to source code where word appears 193 | std::optional knownStack; // Stack effect at this point, once known 194 | std::optional branchTo; // Points to where a branch goes 195 | int pc = 0; // Relative address during code-gen 196 | const Word* interpWord = nullptr; // Which INTERP-family word to use 197 | bool isBranchDestination = false; // True if a branch points here 198 | }; 199 | 200 | 201 | #pragma mark - COMPILER STACK CHECKER: 202 | 203 | 204 | // Computes the stack effect of the word, throwing if it's inconsistent. 205 | void Compiler::computeEffect() { 206 | computeEffect(_words.begin(), EffectStack(_effect)); 207 | } 208 | 209 | 210 | // Subroutine that traces control flow, memoizing stack effects at each instruction. 211 | // @param i The item in `_words` to start at 212 | // @param curStack The known stack before the word at `i` 213 | // The cumulative stack effect is stored in `_effect`. 214 | // @throw compile_error if stack is inconsistent or there's an invalid branch offset. 
215 | void Compiler::computeEffect(InstructionPos i, EffectStack curStack) 216 | { 217 | while (true) { 218 | assert(i != _words.end()); 219 | // Store (memoize) the current stack at i, or verify it matches a previously stored one: 220 | if (i->knownStack) { 221 | if (*i->knownStack == curStack) 222 | return; // Nothing to do: already handled this control flow + types 223 | else 224 | curStack.mergeWith(*i->knownStack, i->sourceCode); 225 | } 226 | i->knownStack = curStack; 227 | 228 | // apply the instruction's effect: 229 | if (i->word == &_LITERAL) { 230 | // A literal, just push it 231 | curStack.add(i->param.literal); 232 | } else { 233 | // Determine the effect of a word: 234 | StackEffect nextEffect = i->word->stackEffect(); 235 | if (nextEffect.isWeird()) { 236 | if (i->word == &_RECURSE) { 237 | if (_effectCanAddInputs || _effectCanAddOutputs) 238 | throw compile_error("RECURSE requires an explicit stack effect declaration", 239 | i->sourceCode); 240 | nextEffect = _effect; 241 | if (!returnsImmediately(next(i))) { 242 | if (_flags & Word::Inline) 243 | throw compile_error("Illegal recursion in an inline word", 244 | i->sourceCode); 245 | nextEffect = nextEffect.withUnknownMax(); // non-tail recursion 246 | } 247 | } else if (i->word == &IFELSE) { 248 | nextEffect = effectOfIFELSE(i, curStack); 249 | } else { 250 | throw compile_error("Oops, don't know word's stack effect", i->sourceCode); 251 | } 252 | } 253 | 254 | if (_effectCanAddInputs) { 255 | // We are parsing code with unknown inputs, i.e. a quotation. If the word being 256 | // called takes more inputs than are on the stack, make them inputs of this code. 
257 | const auto nInputs = nextEffect.inputCount(); 258 | auto nAvailable = curStack.depth(); 259 | for (auto i = int(nAvailable); i < nInputs; ++i) { 260 | auto entry = nextEffect.inputs()[i]; 261 | curStack.addAtBottom(entry); 262 | _effect.addInputAtBottom(entry); 263 | } 264 | } 265 | 266 | // apply the word's effect: 267 | curStack.add(i->word, nextEffect, i->sourceCode); 268 | } 269 | 270 | if (i->word == &_RETURN) { 271 | // The stack when RETURN is reached determines the word's output effect. 272 | curStack.checkOutputs(_effect, _effectCanAddOutputs); 273 | _effectCanAddOutputs = false; 274 | if (curStack.maxGrowth() > _effect.max()) 275 | _effect = _effect.withMax(int(curStack.maxGrowth())); 276 | return; 277 | 278 | } else if (i->word == &_BRANCH || i->word == &_ZBRANCH) { 279 | assert(i->branchTo); 280 | // If this is a 0BRANCH, recurse to follow the non-branch case too: 281 | if (i->word == &_ZBRANCH) 282 | computeEffect(next(i), curStack); 283 | 284 | // Follow the branch: 285 | i = *i->branchTo; 286 | 287 | } else { 288 | // Continue to next instruction: 289 | ++i; 290 | } 291 | } 292 | } 293 | 294 | 295 | StackEffect Compiler::effectOfIFELSE(InstructionPos pos, EffectStack &curStack) { 296 | // Special case for IFELSE, which has a non-constant stack effect. 
297 | // The two top stack items must be literal quotation values (not just types): 298 | auto getQuoteEffect = [&](int i) { 299 | if (auto valP = curStack.literalAt(i); valP) { 300 | if (auto quote = valP->asQuote(); quote) 301 | return quote->stackEffect(); 302 | } 303 | throw compile_error("IFELSE must be preceded by two quotations", pos->sourceCode); 304 | }; 305 | StackEffect a = getQuoteEffect(1), b = getQuoteEffect(0); 306 | 307 | // Check if the quotations' effects are compatible, and merge them: 308 | if (a.net() != b.net()) 309 | throw compile_error("IFELSE quotes have inconsistent stack depths", pos->sourceCode); 310 | 311 | StackEffect result = a; 312 | 313 | for (int i = 0; i < b.inputCount(); i++) { 314 | auto entry = b.inputs()[i]; 315 | if (i < a.inputCount()) { 316 | entry = entry & result.inputs()[i]; 317 | if (!entry) 318 | throw compile_error(format("IFELSE quotes have incompatible parameter #%d", i), 319 | pos->sourceCode); 320 | result.inputs()[i] = entry; 321 | } else { 322 | result.addInput(entry); 323 | } 324 | } 325 | 326 | for (int i = 0; i < b.outputCount(); i++) { 327 | auto entry = b.outputs()[i]; 328 | if (i < a.outputCount()) { 329 | result.outputs()[i] = result.outputs()[i] | entry; 330 | } else { 331 | result.addOutput(entry); 332 | } 333 | } 334 | 335 | // Add the inputs of IFELSE itself -- the test and quotes: 336 | result.addInput(TypeSet::anyType()); 337 | result.addInput(TypeSet(Value::AQuote)); 338 | result.addInput(TypeSet(Value::AQuote)); 339 | 340 | return result.withMax( max(0, max(a.max(), b.max()) - 3) ); 341 | } 342 | 343 | } 344 | -------------------------------------------------------------------------------- /src/compiler/compiler.cc: -------------------------------------------------------------------------------- 1 | // 2 | // compiler.cc 3 | // 4 | // Copyright (C) 2021 Jens Alfke. All Rights Reserved. 
5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 17 | // 18 | 19 | #include "compiler.hh" 20 | #include "compiler+stackcheck.hh" 21 | #include "disassembler.hh" 22 | #include "core_words.hh" 23 | #include "stack_effect_parser.hh" 24 | #include "utils.hh" 25 | #include "vocabulary.hh" 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | 32 | namespace tails { 33 | using namespace std; 34 | using namespace tails::core_words; 35 | 36 | 37 | #pragma mark - COMPILEDWORD: 38 | 39 | 40 | CompiledWord::CompiledWord(string &&name, StackEffect effect, vector &&instrs) 41 | :_nameStr(toupper(name)) 42 | ,_instrs(std::move(instrs)) 43 | { 44 | _effect = effect; 45 | _instr = &_instrs.front(); 46 | if (!_nameStr.empty()) { 47 | _name = _nameStr.c_str(); 48 | Compiler::activeVocabularies.current()->add(*this); 49 | } 50 | } 51 | 52 | 53 | CompiledWord::CompiledWord(Compiler &&compiler) 54 | :CompiledWord(std::move(compiler._name), {}, compiler.generateInstructions()) 55 | { 56 | // Compiler's flags & effect are not valid until after generateInstructions(), above. 
57 | assert((compiler._flags & ~(Word::Inline | Word::Recursive | Word::Magic)) == 0); 58 | _flags = compiler._flags; 59 | _effect = compiler._effect; 60 | } 61 | 62 | 63 | CompiledWord::CompiledWord(const CompiledWord &word, std::string &&name) 64 | :CompiledWord(std::move(name), word.stackEffect(), vector(word._instrs)) 65 | { 66 | _flags = word._flags; 67 | } 68 | 69 | 70 | #pragma mark - COMPILER: 71 | 72 | 73 | VocabularyStack Compiler::activeVocabularies; 74 | 75 | 76 | Compiler::Compiler() { 77 | assert(activeVocabularies.current() != nullptr); 78 | _words.push_back({NOP}); 79 | } 80 | 81 | 82 | Compiler::~Compiler() = default; 83 | 84 | 85 | void Compiler::setInputStack(const Value *bottom, const Value *top) { 86 | _effect = StackEffect(); 87 | if (bottom && top) { 88 | for (auto vp = bottom; vp <= top; ++vp) 89 | _effect.addInput(TypeSet(vp->type())); 90 | } 91 | _effectCanAddInputs = false; 92 | _effectCanAddOutputs = true; 93 | } 94 | 95 | 96 | CompiledWord Compiler::compile(std::initializer_list words) { 97 | Compiler compiler; 98 | for (auto &ref : words) 99 | compiler.add(ref); 100 | return std::move(compiler).finish(); 101 | } 102 | 103 | 104 | Compiler::InstructionPos Compiler::add(const WordRef &ref, const char *source) { 105 | auto i = prev(_words.end()); 106 | bool isDst = i->isBranchDestination; 107 | *i = SourceWord(ref, source); 108 | i->isBranchDestination = isDst; // preserve this flag 109 | 110 | _words.push_back({NOP}); 111 | return i; 112 | } 113 | 114 | 115 | void Compiler::addInline(const Word &word, const char *source) { 116 | if (word.isNative()) { 117 | add({word}, source); // forward the source position, as the interpreted branch below does 118 | } else { 119 | Disassembler dis(word.instruction().word); 120 | while (true) { 121 | WordRef ref = dis.next(); 122 | if (ref.word == &_RETURN) 123 | break; 124 | add(ref, source); 125 | } 126 | } 127 | } 128 | 129 | 130 | void Compiler::addRecurse() { 131 | add({_RECURSE, intptr_t(-1)})->branchesTo(_words.begin()); 132 | } 133 | 134 | 135 | void 
Compiler::addBranchBackTo(InstructionPos pos) { 136 | add({_BRANCH, intptr_t(-1)})->branchesTo(pos); 137 | } 138 | 139 | 140 | void Compiler::fixBranch(InstructionPos src) { 141 | src->branchesTo(prev(_words.end())); 142 | } 143 | 144 | 145 | /// Adds a branch instruction (unless `branch` is NULL) 146 | /// and pushes its location onto the control-flow stack. 147 | void Compiler::pushBranch(char identifier, const Word *branch) { 148 | InstructionPos branchRef; 149 | if (branch) 150 | branchRef = add({*branch, intptr_t(-1)}, _curToken.data()); 151 | else 152 | branchRef = prev(_words.end()); // Will point to next word to be added 153 | _controlStack.push_back({identifier, branchRef}); 154 | } 155 | 156 | /// Pops the control flow stack, checks that the popped identifier matches, 157 | /// and returns the address of its branch instruction. 158 | Compiler::InstructionPos Compiler::popBranch(const char *matching) { 159 | if (!_controlStack.empty()) { 160 | auto ctrl = _controlStack.back(); 161 | if (strchr(matching, ctrl.first)) { 162 | _controlStack.pop_back(); 163 | return ctrl.second; 164 | } 165 | } 166 | throw compile_error("no matching IF or WHILE", _curToken.data()); 167 | } 168 | 169 | 170 | // Returns true if this instruction a RETURN, or a BRANCH to a RETURN. 
171 | bool Compiler::returnsImmediately(Compiler::InstructionPos pos) { 172 | if (pos->word == &_BRANCH) 173 | return returnsImmediately(*pos->branchTo); 174 | else 175 | return (pos->word == &_RETURN); 176 | } 177 | 178 | 179 | vector Compiler::generateInstructions() { 180 | if (!_controlStack.empty()) 181 | throw compile_error("Unfinished IF-ELSE-THEN or BEGIN-WHILE-REPEAT)", nullptr); 182 | 183 | // Add a RETURN, replacing the "next word" placeholder: 184 | assert(_words.back().word == &NOP); 185 | _words.back() = {_RETURN}; 186 | 187 | // Compute the stack effect and do type-checking: 188 | computeEffect(); 189 | 190 | // Assign a PC offset to each instruction, and do some optimizations: 191 | int interpCount = 0; 192 | InstructionPos firstInterp; 193 | bool afterBranch = false; 194 | int pc = 0; 195 | for (auto i = _words.begin(); i != _words.end();) { 196 | if (afterBranch && !i->isBranchDestination) { 197 | // Unreachable instruction after a branch 198 | i = _words.erase(i); 199 | continue; 200 | } 201 | 202 | i->pc = pc; 203 | if (i->word->isNative()) { 204 | if (i->word == &_RECURSE) { 205 | // Detect tail recursion: Change RECURSE to BRANCH if it's followed by RETURN: 206 | if (returnsImmediately(next(i))) 207 | i->word = &_BRANCH; 208 | else 209 | _flags = Word::Flags(_flags | Word::Recursive); 210 | } 211 | if (auto dst = i->branchTo; dst) { 212 | // Follow chains of branches: 213 | while ((*dst)->word == &_BRANCH) 214 | dst = (*dst)->branchTo; 215 | i->branchTo = dst; 216 | } 217 | // Note: We could optimize a BRANCH to RETURN into a RETURN; but currently we use 218 | // RETURN as an end-of-word marker, so it can only appear at the end of a word. 219 | interpCount = 0; 220 | pc += i->word->parameters(); 221 | } else { 222 | // In a series of 1 or more interpreted words, set the _first_ one's `interpWord` to 223 | // the appropriate word. 
As more words are found it's changed from INTERP to INTERP2 224 | // etc.; and if the final one is followed by RETURN it's changed to the matching 225 | // TAILINTERP word. 226 | if (interpCount == 0 || interpCount >= kMaxInterp || i->isBranchDestination) { 227 | interpCount = 0; 228 | firstInterp = i; 229 | pc += 1; 230 | } 231 | bool isTail = returnsImmediately(next(i)); 232 | firstInterp->interpWord = kInterpWords[isTail][interpCount]; 233 | ++interpCount; 234 | } 235 | afterBranch = (i->word == &_BRANCH); 236 | ++pc; 237 | ++i; 238 | } 239 | 240 | // Assemble `_words` into a series of instructions: 241 | vector instrs; 242 | instrs.reserve(pc); 243 | for (auto i = _words.begin(); i != _words.end(); ++i) { 244 | if (i->word->isNative()) { 245 | // Add a native word. If it's a branch, compute its PC offset. Then add any param: 246 | instrs.push_back(*i->word); 247 | if (i->branchTo) 248 | i->param.offset = (*i->branchTo)->pc - i->pc - 2; 249 | if (i->word->parameters()) 250 | instrs.push_back(i->param); 251 | } else { 252 | // The first of a series of interpreted words will have `interpWord` set to the 253 | // appropriate INTERP-family native word, so emit it: 254 | if (i->interpWord) 255 | instrs.push_back(*i->interpWord); 256 | // For each interpreted word add its word as a parameter: 257 | instrs.push_back(*i->word); 258 | } 259 | } 260 | assert(instrs.size() == pc); 261 | return instrs; 262 | } 263 | 264 | 265 | CompiledWord Compiler::finish() && { 266 | return CompiledWord(std::move(*this)); 267 | // the CompiledWord constructor will call generateInstructions() 268 | } 269 | 270 | 271 | #pragma mark WORDS: 272 | 273 | 274 | namespace core_words { 275 | 276 | NATIVE_WORD(DEFINE, "DEFINE", "{code} $name -- "_sfx) { 277 | auto name = sp[0].asString(); 278 | auto quote = (const CompiledWord*)sp[-1].asQuote(); 279 | sp -= 2; 280 | new CompiledWord(*quote, string(name)); 281 | NEXT(); 282 | } 283 | 284 | } 285 | } 286 | 
-------------------------------------------------------------------------------- /src/compiler/compiler.hh: -------------------------------------------------------------------------------- 1 | // 2 | // compiler.hh 3 | // 4 | // Copyright (C) 2021 Jens Alfke. All Rights Reserved. 5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 17 | // 18 | 19 | #pragma once 20 | #include "word.hh" 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | 28 | namespace tails { 29 | 30 | class Compiler; 31 | class VocabularyStack; 32 | 33 | namespace core_words { 34 | extern const Word _LITERAL; 35 | } 36 | 37 | 38 | /// A subclass of Word that manages storage of its name and instructions, so it can be 39 | /// created at runtime. 40 | class CompiledWord : public Word { 41 | public: 42 | CompiledWord(std::string &&name, StackEffect effect, std::vector &&instrs); 43 | 44 | /// Constructs a word from a compiler. Call this instead of Compiler::finish. 45 | explicit CompiledWord(Compiler&&); 46 | 47 | /// Copies a CompiledWord, adding a name. 
48 | CompiledWord(const CompiledWord&, std::string &&name); 49 | 50 | private: 51 | std::string const _nameStr; // Backing store for inherited _name 52 | std::vector const _instrs {}; // Backing store for inherited _instr 53 | }; 54 | 55 | 56 | class compile_error : public std::runtime_error { 57 | public: 58 | compile_error(const char *msg, const char *loc) :runtime_error(msg), location(loc) { } 59 | compile_error(const std::string &msg, const char *loc) :runtime_error(msg), location(loc) { } 60 | 61 | const char *const location; 62 | }; 63 | 64 | 65 | /// An object that assembles an interpreted word from a list of words to call. 66 | /// It computes and validates the word's stack effect. 67 | class Compiler { 68 | public: 69 | class EffectStack; 70 | 71 | /// A reference to a word and its parameter (if any), used during compilation. 72 | struct WordRef { 73 | WordRef(const Word &w) :word(&w), param((Op)0) {assert(!w.parameters());} 74 | WordRef(const Word &w, Instruction p):word(&w), param(p) {assert(w.parameters() > 0);} 75 | WordRef(const Word &w, Value v) :word(&w), param(v) {assert(w.parameters() > 0);} 76 | WordRef(const Word &w, intptr_t o) :word(&w), param(o) {assert(w.parameters() > 0);} 77 | 78 | WordRef(Value v) :WordRef(core_words::_LITERAL, v) { } 79 | WordRef(double d) :WordRef(core_words::_LITERAL, Value(d)) { } 80 | 81 | bool hasParam() const {return word->parameters() || !word->isNative();} 82 | 83 | const Word* word; // The word (interpreted or native) 84 | Instruction param; // Optional parameter, if it has one 85 | 86 | private: 87 | friend class Compiler; 88 | WordRef() :param(intptr_t(0)) { } 89 | }; 90 | 91 | Compiler(); 92 | explicit Compiler(std::string name) :Compiler() {_name = std::move(name);} 93 | ~Compiler(); 94 | 95 | Compiler(const Compiler&) = delete; 96 | Compiler(Compiler&&); 97 | 98 | /// Declares what the word's stack effect must be. 
99 | /// If the actual stack effect (computed during \ref finish) is different, a 100 | /// compile error is thrown. 101 | /// @param effect The stack effect. 102 | /// @param canAddInputs If true, additional inputs are allowed (the words can reach 103 | /// deeper into the stack) and the effect will be updated accordingly. 104 | /// @param canAddOutputs If true, additional outputs are allowed (more values may be left 105 | /// on the stack) and the effect will be updated accordingly. 106 | 107 | void setStackEffect(const StackEffect &effect, 108 | bool canAddInputs = false, 109 | bool canAddOutputs = false) 110 | { 111 | _effect = effect; 112 | _effectCanAddInputs = canAddInputs; 113 | _effectCanAddOutputs = canAddOutputs; 114 | } 115 | 116 | /// Sets the word's input stack effect from the given actual stack. The output effect is TBD. 117 | void setInputStack(const Value *bottom, const Value *top); 118 | 119 | void setInline() {_flags = Word::Flags(_flags | Word::Inline);} 120 | 121 | /// Breaks the input string into words and adds them. 122 | void parse(const std::string &input); 123 | 124 | //---- Adding individual words: 125 | 126 | struct SourceWord; 127 | /// A reference to a WordRef added to the Compiler. 128 | using InstructionPos = std::list::iterator; 129 | 130 | /// Adds an instruction to a word being compiled. 131 | /// @return An opaque reference to this instruction, that can be used later to fix branches. 132 | InstructionPos add(const WordRef&, const char *source =nullptr); 133 | 134 | /// Adds a word by inlining its definition, if it's interpreted. Native words added normally. 135 | void addInline(const Word&, const char *source); 136 | 137 | void addBranchBackTo(InstructionPos); 138 | 139 | void addRecurse(); 140 | 141 | /// Updates a previously-written `BRANCH` or `ZBRANCH` instruction, to branch to the 142 | /// next instruction to be written. 143 | /// @param src The branch instruction to update. 
144 | void fixBranch(InstructionPos src); 145 | 146 | //---- Creating the CompiledWord: 147 | 148 | /// Finishes a word being compiled. Adds a RETURN instruction, and registers it with the 149 | /// global Vocabulary (unless it's unnamed.) 150 | /// The Compiler object should not be used any more after this is called. 151 | CompiledWord finish() &&; 152 | 153 | /// Creates a finished, anonymous CompiledWord from a list of word references. 154 | /// (Mostly just for tests.) 155 | static CompiledWord compile(std::initializer_list words); 156 | 157 | //---- Vocabularies 158 | 159 | /// The vocabularies the parser looks up words from 160 | static VocabularyStack activeVocabularies; 161 | 162 | private: 163 | friend class CompiledWord; 164 | 165 | using BranchTarget = std::pair; 166 | 167 | std::vector generateInstructions(); 168 | const char* parse(const char *input); 169 | Value parseString(std::string_view token); 170 | Value parseArray(const char* &input); 171 | Value parseQuote(const char* &input); 172 | void pushBranch(char identifier, const Word *branch =nullptr); 173 | InstructionPos popBranch(const char *matching); 174 | bool returnsImmediately(InstructionPos); 175 | void computeEffect(); 176 | void computeEffect(InstructionPos i, 177 | EffectStack stack); 178 | StackEffect effectOfIFELSE(InstructionPos, EffectStack&); 179 | 180 | std::string _name; 181 | Word::Flags _flags {}; 182 | std::list _words; 183 | StackEffect _effect; 184 | bool _effectCanAddInputs = true; 185 | bool _effectCanAddOutputs = true; 186 | std::string_view _curToken; 187 | std::vector _controlStack; 188 | }; 189 | 190 | } 191 | -------------------------------------------------------------------------------- /src/compiler/disassembler.hh: -------------------------------------------------------------------------------- 1 | // 2 | // disassembler.hh 3 | // 4 | // Copyright (C) 2021 Jens Alfke. All Rights Reserved. 
5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 17 | // 18 | 19 | #pragma once 20 | #include "compiler.hh" 21 | #include "core_words.hh" 22 | #include "word.hh" 23 | #include "vocabulary.hh" 24 | 25 | namespace tails { 26 | using namespace std; 27 | 28 | class Disassembler { 29 | public: 30 | Disassembler(const Instruction *pc) :_pc(pc) { } 31 | 32 | void setLiteral(bool literal) {_literal = literal;} 33 | 34 | explicit operator bool() const {return _pc != nullptr;} 35 | 36 | std::optional _next() { 37 | assert(_pc); 38 | const Word *word = Compiler::activeVocabularies.lookup(*_pc++); 39 | if (!_literal && word && word->hasWordParams()) 40 | word = Compiler::activeVocabularies.lookup(*_pc++); 41 | if (!word) 42 | return nullopt; 43 | else if (word->parameters()) 44 | return Compiler::WordRef(*word, *_pc++); 45 | else { 46 | if (word == &core_words::_RETURN) 47 | _pc = nullptr; 48 | return Compiler::WordRef(*word); 49 | } 50 | } 51 | 52 | Compiler::WordRef next() { 53 | if (auto ref = _next(); ref) 54 | return *ref; 55 | throw runtime_error("Unknown instruction"); 56 | } 57 | 58 | 59 | static Compiler::WordRef wordOrParamAt(const Instruction *instr) { 60 | Disassembler dis(instr); 61 | if (auto word = dis._next(); word) 62 | return *word; 63 | dis = Disassembler(instr - 1); 64 | if (auto prev = dis.next(); prev.word->parameters()) 65 | return prev; 66 | else 67 | throw runtime_error("Unknown instruction"); 68 | } 69 
| 70 | 71 | static vector disassembleWord(const Instruction *instr, 72 | bool literal = false) 73 | { 74 | Disassembler dis(instr); 75 | dis.setLiteral(literal); 76 | vector instrs; 77 | while (dis) 78 | instrs.push_back(dis.next()); 79 | return instrs; 80 | } 81 | 82 | 83 | private: 84 | const Instruction* _pc; 85 | bool _literal = false; 86 | }; 87 | 88 | } 89 | -------------------------------------------------------------------------------- /src/compiler/parser.cc: -------------------------------------------------------------------------------- 1 | // 2 | // parser.cc 3 | // 4 | // Copyright (C) 2021 Jens Alfke. All Rights Reserved. 5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 
17 | // 18 | 19 | #include "compiler.hh" 20 | #include "core_words.hh" 21 | #include "stack_effect_parser.hh" 22 | #include "vocabulary.hh" 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | 29 | namespace tails { 30 | using namespace std; 31 | using namespace tails::core_words; 32 | 33 | 34 | static void skipWhitespace(const char* &input) { 35 | while (*input != 0 && isspace(*input)) 36 | ++input; 37 | } 38 | 39 | static char peek(const char* &input) { 40 | skipWhitespace(input); 41 | return *input; 42 | } 43 | 44 | static bool match(string_view token, string_view str) { 45 | return token.size() == str.size() 46 | && strncasecmp(token.data(), str.data(), token.size()) == 0; 47 | } 48 | 49 | /// Skips whitespace, then reads & returns the next token: 50 | /// * an empty string at EOF; 51 | /// * a string literal, starting and ending with double-quotes; 52 | /// * a "{" or "}"; 53 | /// * a "[" or "]"; 54 | /// * else the largest number of consecutive non-whitespace non-closing-brace characters. 55 | static string_view readToken(const char* &input) { 56 | skipWhitespace(input); 57 | auto start = input; 58 | switch (*input) { 59 | case 0: 60 | // EOF: 61 | break; 62 | case '"': 63 | // String literal: 64 | do { 65 | ++input; 66 | //TODO: Handle escape sequences 67 | } while (*input != 0 && *input != '"'); 68 | if (*input) 69 | ++input; // include the trailing quote 70 | break; 71 | case '(': 72 | case '{': 73 | case '[': 74 | // Open array or quotation -- just return the single delimiter character 75 | ++input; 76 | break; 77 | default: 78 | // General token: read until next whitespace or closing brace/bracket: 79 | do { 80 | ++input; 81 | } while (*input != 0 && !isspace(*input) 82 | && *input != ')'&& *input != '}' && *input != ']'); 83 | break; 84 | } 85 | return {start, size_t(input - start)}; 86 | } 87 | 88 | 89 | /// Tries to parse `token` as an integer (decimal or hex) or floating-point number. 90 | /// Returns `nullopt` if it's not. 
Throws `compile_error` if it's an out-of-range number. 91 | static optional asNumber(string_view token) { 92 | try { 93 | size_t pos; 94 | double d = stod(string(token), &pos); 95 | if (pos == token.size() && !isnan(d) && !isinf(d)) 96 | return d; 97 | } catch (const std::out_of_range&) { 98 | throw compile_error("Number out of range", token.data()); 99 | } catch (const std::invalid_argument&) { 100 | // if invalid number, just return nullopt 101 | } 102 | return nullopt; 103 | } 104 | 105 | 106 | void Compiler::parse(const string &input) { 107 | const char *remainder = parse(input.c_str()); 108 | if (*remainder != 0) 109 | throw compile_error("Unexpected delimiter; expected end of input", remainder); 110 | } 111 | 112 | 113 | const char* Compiler::parse(const char *input) { 114 | while (true) { 115 | string_view token = _curToken = readToken(input); 116 | const char *sourcePos = token.data(); 117 | if (token.empty()) { 118 | // End of input 119 | break; 120 | 121 | } else if (token == "}") { 122 | // end of a nested word (quotation). Exit, but don't consume the '}'. 
123 | --input; 124 | break; 125 | 126 | } else if (token[0] == '"') { 127 | // String literal: 128 | add({_LITERAL, parseString(token)}, sourcePos); 129 | 130 | } else if (token == "[") { 131 | add({_LITERAL, parseArray(input)}, token.data()); 132 | 133 | } else if (token == "{") { 134 | add({_LITERAL, parseQuote(input)}, token.data()); 135 | 136 | } else if (match(token, "IF")) { 137 | // IF compiles into 0BRANCH, with offset TBD: 138 | pushBranch('i', &_ZBRANCH); 139 | 140 | } else if (match(token, "ELSE")) { 141 | // ELSE compiles into BRANCH, with offset TBD, and resolves the IF's branch: 142 | auto ifPos = popBranch("i"); 143 | pushBranch('e', &_BRANCH); 144 | fixBranch(ifPos); 145 | 146 | } else if (match(token, "THEN")) { 147 | // THEN generates no code but completes the remaining branch from IF or ELSE: 148 | auto ifPos = popBranch("ie"); 149 | fixBranch(ifPos); 150 | 151 | } else if (match(token, "BEGIN")) { 152 | // BEGIN generates no code but remembers the current address: 153 | pushBranch('b'); 154 | 155 | } else if (match(token, "WHILE")) { 156 | // IF compiles into 0BRANCH, with offset TBD: 157 | if (_controlStack.empty() || _controlStack.back().first != 'b') 158 | throw compile_error("no matching BEGIN for this WHILE", sourcePos); 159 | pushBranch('w', &_ZBRANCH); 160 | 161 | } else if (match(token, "REPEAT")) { 162 | // REPEAT generates a BRANCH back to the BEGIN's position, 163 | // and fixes up the WHILE to point to the next instruction: 164 | auto whilePos = popBranch("w"); 165 | auto beginPos = popBranch("b"); 166 | addBranchBackTo(beginPos); 167 | fixBranch(whilePos); 168 | 169 | } else if (match(token, "RECURSE")) { 170 | addRecurse(); 171 | 172 | } else if (const Word *word = Compiler::activeVocabularies.lookup(token); word) { 173 | // Known word is added as an instruction: 174 | if (word->isMagic()) 175 | throw compile_error("Special word " + string(token) 176 | + " cannot be added by parser", sourcePos); 177 | if (word->parameters()) { 178 
| assert(word->parameters() == 1); 179 | auto numTok = readToken(input); 180 | auto param = asNumber(numTok); 181 | if (!param || (*param != intptr_t(*param))) 182 | throw compile_error("Invalid param after " + string(token), numTok.data()); 183 | if (word->hasIntParams()) 184 | add({*word, (intptr_t)*param}, sourcePos); 185 | else 186 | add({*word, Value(*param)}, sourcePos); 187 | } else if (word->hasFlag(Word::Inline)) { 188 | addInline(*word, sourcePos); 189 | } else { 190 | add(*word, sourcePos); 191 | } 192 | 193 | } else if (auto np = asNumber(token); np) { 194 | // A number is added as a LITERAL instruction: 195 | add({_LITERAL, Value(*np)}, sourcePos); 196 | 197 | } else { 198 | throw compile_error("Unknown word '" + string(token) + "'", sourcePos); 199 | } 200 | } 201 | _curToken = {}; 202 | return input; 203 | } 204 | 205 | 206 | Value Compiler::parseString(string_view token) { 207 | if (token.size() == 1 || token[token.size()-1] != '"') 208 | throw compile_error("Unfinished string literal", token.end()); 209 | token = token.substr(1, token.size() - 2); 210 | return Value(token.data(), token.size()); 211 | } 212 | 213 | 214 | Value Compiler::parseArray(const char* &input) { 215 | Value arrayVal({}); 216 | std::vector *array = arrayVal.asArray(); 217 | while (true) { 218 | string_view token = readToken(input); 219 | if (token == "]") 220 | break; 221 | else if (token.empty()) 222 | throw compile_error("Unfinished array literal", input); 223 | else if (token[0] == '"') 224 | array->push_back(parseString(token)); 225 | else if (token == "[") 226 | array->push_back(parseArray(input)); 227 | else if (auto np = asNumber(token); np) 228 | array->push_back(Value(*np)); 229 | else 230 | throw compile_error("Invalid literal '" + string(token) + "' in array", token.data()); 231 | } 232 | return arrayVal; 233 | } 234 | 235 | 236 | Value Compiler::parseQuote(const char* &input) { 237 | Compiler quoteCompiler; 238 | // Check if there's a stack effect declaration: 239 
| if (peek(input) == '(') { 240 | const char *start = input + 1; 241 | do { 242 | ++input; 243 | if (*input == 0) 244 | throw compile_error("Missing ') to end quotation stack effect'", input); 245 | } while (*input != ')'); 246 | quoteCompiler.setStackEffect(parseStackEffect(start, input)); 247 | ++input; 248 | } 249 | 250 | // parse tokens to a new Word until the ']' delimiter: 251 | input = quoteCompiler.parse(input); 252 | if (*input != '}') 253 | throw compile_error("Missing '}'; unfinished quotation", input); 254 | ++input; 255 | 256 | return Value(new CompiledWord(std::move(quoteCompiler))); 257 | } 258 | 259 | } 260 | -------------------------------------------------------------------------------- /src/compiler/stack_effect_parser.hh: -------------------------------------------------------------------------------- 1 | // 2 | // stack_effect_parser.hh 3 | // 4 | // Copyright (C) 2021 Jens Alfke. All Rights Reserved. 5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 17 | // 18 | 19 | #include "stack_effect.hh" 20 | 21 | namespace tails { 22 | 23 | /// Compile-time stack-effect parser. This lets you represent stack effects as strings in 24 | /// Tails syntax, without incurring any parsing overhead; in fact the StackEffect object can 25 | /// be stored as-is in the program's binary. 
26 | /// The most convenient way to use this is with the `_sfx` string literal suffix, e.g: 27 | /// `static constexpr StackEffect kEffect = "a# b# -- c#"_sfx;` 28 | 29 | 30 | // Adds a type to a TypeSet given its stack-effect symbol. 31 | constexpr void addTypeSymbol(TypeSet &ts, char symbol) { 32 | switch (symbol) { 33 | case '?': ts.addType(Value::ANull); break; 34 | case '$': ts.addType(Value::AString); break; 35 | case '[': case ']': ts.addType(Value::AnArray); break; 36 | case '{': case '}': ts.addType(Value::AQuote); break; 37 | case '#': ts.addType(Value::ANumber); break; 38 | case 'a'...'z': 39 | case 'A'...'Z': 40 | case '0'...'9': 41 | case '_': break; 42 | default: throw std::runtime_error("Unknown stack type symbol"); 43 | } 44 | } 45 | 46 | /// Creates a TypeSet from a token string: 47 | /// - Alphanumerics and `_` are ignored 48 | /// - `?` means a null 49 | /// - `#` means a number 50 | /// - `$` means a string 51 | /// - `{` or `}` means an array 52 | /// - `[` or `]` means a quotation 53 | /// - If more than one type is given, either is allowed. 54 | /// - If no types are given, or only null, then any type is allowed. 55 | constexpr TypeSet parseTypeSet(const char *token, const char *tokenEnd = nullptr) { 56 | TypeSet ts; 57 | while (token != tokenEnd && *token) 58 | addTypeSymbol(ts, *token++); 59 | if (!ts.exists() || ts.flags() == 1) 60 | ts.addAllTypes(); 61 | return ts; 62 | } 63 | 64 | 65 | /// Initializes a StackEffect instance from a human-readable stack effect declaration. 66 | /// Generally you call \ref parseStackEffect instead, which returns a new instance. 67 | /// - Each token before the `--` is an input, each one after is an output. 68 | /// - Punctuation marks in tokens denote types, as described in the \ref TypeSet constructor; 69 | /// alphanumeric characters don't imply a type. If no type is given, any type is allowed. 
70 | /// - If an output token exactly matches an input, and contains alphanumerics, that means it 71 | /// has the same type as that input. So output "x" matches input "x". Output "n#?" matches 72 | /// input "n#?" but not "n#". Output "#" can't match anything. 73 | constexpr void _parseStackEffect(StackEffect &effect, const char *str, const char *end) { 74 | const char* tokenStart[StackEffect::kMaxEntries] = {}; // Start of each token in `str` 75 | size_t tokenLen[ StackEffect::kMaxEntries] = {}; // Length of each token in `str` 76 | auto entry = effect._entries.begin(); // Current TypeSet being populated 77 | bool inputs = true; // Are we still parsing inputs? 78 | const char *token = nullptr; // Current token, or NULL 79 | bool tokenIsNamed = false; // Does token have alphanumerics? 80 | 81 | for (const char *c = str; c <= end; ++c) { 82 | if (c == end || *c == 0 || *c == ' ' || *c == '\t') { 83 | if (token) { 84 | // End of token: 85 | if (!entry->exists() || entry->flags() == 0x1) 86 | entry->addAllTypes(); 87 | if (inputs) { 88 | if (tokenIsNamed) { 89 | tokenStart[effect._ins] = token; 90 | tokenLen[effect._ins] = c - token; 91 | } 92 | ++effect._ins; 93 | } else { 94 | // look for input token match: 95 | if (tokenIsNamed) { 96 | for (unsigned b = 0; b < effect._ins; b++) { 97 | if (tokenLen[b] == (c - token) 98 | && _compare(tokenStart[b], token, tokenLen[b])) { 99 | entry->setInputMatch(effect._entries[b], effect._ins - 1 - b); 100 | break; 101 | } 102 | } 103 | } 104 | ++effect._outs; 105 | } 106 | ++entry; 107 | token = nullptr; 108 | tokenIsNamed = false; 109 | } 110 | } else if (*c == '-') { 111 | // Separator: 112 | if (c+1 == end || c[1] != '-' || token || !inputs) 113 | throw std::runtime_error("Invalid stack separator"); 114 | c += 2; 115 | inputs = false; 116 | } else { 117 | if (!token) { 118 | // Start of token: 119 | effect.checkNotFull(); 120 | token = c; 121 | } 122 | // Add character to token: 123 | addTypeSymbol(*entry, *c); 124 | if 
(_isalpha(*c)) 125 | tokenIsNamed = true; 126 | } 127 | } 128 | if (inputs) 129 | throw std::runtime_error("Missing stack separator"); 130 | effect.setMax(); 131 | } 132 | 133 | /// Creates a StackEffect from a human-readable stack effect declaration. 134 | /// See the above \ref _parseStackEffect function for details. 135 | constexpr StackEffect parseStackEffect(const char *str, const char *end) { 136 | StackEffect effect; 137 | _parseStackEffect(effect, str, end); 138 | return effect; 139 | } 140 | 141 | 142 | /// Creates a StackEffect from a human-readable stack effect declaration. 143 | /// See the above \ref _parseStackEffect function for details. 144 | constexpr StackEffect parseStackEffect(const char *str) { 145 | return parseStackEffect(str, str + _strlen(str)); 146 | } 147 | 148 | 149 | /// Special operator that lets you create a StackEffect by suffixing its string literal form 150 | /// with `_sfx`. 151 | constexpr static inline StackEffect operator""_sfx (const char *str, size_t len) { 152 | StackEffect effect; 153 | _parseStackEffect(effect, str, str + len); 154 | return effect; 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /src/compiler/vocabulary.cc: -------------------------------------------------------------------------------- 1 | // 2 | // vocabulary.cc 3 | // 4 | // Copyright (C) 2021 Jens Alfke. All Rights Reserved. 5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 17 | // 18 | 19 | #include "vocabulary.hh" 20 | #include "core_words.hh" 21 | #include "word.hh" 22 | #include "utils.hh" 23 | #include "gc.hh" 24 | 25 | 26 | namespace tails { 27 | 28 | const Vocabulary Vocabulary::core(core_words::kWords); 29 | 30 | 31 | Vocabulary::Vocabulary(const Word* const *wordList) { 32 | add(wordList); 33 | } 34 | 35 | 36 | void Vocabulary::add(const Word* const *wordList) { 37 | while (*wordList) 38 | add(**wordList++); 39 | } 40 | 41 | 42 | void Vocabulary::add(const Word &word) { 43 | _words.insert({word.name(), &word}); 44 | } 45 | 46 | 47 | const Word* Vocabulary::lookup(std::string_view name) const { 48 | if (auto i = _words.find(toupper(std::string(name))); i != _words.end()) 49 | return i->second; 50 | else 51 | return nullptr; 52 | } 53 | 54 | 55 | const Word* Vocabulary::lookup(Instruction instr) const { 56 | for (auto i = _words.begin(); i != _words.end(); ++i) { 57 | if (i->second->instruction() == instr) 58 | return i->second; 59 | } 60 | return nullptr; 61 | } 62 | 63 | 64 | 65 | void VocabularyStack::push(const Vocabulary &v) { 66 | _active.push_back(&v); 67 | } 68 | 69 | void VocabularyStack::pop() { 70 | assert(_active.size() > 1); 71 | _active.pop_back(); 72 | } 73 | 74 | 75 | const Word* VocabularyStack::lookup(std::string_view name) const { 76 | for (auto vocab : _active) 77 | if (auto word = vocab->lookup(name); word) 78 | return word; 79 | return nullptr; 80 | } 81 | 82 | const Word* VocabularyStack::lookup(Instruction instr) const { 83 | for (auto vocab : _active) 84 | if (auto word = vocab->lookup(instr); word) 85 | return word; 86 | return nullptr; 87 | } 88 | 89 | VocabularyStack::iterator& VocabularyStack::iterator::operator++ () { 90 | if (++_iWord == _endWords) { 91 | if (++_iVoc != _endVoc) { 92 | _iWord = (*_iVoc)->begin(); 93 | _endWords = (*_iVoc)->end(); 94 | } 95 | } 96 | return *this; 97 | 
} 98 | 99 | 100 | void VocabularyStack::gcScan() { 101 | for (auto word : *this) 102 | gc::object::scanWord(word); 103 | } 104 | 105 | 106 | } 107 | -------------------------------------------------------------------------------- /src/compiler/vocabulary.hh: -------------------------------------------------------------------------------- 1 | // 2 | // vocabulary.hh 3 | // 4 | // Copyright (C) 2021 Jens Alfke. All Rights Reserved. 5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 17 | // 18 | 19 | #pragma once 20 | #include "instruction.hh" 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | namespace tails { 27 | class Word; 28 | 29 | /// A lookup table to find Words by name. Used by the Compiler. 30 | class Vocabulary { 31 | public: 32 | Vocabulary(); 33 | 34 | explicit Vocabulary(const Word* const *wordList); 35 | 36 | void add(const Word &word); 37 | 38 | void add(const Word* const *wordList); 39 | 40 | const Word* lookup(std::string_view name) const; 41 | const Word* lookup(Instruction) const; 42 | 43 | using map = std::unordered_map; 44 | using iterator = map::const_iterator; 45 | 46 | iterator begin() const {return _words.begin();} 47 | iterator end() const {return _words.end();} 48 | 49 | // The vocabulary of core words. 50 | static const Vocabulary core; 51 | 52 | private: 53 | map _words; 54 | }; 55 | 56 | 57 | /// A stack of Vocabulary objects to look up Words in. Used by the Compiler. 
58 | class VocabularyStack { 59 | public: 60 | VocabularyStack() :_active{&Vocabulary::core} { } 61 | 62 | void push(const Vocabulary &v); 63 | void pop(); 64 | 65 | const Word* lookup(std::string_view name) const; 66 | const Word* lookup(Instruction) const; 67 | 68 | /// The Vocabulary to which new Words are added, if any. 69 | Vocabulary* current() const {return _current;} 70 | 71 | void setCurrent(Vocabulary* v) {_current = v;} 72 | void setCurrent(Vocabulary &v) {return setCurrent(&v);} 73 | 74 | void gcScan(); 75 | 76 | class iterator { 77 | public: 78 | const Word* operator* () const {return _iWord->second;} 79 | const Word* operator-> () const {return _iWord->second;} 80 | iterator& operator++ (); 81 | bool operator==(const iterator &other) {return _iVoc == other._iVoc;} 82 | bool operator!=(const iterator &other) {return _iVoc != other._iVoc;} 83 | 84 | private: 85 | friend class VocabularyStack; 86 | iterator(const std::vector &active, 87 | Vocabulary::iterator beginWords, Vocabulary::iterator endWords) 88 | :_iVoc(active.begin()), _endVoc(active.end()), _iWord(beginWords), _endWords(endWords) 89 | { } 90 | 91 | std::vector::const_iterator _iVoc, _endVoc; 92 | Vocabulary::iterator _iWord, _endWords; 93 | }; 94 | 95 | iterator begin() const {return iterator(_active, _active.front()->begin(), _active.front()->end());} 96 | iterator end() const {return iterator(_active, _active.front()->end(), _active.front()->end());} 97 | 98 | private: 99 | std::vector _active; 100 | Vocabulary* _current = nullptr; 101 | }; 102 | 103 | } 104 | -------------------------------------------------------------------------------- /src/core/core_words.cc: -------------------------------------------------------------------------------- 1 | // 2 | // core_words.cc 3 | // 4 | // Copyright (C) 2021 Jens Alfke. All Rights Reserved. 5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 
8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 17 | // 18 | 19 | // Reference: 20 | 21 | #include "core_words.hh" 22 | #include "stack_effect.hh" 23 | 24 | 25 | namespace tails::core_words { 26 | 27 | static constexpr TypeSet 28 | Any = TypeSet::anyType(), 29 | Nul = TypeSet(Value::ANull), 30 | Num = TypeSet(Value::ANumber), 31 | Str = TypeSet(Value::AString), 32 | Arr = TypeSet(Value::AnArray); 33 | 34 | 35 | #pragma mark NATIVE WORDS: 36 | 37 | 38 | #pragma mark The absolute core: 39 | 40 | // Calls the interpreted word pointed to by the following instruction. 41 | NATIVE_WORD(_INTERP, "_INTERP", StackEffect::weird(), 42 | Word::MagicWordParam) 43 | { 44 | sp = call(sp, (pc++)->word); 45 | NEXT(); 46 | } 47 | 48 | // Returns from the current word. Every interpreted word ends with this. 49 | NATIVE_WORD(_RETURN, "_RETURN", StackEffect(), 50 | Word::Magic) 51 | { 52 | return sp; 53 | } 54 | 55 | // Pushes the following instruction as a Value. 56 | // (The stack effect is declared as untyped, but the stack checker sees the literal value on 57 | // the simulated stack and knows its exact type.) 58 | NATIVE_WORD(_LITERAL, "_LITERAL", StackEffect({}, {Any}), 59 | Word::MagicValParam) 60 | { 61 | *(++sp) = (pc++)->literal; 62 | NEXT(); 63 | } 64 | 65 | 66 | #pragma mark - CALL OPTIMIZATIONS: 67 | 68 | // Interprets 2 following words (saving an instruction and some clock cycles.) 
69 | NATIVE_WORD(_INTERP2, "_INTERP2", StackEffect::weird(), 70 | Word::MagicWordParam, 2) 71 | { 72 | sp = call(sp, (pc++)->word); 73 | sp = call(sp, (pc++)->word); 74 | NEXT(); 75 | } 76 | 77 | // Interprets 3 following words. 78 | NATIVE_WORD(_INTERP3, "_INTERP3", StackEffect::weird(), 79 | Word::MagicWordParam, 3) 80 | { 81 | sp = call(sp, (pc++)->word); 82 | sp = call(sp, (pc++)->word); 83 | sp = call(sp, (pc++)->word); 84 | NEXT(); 85 | } 86 | 87 | // Interprets 4 following words. 88 | NATIVE_WORD(_INTERP4, "_INTERP4", StackEffect::weird(), 89 | Word::MagicWordParam, 4) 90 | { 91 | sp = call(sp, (pc++)->word); 92 | sp = call(sp, (pc++)->word); 93 | sp = call(sp, (pc++)->word); 94 | sp = call(sp, (pc++)->word); 95 | NEXT(); 96 | } 97 | 98 | // _Jumps_ to the interpreted word pointed to by the following instruction, 99 | // as a tail-call optimization. The stack does not grow. 100 | // This must of course be the last word before a _RETURN. 101 | // (The _RETURN could then be optional, except it's currently used when inlining.) 102 | NATIVE_WORD(_TAILINTERP, "_TAILINTERP", StackEffect::weird(), 103 | Word::MagicWordParam, 1) 104 | { 105 | MUSTTAIL return call(sp, pc->word); 106 | } 107 | 108 | // Interprets 2 following words, jumping to the last one. 109 | NATIVE_WORD(_TAILINTERP2, "_TAILINTERP2", StackEffect::weird(), 110 | Word::MagicWordParam, 2) 111 | { 112 | sp = call(sp, (pc++)->word); 113 | MUSTTAIL return call(sp, pc->word); 114 | } 115 | 116 | // Interprets 3 following words, jumping to the last one. 117 | NATIVE_WORD(_TAILINTERP3, "_TAILINTERP3", StackEffect::weird(), 118 | Word::MagicWordParam, 3) 119 | { 120 | sp = call(sp, (pc++)->word); 121 | sp = call(sp, (pc++)->word); 122 | MUSTTAIL return call(sp, pc->word); 123 | } 124 | 125 | // Interprets 4 following words, jumping to the last one. 
126 | NATIVE_WORD(_TAILINTERP4, "_TAILINTERP4", StackEffect::weird(), 127 | Word::MagicWordParam, 4) 128 | { 129 | sp = call(sp, (pc++)->word); 130 | sp = call(sp, (pc++)->word); 131 | sp = call(sp, (pc++)->word); 132 | MUSTTAIL return call(sp, pc->word); 133 | } 134 | 135 | // There's no reason there couldn't be more of these: _INTERP5, _INTERP6, ... 136 | // They'd need to be implemented here, and added to kWords and kInterpWords. 137 | 138 | 139 | #pragma mark Stack gymnastics: 140 | 141 | NATIVE_WORD(DUP, "DUP", StackEffect({Any}, {Any/0, Any/0})) { 142 | ++sp; 143 | sp[0] = sp[-1]; 144 | NEXT(); 145 | } 146 | 147 | NATIVE_WORD(DROP, "DROP", StackEffect({Any}, {})) { 148 | --sp; 149 | NEXT(); 150 | } 151 | 152 | NATIVE_WORD(SWAP, "SWAP", StackEffect({Any, Any}, 153 | {Any/0, Any/1})) 154 | { 155 | std::swap(sp[0], sp[-1]); 156 | NEXT(); 157 | } 158 | 159 | NATIVE_WORD(OVER, "OVER", StackEffect({Any, Any}, 160 | {Any/1, Any/0, Any/1})) 161 | { 162 | ++sp; 163 | sp[0] = sp[-2]; 164 | NEXT(); 165 | } 166 | 167 | NATIVE_WORD(ROT, "ROT", StackEffect({Any, Any, Any}, 168 | {Any/1, Any/0, Any/2})) 169 | { 170 | auto sp2 = sp[-2]; 171 | sp[-2] = sp[-1]; 172 | sp[-1] = sp[ 0]; 173 | sp[ 0] = sp2; 174 | NEXT(); 175 | } 176 | 177 | // A placeholder used by the compiler that doesn't actually appear in code 178 | NATIVE_WORD(NOP, "NOP", StackEffect()) { 179 | NEXT(); 180 | } 181 | 182 | 183 | #pragma mark Control Flow: 184 | 185 | /* "It turns out that all you need in order to define looping constructs, IF-statements, etc. 186 | are two primitives. 187 | BRANCH is an unconditional branch. 188 | 0BRANCH is a conditional branch (it only branches if the top of stack is zero)." --JonesForth 189 | But t 190 | */ 191 | 192 | // reads offset from *pc 193 | NATIVE_WORD(_BRANCH, "BRANCH", StackEffect(), 194 | Word::MagicIntParam) 195 | { 196 | pc += pc->offset + 1; 197 | NEXT(); 198 | } 199 | 200 | // reads offset from *pc ... 
Assumes Value supports `operator !` 201 | NATIVE_WORD(_ZBRANCH, "0BRANCH", StackEffect({Any}, {}), 202 | Word::MagicIntParam) 203 | { 204 | if (!(*sp--)) 205 | pc += pc->offset; 206 | ++pc; 207 | NEXT(); 208 | } 209 | 210 | // recursively calls the current word. The offset back to the start of the word is stored at 211 | // *pc, so this is similar to a BRANCH back to the start, except it uses `call`. 212 | NATIVE_WORD(_RECURSE, "_RECURSE", StackEffect::weird(), 213 | Word::MagicIntParam) 214 | { 215 | call(sp, pc + 1 + pc->offset); 216 | ++pc; 217 | NEXT(); 218 | } 219 | 220 | 221 | // (? quote -> ?) Pops a quotation (word) and calls it. 222 | // The actual stack effect is that of the quotation it calls, which in the general case is 223 | // only known at runtime. Until the compiler's stack checker can deal with this, I'm making 224 | // this word Magic so it can't be used in source code. 225 | NATIVE_WORD(CALL, "CALL", StackEffect::weird(), 226 | Word::Magic) 227 | { 228 | const Word *quote = (*sp--).asQuote(); 229 | assert(quote); // FIXME: Handle somehow; exceptions? 230 | sp = call(sp, quote->instruction().word); 231 | NEXT(); 232 | } 233 | 234 | 235 | #pragma mark Higher Order Functions (Combinators): 236 | 237 | // (b quote1 quote2 -> ?) Pops params, then evals quote1 if b is truthy, else quote2. 238 | // Stack effect is dependent on quote1 and quote2; currently this word is special-cased by 239 | // the compiler's stack-checker. 240 | NATIVE_WORD(IFELSE, "IFELSE", StackEffect::weird()) { 241 | const Word *quote = (!!sp[-2] ? sp[-1] : sp[0]).asQuote(); 242 | sp = call(sp - 3, quote->instruction().word); 243 | NEXT(); 244 | } 245 | 246 | 247 | #pragma mark Arithmetic & Relational: 248 | 249 | // These assume the C++ Value type supports arithmetic and relational operators. 
250 | 251 | NATIVE_WORD(ZERO, "0", StackEffect({}, {Num})) { 252 | *(++sp) = Value(0); 253 | NEXT(); 254 | } 255 | 256 | NATIVE_WORD(ONE, "1", StackEffect({}, {Num})) { 257 | *(++sp) = Value(1); 258 | NEXT(); 259 | } 260 | 261 | static constexpr StackEffect kBinEffect({Num, Num}, {Num}); 262 | static constexpr StackEffect kRelEffect({Any, Any}, {Num}); 263 | static constexpr StackEffect k0RelEffect({Any}, {Num}); 264 | 265 | BINARY_OP_WORD(PLUS, "+", StackEffect({Num|Str|Arr, Num|Str|Arr}, 266 | {(Num|Str|Arr)/0}), 267 | +) 268 | BINARY_OP_WORD(MINUS, "-", kBinEffect, -) 269 | BINARY_OP_WORD(MULT, "*", kBinEffect, *) 270 | BINARY_OP_WORD(DIV, "/", kBinEffect, /) 271 | BINARY_OP_WORD(MOD, "MOD", kBinEffect, %) 272 | 273 | BINARY_OP_WORD(EQ, "=", kRelEffect, ==) 274 | BINARY_OP_WORD(NE, "<>", kRelEffect, !=) 275 | BINARY_OP_WORD(GT, ">", kRelEffect, >) 276 | BINARY_OP_WORD(GE, ">=", kRelEffect, >=) 277 | BINARY_OP_WORD(LT, "<", kRelEffect, <) 278 | BINARY_OP_WORD(LE, "<=", kRelEffect, <=) 279 | 280 | NATIVE_WORD(EQ_ZERO, "0=", k0RelEffect) { sp[0] = Value(sp[0] == Value(0)); NEXT(); } 281 | NATIVE_WORD(NE_ZERO, "0<>", k0RelEffect) { sp[0] = Value(sp[0] != Value(0)); NEXT(); } 282 | NATIVE_WORD(GT_ZERO, "0>", k0RelEffect) { sp[0] = Value(sp[0] > Value(0)); NEXT(); } 283 | NATIVE_WORD(LT_ZERO, "0<", k0RelEffect) { sp[0] = Value(sp[0] < Value(0)); NEXT(); } 284 | 285 | // [Appended an "_" to the symbol name to avoid conflict with C's `NULL`.] 286 | NATIVE_WORD(NULL_, "NULL", StackEffect({}, {Nul})) { 287 | *(++sp) = NullValue; 288 | NEXT(); 289 | } 290 | 291 | 292 | #pragma mark Strings & Arrays: 293 | 294 | NATIVE_WORD(LENGTH, "LENGTH", StackEffect({Str|Arr}, {Num})) { 295 | *sp = sp->length(); 296 | NEXT(); 297 | } 298 | 299 | 300 | #pragma mark - INTERPRETED WORDS: 301 | 302 | // These could easily be implemented in native code, but I'm making them interpreted for now 303 | // so the interpreted call path gets more use. 
--jpa May 2021 304 | 305 | // Warning: A numeric literal has to be preceded by _LITERAL, an interpreted word by _INTERP. 306 | 307 | INTERP_WORD(ABS, "ABS", StackEffect({Num}, {Num}).withMax(1), 308 | DUP, 309 | LT_ZERO, 310 | _ZBRANCH, Instruction::withOffset(3), 311 | ZERO, 312 | SWAP, 313 | MINUS 314 | ); 315 | 316 | INTERP_WORD(MAX, "MAX", StackEffect({Any, Any}, {Any}).withMax(2), 317 | OVER, 318 | OVER, 319 | LT, 320 | _ZBRANCH, Instruction::withOffset(1), 321 | SWAP, 322 | DROP 323 | ); 324 | 325 | 326 | INTERP_WORD (MIN, "MIN", StackEffect({Any, Any}, {Any}).withMax(2), 327 | OVER, 328 | OVER, 329 | GT, 330 | _ZBRANCH, Instruction::withOffset(1), 331 | SWAP, 332 | DROP 333 | ); 334 | 335 | 336 | #pragma mark - LIST OF CORE WORDS: 337 | 338 | 339 | // This null-terminated list is used to register these words in the Vocabulary at startup. 340 | 341 | const Word* const kWords[] = { 342 | &_INTERP, &_INTERP2, &_INTERP3, &_INTERP4, 343 | &_TAILINTERP, &_TAILINTERP2, &_TAILINTERP3, &_TAILINTERP4, 344 | &_LITERAL, &_RETURN, &_BRANCH, &_ZBRANCH, 345 | &NOP, &_RECURSE, 346 | &DROP, &DUP, &OVER, &ROT, &SWAP, 347 | &ZERO, &ONE, 348 | &EQ, &NE, &EQ_ZERO, &NE_ZERO, 349 | &GE, >, >_ZERO, 350 | &LE, <, <_ZERO, 351 | &ABS, &MAX, &MIN, 352 | &DIV, &MOD, &MINUS, &MULT, &PLUS, 353 | &CALL, 354 | &NULL_, 355 | &LENGTH, 356 | &IFELSE, 357 | &DEFINE, 358 | nullptr 359 | }; 360 | 361 | } 362 | -------------------------------------------------------------------------------- /src/core/core_words.hh: -------------------------------------------------------------------------------- 1 | // 2 | // core_words.hh 3 | // 4 | // Copyright (C) 2021 Jens Alfke. All Rights Reserved. 5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 
8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 17 | // 18 | 19 | #pragma once 20 | #include "word.hh" 21 | 22 | 23 | namespace tails::core_words { 24 | 25 | /// All the words defined herein. 26 | extern const Word 27 | _INTERP, _INTERP2, _INTERP3, _INTERP4, 28 | _TAILINTERP, _TAILINTERP2, _TAILINTERP3, _TAILINTERP4, 29 | _RETURN, _LITERAL, 30 | NOP, _RECURSE, 31 | DROP, DUP, OVER, ROT, SWAP, 32 | EQ, NE, EQ_ZERO, NE_ZERO, 33 | GE, GT, GT_ZERO, 34 | LE, LT, LT_ZERO, 35 | ABS, MAX, MIN, 36 | DIV, MOD, MINUS, MULT, PLUS, 37 | _BRANCH, _ZBRANCH, 38 | ONE, ZERO, 39 | DEFINE; 40 | 41 | extern const Word NULL_, LENGTH, CALL, IFELSE; 42 | 43 | /// Array of pointers to the above core words, ending in nullptr 44 | extern const Word* const kWords[]; 45 | 46 | /// Array of the `_INTERP` family of words. 47 | /// First array index is whether to tail-call the last word; 48 | /// Second index is the number of words that follow (0..kMaxInterp-1) 49 | static constexpr size_t kMaxInterp = 4; 50 | static constexpr const Word* kInterpWords[2][kMaxInterp] = { 51 | {&_INTERP, &_INTERP2, &_INTERP3, &_INTERP4}, 52 | {&_TAILINTERP, &_TAILINTERP2, &_TAILINTERP3, &_TAILINTERP4} 53 | }; 54 | } 55 | -------------------------------------------------------------------------------- /src/core/instruction.hh: -------------------------------------------------------------------------------- 1 | // 2 | // instruction.hh 3 | // 4 | // Copyright (C) 2021 Jens Alfke. All Rights Reserved. 
5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 17 | // 18 | 19 | #pragma once 20 | #include "platform.hh" 21 | #include "value.hh" 22 | 23 | 24 | namespace tails { 25 | union Instruction; 26 | 27 | 28 | // If ENABLE_TRACING is defined, a function `TRACE(sp,pc)` will be called before each Instruction. 29 | // Enabling this makes the code much less optimal, so only use when debugging. 30 | #ifndef NDEBUG 31 | # define ENABLE_TRACING 32 | #endif 33 | 34 | #ifdef ENABLE_TRACING 35 | NOINLINE void TRACE(Value *sp, const Instruction *pc); 36 | #else 37 | # define TRACE(SP,PC) (void)0 38 | #endif 39 | 40 | 41 | /// A native word is a C++ function with this signature. 42 | /// Interpreted words consist of an array of (mostly) Op pointers, 43 | /// but some native ops are followed by a parameter read by the function. 44 | /// @param sp Stack pointer. Top is sp[0], below is sp[-1], sp[-2] ... 45 | /// @param pc Program counter. Points to the _next_ op to run. 46 | /// @return The updated stack pointer. (But almost all ops tail-call via `NEXT()` 47 | /// instead of explicitly returning a value.) 48 | using Op = Value* (*)(Value *sp, const Instruction *pc); 49 | 50 | 51 | /// A Forth instruction. Interpreted code is a sequence of these. 
52 | union Instruction { 53 | Op native; // Every instruction starts with a native op 54 | const Instruction* word; // Interpreted word to call; parameter to INTERP 55 | intptr_t offset; // PC offset; parameter to BRANCH and ZBRANCH 56 | Value literal; // Value to push on stack; parameter to LITERAL 57 | 58 | constexpr Instruction(Op o) :native(o) { } 59 | constexpr Instruction(const Instruction *w) :word(w) { } 60 | constexpr Instruction(Value v) :literal(v) { } 61 | explicit constexpr Instruction(intptr_t o) :offset(o) { } 62 | 63 | static constexpr Instruction withOffset(intptr_t o) {return Instruction(o);} 64 | 65 | private: 66 | friend class Word; 67 | friend class WordRef; 68 | constexpr Instruction() :word(nullptr) { } 69 | }; 70 | 71 | inline bool operator== (const Instruction &a, const Instruction &b) {return a.native == b.native;} 72 | inline bool operator!= (const Instruction &a, const Instruction &b) {return !(a == b);} 73 | 74 | 75 | // The standard Forth NEXT routine, found at the end of every native op, 76 | // that jumps to the next op. 77 | // It uses tail-recursion, so (in an optimized build) it _literally does jump_, 78 | // without growing the call stack. 79 | #define NEXT() TRACE(sp, pc); MUSTTAIL return pc->native(sp, pc + 1) 80 | 81 | 82 | /// Calls an interpreted word pointed to by `fn`. Used by `INTERP` and `run`. 83 | /// @param sp Stack pointer 84 | /// @param start The first instruction of the word to run 85 | /// @return The stack pointer on completion. 86 | ALWAYS_INLINE 87 | static inline Value* call(Value *sp, const Instruction *start) { 88 | TRACE(sp, start); 89 | return start->native(sp, start + 1); 90 | } 91 | 92 | } 93 | -------------------------------------------------------------------------------- /src/core/platform.hh: -------------------------------------------------------------------------------- 1 | // 2 | // platform.hh 3 | // 4 | // Copyright (C) 2021 Jens Alfke. All Rights Reserved. 
5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 17 | // 18 | 19 | #pragma once 20 | 21 | 22 | // The __has_xxx() macros are only(?) implemented by Clang. (Except GCC has __has_attribute...) 23 | // Define them to return 0 on other compilers. 24 | // https://clang.llvm.org/docs/AttributeReference.html 25 | // https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html 26 | 27 | #ifndef __has_attribute 28 | #define __has_attribute(x) 0 29 | #endif 30 | 31 | #ifndef __has_builtin 32 | #define __has_builtin(x) 0 33 | #endif 34 | 35 | #ifndef __has_feature 36 | #define __has_feature(x) 0 37 | #endif 38 | 39 | #ifndef __has_extension 40 | #define __has_extension(x) 0 41 | #endif 42 | 43 | 44 | // Magic function attributes! 45 | // 46 | // Note: The `musttail` attribute is new (as of early 2021) and only supported by Clang. 47 | // Without it, Tails ops will not use tail recursion _in unoptimized builds_, meaning 48 | // the call stack will grow with every word called and eventually overflow. 
49 | // 50 | #ifdef __has_attribute 51 | # if __has_attribute(musttail) 52 | # define MUSTTAIL [[clang::musttail]] 53 | # endif 54 | # if __has_attribute(always_inline) 55 | # define ALWAYS_INLINE [[gnu::always_inline]] 56 | # endif 57 | # if __has_attribute(noinline) 58 | # define NOINLINE [[gnu::noinline]] 59 | # endif 60 | #endif 61 | #ifndef MUSTTAIL 62 | # define MUSTTAIL 63 | #endif 64 | #ifndef ALWAYS_INLINE 65 | # define ALWAYS_INLINE 66 | #endif 67 | #ifndef NOINLINE 68 | # define NOINLINE 69 | #endif 70 | 71 | 72 | // `_pure` functions are _read-only_. They cannot write to memory (in a way that's detectable), 73 | // and they cannot access volatile data or do I/O. 74 | // 75 | // Calling a pure function twice in a row with the same arguments MUST return the same result. 76 | // This guarantee allows the compiler to optimize out redundant calls. 77 | // 78 | // "Many functions have no effects except the return value, and their return value depends only on 79 | // the parameters and/or global variables. Such a function can be subject to common subexpression 80 | // elimination and loop optimization just as an arithmetic operator would be. These functions 81 | // should be declared with the attribute pure. 82 | // "The pure attribute prohibits a function from modifying the state of the program that is 83 | // observable by means other than inspecting the function’s return value. However, functions 84 | // declared with the pure attribute can safely read any non-volatile objects, and modify the value 85 | // of objects in a way that does not affect their return value or the observable state of the 86 | // program." 
-- GCC manual 87 | #if defined(__GNUC__) || __has_attribute(__pure__) 88 | #define _pure __attribute__((__pure__)) 89 | #else 90 | #define _pure 91 | #endif 92 | -------------------------------------------------------------------------------- /src/core/stack_effect.hh: -------------------------------------------------------------------------------- 1 | // 2 | // stack_effect.hh 3 | // 4 | // Copyright (C) 2021 Jens Alfke. All Rights Reserved. 5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 17 | // 18 | 19 | #pragma once 20 | #include "value.hh" 21 | #include "utils.hh" 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | namespace tails { 30 | 31 | /// A set of Value types. Describes one item, an input or output, in a word's stack effect. 32 | /// If it's a StackEffect output, it can optionally declare that it matches the type of an input. 
33 | class TypeSet { 34 | public: 35 | constexpr TypeSet() { } 36 | 37 | constexpr TypeSet(Value::Type type) {addType(type);} 38 | 39 | constexpr TypeSet(std::initializer_list types) { 40 | for (auto type : types) 41 | addType(type); 42 | } 43 | 44 | constexpr static TypeSet anyType() {return TypeSet(kTypeFlags);} 45 | constexpr static TypeSet noType() {return TypeSet();} 46 | 47 | constexpr bool exists() const {return _flags != 0;} 48 | constexpr bool canBeAnyType() const {return typeFlags() == kTypeFlags;} 49 | constexpr bool canBeType(Value::Type type) const {return (_flags & (1 << int(type))) != 0;} 50 | 51 | std::optional firstType() const { 52 | for (int i = 0; i < kNumTypes; ++i) 53 | if (_flags & (1<> kNumTypes) - 1;} 63 | 64 | constexpr void setInputMatch(TypeSet inputEntry, unsigned inputNo) { 65 | assert(inputNo <= 6); 66 | _flags = ((inputNo+1) << kNumTypes) | (inputEntry._flags & kTypeFlags); 67 | } 68 | 69 | constexpr TypeSet operator/ (unsigned inputNo) const { 70 | assert(inputNo <= 6); 71 | return TypeSet(typeFlags() | ((inputNo+1) << kNumTypes)); 72 | } 73 | 74 | /// I am "greater than" another entry if I support types it doesn't. 
75 | constexpr int compare(const TypeSet &other) const { 76 | if (typeFlags() == other.typeFlags()) 77 | return 0; 78 | else if ((typeFlags() & ~other.typeFlags()) != 0) 79 | return 1; 80 | else 81 | return -1; 82 | } 83 | 84 | constexpr bool operator== (const TypeSet &other) const {return compare(other) == 0;} 85 | constexpr bool operator!= (const TypeSet &other) const {return compare(other) != 0;} 86 | constexpr bool operator> (const TypeSet &other) const {return compare(other) > 0;} 87 | constexpr bool operator< (const TypeSet &other) const {return compare(other) < 0;} 88 | 89 | constexpr explicit operator bool() const {return exists();} 90 | 91 | constexpr TypeSet operator| (const TypeSet &other) const {return _flags | other._flags;} 92 | constexpr TypeSet operator& (const TypeSet &other) const {return _flags & other._flags;} 93 | constexpr TypeSet operator- (const TypeSet &other) const {return _flags & ~other._flags;} 94 | 95 | constexpr uint8_t typeFlags() const {return _flags & kTypeFlags;} 96 | constexpr uint8_t flags() const {return _flags;} // tests only 97 | 98 | private: 99 | constexpr TypeSet(int flags) :_flags(uint8_t(flags)) { } 100 | 101 | static constexpr int kNumTypes = 5; 102 | static constexpr uint8_t kTypeFlags = (1 << kNumTypes) - 1; 103 | 104 | uint8_t _flags = 0; 105 | }; 106 | 107 | 108 | 109 | /// A reference to a list of TypeSets in stack order. (Basically like a C++20 range.) 
110 | class TypesView { 111 | public: 112 | constexpr TypesView(TypeSet *bottom, TypeSet *top) 113 | :_bottom(bottom), _top(top) 114 | { assert(bottom && top && _bottom <= _top + 1); } 115 | 116 | constexpr TypesView(TypeSet *bottom, size_t size) 117 | :TypesView(bottom, bottom + size - 1) 118 | { } 119 | 120 | constexpr int size() const {return int(_top - _bottom + 1);} 121 | 122 | // Indexing is from the top of the stack 123 | constexpr TypeSet operator[] (size_t i) const {assert (i < size()); return *(_top - i);} 124 | constexpr TypeSet& operator[] (size_t i) {assert (i < size()); return *(_top - i);} 125 | 126 | // rbegin/rend start at the bottom of the stack 127 | // (begin() / end() would take more work to implement since ++ needs to decrement the ptr) 128 | constexpr const TypeSet* rbegin() const {return _bottom;} 129 | constexpr TypeSet* rbegin() {return _bottom;} 130 | constexpr const TypeSet* rend() const {return _top + 1;} 131 | constexpr TypeSet* rend() {return _top + 1;} 132 | 133 | constexpr bool operator== (const TypesView &other) const { 134 | auto sz = size(); 135 | if (sz != other.size()) 136 | return false; 137 | for (size_t i = 0; i < sz; ++i) 138 | if (_bottom[i] != other._bottom[i]) 139 | return false; 140 | return true; 141 | } 142 | 143 | constexpr bool operator!= (const TypesView &other) const { 144 | return !(*this == other); 145 | } 146 | 147 | private: 148 | TypeSet* const _bottom; 149 | TypeSet* const _top; 150 | }; 151 | 152 | 153 | 154 | /// Describes the API of a word: 155 | /// - how many inputs it reads from the stack, and their allowed types; 156 | /// - how many outputs it leaves on the stack, and their potential types; 157 | /// - the net change in stack depth (output count minus input count); 158 | /// - the maximum increase in stack depth during execution. 
159 | /// This is used by the compiler's stack checker to verify stack safety and type safety, 160 | /// and by the interpreter to allocate a sufficiently large stack at runtime. 161 | class StackEffect { 162 | public: 163 | /// Constructs an empty instance with zero inputs and outputs and max. 164 | constexpr StackEffect() { } 165 | 166 | /// Creates a stack effect lists of inputs and outputs. 167 | constexpr StackEffect(std::initializer_list inputs, 168 | std::initializer_list outputs) 169 | :_ins(inputs.size()) 170 | ,_outs(outputs.size()) 171 | { 172 | if (inputs.size() + outputs.size() >= kMaxEntries) 173 | throw std::runtime_error("Too many stack entries"); 174 | auto entry = &_entries[0]; 175 | for (auto in : inputs) 176 | *entry++ = in; 177 | for (auto out : outputs) 178 | *entry++ = out; 179 | setMax(); 180 | } 181 | 182 | /// Returns a copy with the max stack depth set to `max`. 183 | /// (However, the max will not be set less than 0, or less than the `net()`.) 184 | /// Usually called after the constructor to declare a custom max. 185 | constexpr StackEffect withMax(int max) { 186 | auto result = *this; 187 | result.setMax(max); 188 | return result; 189 | } 190 | 191 | static constexpr uint16_t kUnknownMax = UINT16_MAX; 192 | 193 | /// Returns a copy with the max stack depth set to "unknown". 194 | constexpr StackEffect withUnknownMax() {return withMax(kUnknownMax);} 195 | 196 | /// Returns a StackEffect whose inputs and outputs are not known at compile time. 197 | constexpr static StackEffect weird() { 198 | StackEffect result; 199 | result._weird = true; 200 | return result; 201 | } 202 | 203 | void addInput(TypeSet entry) {insert(entry, _ins); ++_ins;} 204 | 205 | void addOutput(TypeSet entry) {insert(entry, _ins + _outs); ++_outs;} 206 | 207 | void addInputAtBottom(TypeSet entry) {insert(entry, 0); ++_ins;} 208 | 209 | void addOutputAtBottom(TypeSet entry) {insert(entry, _ins); ++_outs;} 210 | 211 | /// Number of items read from stack on entry (i.e. 
minimum stack depth on entry) 212 | constexpr int inputCount() const {assert(!_weird); return _ins;} 213 | 214 | /// Number of items left on stack on exit, "replacing" the input 215 | constexpr int outputCount() const {assert(!_weird); return _outs;} 216 | 217 | /// Net change in stack depth from entry to exit; equal to `output` - `input`. 218 | constexpr int net() const {assert(!_weird); return int(_outs) - int(_ins);} 219 | 220 | /// Max growth of stack while the word runs 221 | constexpr int max() const {assert(!_weird); return _max;} 222 | 223 | /// True if actual max stack growth is not known at compile time (e.g. recursive fns) 224 | constexpr bool maxIsUnknown() const {return _max == UINT16_MAX;} 225 | 226 | /// True if the stack effect is unknown at compile time or depends on instruction params 227 | constexpr bool isWeird() const {return _weird;} 228 | 229 | /// The array of input types 230 | constexpr const TypesView inputs() const {return TypesView((TypeSet*)&_entries[0], _ins);} 231 | constexpr TypesView inputs() {return TypesView(&_entries[0], _ins);} 232 | 233 | /// The array of output types 234 | constexpr const TypesView outputs() const {return TypesView((TypeSet*)&_entries[_ins], _outs);} 235 | constexpr TypesView outputs() {return TypesView(&_entries[_ins], _outs);} 236 | 237 | constexpr bool operator== (const StackEffect &other) const { 238 | if (_ins == other._ins && _outs == other._outs && _max == other._max 239 | && !_weird && !other._weird) { 240 | for (int i = _ins + _outs - 1; i >= 0; --i) 241 | if (_entries[i] != other._entries[i]) 242 | return false; 243 | return true; 244 | } 245 | return false; 246 | } 247 | 248 | constexpr bool operator!= (const StackEffect &other) const {return !(*this == other);} 249 | 250 | private: 251 | friend constexpr void _parseStackEffect(StackEffect&, const char *str, const char *end); 252 | 253 | constexpr void checkNotFull() const { 254 | if (_ins + _outs >= kMaxEntries) 255 | throw std::runtime_error("Too 
/// Returns a std::string produced by formatting the arguments printf-style.
/// @param fmt  A printf-style format string, followed by the values it refers to.
/// @return  The formatted text, or an empty string if the output is empty or
///          formatting fails.
static inline std::string format(const char *fmt, ...) {
    // First pass: ask vsnprintf how many characters the output needs.
    va_list args;
    va_start(args, fmt);
    const int len = vsnprintf(nullptr, 0, fmt, args);
    va_end(args);
    if (len <= 0)
        return std::string();       // nothing to write, or a formatting error

    // Second pass: format straight into the string's buffer. vsnprintf writes `len`
    // characters plus a NUL, which lands on the string's own terminator.
    std::string result(size_t(len), '\0');
    va_start(args, fmt);
    vsnprintf(&result[0], size_t(len) + 1, fmt, args);
    va_end(args);
    return result;
}


/// Returns a copy of `str` with every character passed through the C `toupper` function.
static inline std::string toupper(std::string str) {
    for (char &c : str) {
        // The <ctype.h> functions require a value representable as `unsigned char`;
        // passing a negative plain `char` is undefined behavior, so convert first.
        c = char(::toupper(static_cast<unsigned char>(c)));
    }
    return str;
}
0 : ((a 10 | 11 | namespace tails { 12 | 13 | std::ostream& operator<< (std::ostream&, Value); // defined in value.cc 14 | 15 | 16 | inline std::ostream& operator<< (std::ostream &out, TypeSet entry) { 17 | if (entry.canBeAnyType()) 18 | out << "x"; 19 | else if (!entry.exists()) 20 | out << "∅"; 21 | else { 22 | static constexpr const char *kNames[] = {"?", "#", "$", "{}", "[]"}; 23 | for (int i = 0; i <= 5; ++i) { 24 | if (entry.canBeType(Value::Type(i))) { 25 | out << kNames[i]; 26 | } 27 | } 28 | } 29 | return out; 30 | } 31 | 32 | 33 | inline std::ostream& operator<< (std::ostream &out, TypesView types) { 34 | for (auto i = types.rbegin(); i != types.rend(); ++i) { 35 | if (i != types.rbegin()) out << ' '; 36 | out << *i; 37 | } 38 | return out; 39 | } 40 | 41 | 42 | inline std::ostream& operator<< (std::ostream &out, const StackEffect &effect) { 43 | return out << effect.inputs() << " -- " << effect.outputs(); 44 | } 45 | 46 | 47 | } 48 | -------------------------------------------------------------------------------- /src/more_words.cc: -------------------------------------------------------------------------------- 1 | // 2 | // more_words.cc 3 | // 4 | // Copyright (C) 2020 Jens Alfke. All Rights Reserved. 5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 
17 | // 18 | 19 | #include "more_words.hh" 20 | #include "io.hh" 21 | #include "stack_effect_parser.hh" 22 | #include 23 | 24 | 25 | namespace tails::word { 26 | using namespace std; 27 | 28 | 29 | #pragma mark - I/O: 30 | 31 | static bool sAtLeftMargin = true; 32 | 33 | NATIVE_WORD(PRINT, ".", "a --"_sfx) { 34 | std::cout << *(sp--); 35 | sAtLeftMargin = false; 36 | NEXT(); 37 | } 38 | 39 | NATIVE_WORD(SP, "SP.", "--"_sfx) { 40 | std::cout << ' '; 41 | sAtLeftMargin = false; 42 | NEXT(); 43 | } 44 | 45 | NATIVE_WORD(NL, "NL.", "--"_sfx) { 46 | std::cout << '\n'; 47 | sAtLeftMargin = true; 48 | NEXT(); 49 | } 50 | 51 | void endLine() { 52 | if (!sAtLeftMargin) { 53 | std::cout << '\n'; 54 | sAtLeftMargin = true; 55 | } 56 | } 57 | 58 | NATIVE_WORD(NLQ, "NL?", "--"_sfx) { 59 | endLine(); 60 | NEXT(); 61 | } 62 | 63 | 64 | 65 | #pragma mark - LIST OF WORDS: 66 | 67 | 68 | // This null-terminated list is used to register these words in the Vocabulary at startup. 69 | 70 | const Word* const kWords[] = { 71 | &PRINT, &SP, &NL, &NLQ, 72 | nullptr 73 | }; 74 | 75 | } 76 | -------------------------------------------------------------------------------- /src/more_words.hh: -------------------------------------------------------------------------------- 1 | // 2 | // more_words.hh 3 | // 4 | // Copyright (C) 2020 Jens Alfke. All Rights Reserved. 
5 | // 6 | 7 | #pragma once 8 | #include "word.hh" 9 | 10 | namespace tails::word { 11 | 12 | void endLine(); 13 | 14 | extern const Word 15 | PRINT, // `.` -- print top of stack to stdout 16 | SP, // `SP.` -- print a space character 17 | NL, // `NL.` -- print a newline 18 | NLQ; // `NL?` -- print a newline only if there are characters on the current line 19 | 20 | extern const Word* const kWords[]; 21 | } 22 | -------------------------------------------------------------------------------- /src/repl.cc: -------------------------------------------------------------------------------- 1 | // 2 | // repl.cc 3 | // 4 | // Copyright (C) 2021 Jens Alfke. All Rights Reserved. 5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 
17 | // 18 | 19 | #include "compiler.hh" 20 | #include "gc.hh" 21 | #include "io.hh" 22 | #include "more_words.hh" 23 | #include "vocabulary.hh" 24 | #include "linenoise.h" 25 | #include "utf8.h" 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | using namespace std; 34 | 35 | 36 | #ifdef ENABLE_TRACING 37 | namespace tails { 38 | void TRACE(Value *sp, const Instruction *pc) { } 39 | } 40 | #endif 41 | 42 | 43 | namespace repl { 44 | using namespace tails; 45 | 46 | 47 | static optional readLine(const char *prompt) { 48 | static once_flag sOnce; 49 | call_once(sOnce, [] { 50 | linenoiseSetEncodingFunctions(linenoiseUtf8PrevCharLen, linenoiseUtf8NextCharLen, 51 | linenoiseUtf8ReadCode); 52 | #ifdef __APPLE__ 53 | // Prevent linenoise from trying to use ANSI escapes in the Xcode console on macOS, 54 | // which is a TTY but does not set $TERM. For some reason linenoise thinks a missing $TERM 55 | // indicates an ANSI-compatible terminal (isUnsupportedTerm() in linenoise.c.) 56 | // So if $TERM is not set, set it to "dumb", which linenoise does understand. 57 | if (isatty(STDIN_FILENO) && getenv("TERM") == nullptr) 58 | setenv("TERM", "dumb", false); 59 | #endif 60 | }); 61 | 62 | char *cline = linenoise(prompt); 63 | if (!cline) 64 | return nullopt; 65 | string line = cline; 66 | linenoiseFree(cline); 67 | return line; 68 | } 69 | 70 | 71 | using Stack = std::vector; 72 | 73 | 74 | #ifdef ENABLE_TRACING 75 | // Exposed while running, for the TRACE function to use 76 | static Value * StackBase; 77 | #endif 78 | 79 | 80 | /// Top-level function to run a Word. 81 | /// @return The top value left on the stack. 
82 | static Stack run(const Word &word, Stack &stack) { 83 | assert(!word.isNative()); // must be interpreted 84 | if (word.stackEffect().inputCount() > stack.size()) 85 | throw compile_error("Stack would underflow", nullptr); 86 | auto depth = stack.size(); 87 | stack.resize(depth + word.stackEffect().max()); 88 | 89 | auto stackBase = &stack[0]; 90 | #ifdef ENABLE_TRACING 91 | StackBase = stackBase; 92 | #endif 93 | auto stackTop = call(&stack[depth] - 1, word.instruction().word); 94 | stack.resize(stackTop - stackBase + 1); 95 | return stack; 96 | } 97 | 98 | 99 | static void eval(const string &source, Stack &stack) { 100 | Compiler comp; 101 | comp.setInputStack(&stack.front(), &stack.back()); 102 | comp.parse(source); 103 | CompiledWord compiled(std::move(comp)); 104 | run(compiled, stack); 105 | } 106 | 107 | 108 | static void garbageCollect(Stack &stack) { 109 | Compiler::activeVocabularies.gcScan(); 110 | gc::object::scanStack(&stack.front(), &stack.back()); 111 | #if 1 112 | gc::object::sweep(); 113 | #else 114 | auto [preserved, freed] = gc::object::sweep(); 115 | if (freed > 0) 116 | cout << "GC: freed " << freed << " objects; " << preserved << " left.\n"; 117 | #endif 118 | } 119 | 120 | 121 | static constexpr int kPromptIndent = 40; 122 | 123 | 124 | // Right-justified output 125 | static void print(const string &str) { 126 | size_t len = min(str.size(), size_t(kPromptIndent)); 127 | size_t start = str.size() - len; 128 | cout << string(kPromptIndent - len, ' ') << str.substr(start, len); 129 | } 130 | 131 | 132 | // Print stack, right-justified 133 | static void print(const Stack &stack) { 134 | stringstream out; 135 | for (tails::Value v : stack) 136 | out << v << ' '; 137 | print(out.str()); 138 | } 139 | } 140 | 141 | using namespace repl; 142 | 143 | 144 | int main(int argc, const char **argv) { 145 | tails::Vocabulary defaultVocab(tails::word::kWords); 146 | tails::Compiler::activeVocabularies.push(defaultVocab); 147 | 
tails::Compiler::activeVocabularies.setCurrent(defaultVocab); 148 | 149 | cout << "Tails interpreter!! Empty line clears stack. Ctrl-D to exit.\n"; 150 | Stack stack; 151 | while (true) { 152 | print(stack); 153 | cout.flush(); 154 | optional line = readLine(" ➤ "); 155 | if (!line) 156 | break; 157 | else if (line->empty()) { 158 | if (stack.empty()) { 159 | print("Cleared stack."); 160 | cout << '\n'; 161 | } 162 | stack.clear(); 163 | } else { 164 | try { 165 | eval(*line, stack); 166 | tails::word::endLine(); 167 | garbageCollect(stack); 168 | } catch (const tails::compile_error &x) { 169 | if (x.location) { 170 | auto pos = x.location - line->data(); 171 | assert(pos >= 0 && pos <= line->size()); 172 | cout << string(kPromptIndent + 3 + pos, ' ') << "⬆︎\n"; 173 | } 174 | cout << string(kPromptIndent + 3, ' ') << "Error: " << x.what() << "\n"; 175 | } 176 | } 177 | } 178 | return 0; 179 | } 180 | -------------------------------------------------------------------------------- /src/test.cc: -------------------------------------------------------------------------------- 1 | // 2 | // test.cc 3 | // 4 | // Copyright (C) 2021 Jens Alfke. All Rights Reserved. 5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 
17 | // 18 | 19 | #include "core_words.hh" 20 | #include "compiler.hh" 21 | #include "disassembler.hh" 22 | #include "gc.hh" 23 | #include "more_words.hh" 24 | #include "stack_effect_parser.hh" 25 | #include "vocabulary.hh" 26 | #include "io.hh" 27 | #include 28 | #include 29 | #include 30 | 31 | // Need assert() even in a release build 32 | #ifdef NDEBUG 33 | #undef NDEBUG 34 | #include 35 | #define NDEBUG 1 36 | #else 37 | #include 38 | #endif 39 | 40 | using namespace std; 41 | using namespace tails; 42 | 43 | #ifdef ENABLE_TRACING 44 | // Exposed while running, for the TRACE function to use 45 | static Value * StackBase; 46 | #endif 47 | 48 | 49 | /// Top-level function to run a Word. 50 | /// @return The top value left on the stack. 51 | static Value run(const Word &word) { 52 | assert(!word.isNative()); // must be interpreted 53 | assert(word.stackEffect().inputCount() == 0); // must not require any inputs 54 | assert(word.stackEffect().outputCount() > 0); // must produce results 55 | size_t stackSize = word.stackEffect().max(); 56 | assert(stackSize >= word.stackEffect().outputCount()); 57 | std::vector stack; 58 | stack.resize(stackSize); 59 | auto stackBase = &stack.front(); 60 | #ifdef ENABLE_TRACING 61 | StackBase = stackBase; 62 | #endif 63 | return * call(stackBase - 1, word.instruction().word); 64 | } 65 | 66 | 67 | static void garbageCollect() { 68 | Compiler::activeVocabularies.gcScan(); 69 | auto [preserved, freed] = gc::object::sweep(); 70 | cout << "GC: freed " << freed << " objects; " << preserved << " left.\n"; 71 | } 72 | 73 | 74 | //======================== TEST CODE ========================// 75 | 76 | 77 | #ifdef ENABLE_TRACING 78 | namespace tails { 79 | /// Tracing function called at the end of each native op -- prints the stack 80 | void TRACE(Value *sp, const Instruction *pc) { 81 | cout << "\tbefore " << setw(14) << pc; 82 | auto dis = Disassembler::wordOrParamAt(pc); 83 | cout << " " << setw(12) << std::left << dis.word->name(); 84 | 
cout << ": "; 85 | for (auto i = StackBase; i <= sp; ++i) 86 | cout << ' ' << *i; 87 | cout << '\n'; 88 | } 89 | } 90 | #endif 91 | 92 | 93 | static void printStackEffect(StackEffect f) { 94 | cout << "Stack effect: (" << f << "), max stack " << f.max() << "\n"; 95 | } 96 | 97 | 98 | static void printDisassembly(const Word *word) { 99 | auto dis = Disassembler::disassembleWord(word->instruction().word, true); 100 | for (auto &wordRef : dis) { 101 | cout << ' ' << (wordRef.word->name() ? wordRef.word->name() : "???"); 102 | if (wordRef.word->hasIntParams()) 103 | cout << "+<" << (int)wordRef.param.offset << '>'; 104 | else if (wordRef.word->hasValParams()) 105 | cout << ":<" << wordRef.param.literal << '>'; 106 | else if (wordRef.word->hasWordParams()) 107 | cout << ":<" << Compiler::activeVocabularies.lookup(wordRef.param.word)->name() << '>'; 108 | } 109 | } 110 | 111 | 112 | static void _test(std::initializer_list words, 113 | const char *sourcecode, 114 | double expected) 115 | { 116 | cout << "* Testing {" << sourcecode << "} ...\n"; 117 | CompiledWord word = Compiler::compile(words); 118 | printStackEffect(word.stackEffect()); 119 | Value result = run(word); 120 | cout << "\t-> got " << result << "\n"; 121 | assert(result == Value(expected)); 122 | } 123 | 124 | 125 | static Value _runParser(const char *source) { 126 | cout << "* Parsing “" << source << "”\n"; 127 | Compiler compiler; 128 | compiler.parse(string(source)); 129 | CompiledWord parsed(std::move(compiler)); 130 | 131 | cout << "\tDisassembly:"; 132 | printDisassembly(&parsed); 133 | cout << "\n"; 134 | 135 | printStackEffect(parsed.stackEffect()); 136 | 137 | Value result = run(parsed); 138 | cout << "\t-> got " << result << '\n'; 139 | return result; 140 | } 141 | 142 | 143 | #define TEST(EXPECTED, ...) 
_test({__VA_ARGS__}, #__VA_ARGS__, EXPECTED) 144 | 145 | #define TEST_PARSER(EXPECTED, SRC) assert(_runParser(SRC) == EXPECTED) 146 | 147 | 148 | using namespace tails::core_words; 149 | 150 | 151 | __unused static constexpr StackEffect kSomeTS = "x# -- y#"_sfx; 152 | 153 | 154 | static void testStackEffect() { 155 | StackEffect ts = "--"_sfx; 156 | assert(ts.inputCount() == 0); 157 | assert(ts.outputCount() == 0); 158 | 159 | ts = "a -- b"_sfx; 160 | assert(ts.inputCount() == 1); 161 | assert(ts.outputCount() == 1); 162 | assert(ts.inputs()[0].flags() == 0x1F); 163 | assert(ts.outputs()[0].flags() == 0x1F); 164 | 165 | ts = "aaa# bbb#? -- ccc$ [d_d]?"_sfx; 166 | assert(ts.inputCount() == 2); 167 | assert(ts.outputCount() == 2); 168 | assert(ts.inputs()[0].flags() == 0x03); 169 | assert(ts.inputs()[1].flags() == 0x02); 170 | assert(ts.outputs()[0].flags() == 0x09); 171 | assert(ts.outputs()[1].flags() == 0x04); 172 | assert(!ts.outputs()[0].isInputMatch()); 173 | assert(ts.outputs()[0].inputMatch() == -1); 174 | 175 | ts = "apple ball# cat -- ball# cat apple"_sfx; 176 | assert(ts.inputCount() == 3); 177 | assert(ts.outputCount() == 3); 178 | assert(ts.inputs()[0].flags() == 0x1F); 179 | assert(ts.inputs()[1].flags() == 0x02); 180 | assert(ts.inputs()[2].flags() == 0x1F); 181 | assert(ts.outputs()[0].isInputMatch()); 182 | assert(ts.outputs()[0].inputMatch() == 2); 183 | assert(ts.outputs()[1].inputMatch() == 0); 184 | assert(ts.outputs()[2].inputMatch() == 1); 185 | assert(ts.outputs()[0].flags() == 0x7F); 186 | assert(ts.outputs()[1].flags() == 0x3F); 187 | assert(ts.outputs()[2].flags() == 0x42); 188 | } 189 | 190 | 191 | int main(int argc, char *argv[]) { 192 | Vocabulary defaultVocab(word::kWords); 193 | Compiler::activeVocabularies.push(defaultVocab); 194 | Compiler::activeVocabularies.setCurrent(defaultVocab); 195 | 196 | testStackEffect(); 197 | 198 | cout << "Known words:"; 199 | for (auto word : Compiler::activeVocabularies) 200 | cout << ' ' << 
word->name(); 201 | cout << "\n"; 202 | 203 | garbageCollect(); 204 | 205 | TEST(-1234, -1234); 206 | TEST(-1, 3, 4, MINUS); 207 | TEST(0.75, 3, 4, DIV); 208 | TEST(1, 1, 2, 3, ROT); 209 | TEST(1234, -1234, ABS); 210 | TEST(1234, 1234, ABS); 211 | TEST(4, 3, 4, MAX); 212 | TEST(4, 4, 3, MAX); 213 | 214 | CompiledWord SQUARE( []() { 215 | Compiler c("SQUARE"); 216 | c.setStackEffect("# -- #"_sfx); 217 | c.setInline(); 218 | c.add({DUP}); 219 | c.add({MULT}); 220 | return c; 221 | }()); 222 | 223 | TEST(16, 4, SQUARE); 224 | 225 | TEST(9604, 226 | 4, 227 | 3, 228 | PLUS, 229 | SQUARE, 230 | DUP, 231 | PLUS, 232 | SQUARE, 233 | ABS); 234 | 235 | TEST_PARSER(7, "3 -4 -"); 236 | TEST_PARSER(14, "4 3 + DUP + ABS"); 237 | TEST_PARSER(9604, "4 3 + SQUARE DUP + SQUARE ABS"); 238 | TEST_PARSER(2 , "2 ABS ABS ABS"); // testing INTERP2/3/4 239 | TEST_PARSER(123, "1 IF 123 ELSE 666 THEN"); 240 | TEST_PARSER(666, "0 IF 123 ELSE 666 THEN"); 241 | 242 | TEST_PARSER(120, "1 5 begin dup while swap over * swap 1 - repeat drop"); 243 | 244 | garbageCollect(); 245 | 246 | // Strings: 247 | TEST_PARSER("hello", R"( "hello" )"); 248 | TEST_PARSER("truthy", R"( 1 IF "truthy" ELSE "falsey" THEN )"); 249 | TEST_PARSER("HiThere", R"( "Hi" "There" + )"); 250 | TEST_PARSER(5, R"( "hello" LENGTH )"); 251 | 252 | // Arrays: 253 | TEST_PARSER(Value({12,34,56}), R"( [12 34 56] )"); 254 | TEST_PARSER(Value({Value(12)}), R"( [12] )"); 255 | TEST_PARSER(Value({12,"hi there",Value({}),56}), 256 | R"( [12 "hi there" [] 56] )"); 257 | TEST_PARSER(3, R"( [12 34 56] LENGTH )"); 258 | 259 | garbageCollect(); 260 | 261 | // Quotations and IFELSE: 262 | TEST_PARSER(3, R"( 3 {DUP 4} DROP )"); 263 | 264 | TEST_PARSER("yes", R"( 1 {"yes"} {"no"} IFELSE )"); 265 | TEST_PARSER("no", R"( 0 {"yes"} {"no"} IFELSE )"); 266 | 267 | TEST_PARSER(12, R"( 3 4 1 {(# # -- #) *} {(# # -- #) +} IFELSE )"); 268 | TEST_PARSER(7, R"( 3 4 0 {*} {+} IFELSE )"); 269 | 270 | TEST_PARSER(12, R"( 3 4 1 {*} {DROP} IFELSE )"); 271 | 
TEST_PARSER(3, R"( 3 4 0 {*} {DROP} IFELSE )"); 272 | 273 | // Writing to stdout: 274 | TEST_PARSER(0, R"( "Hello" . SP. 17 . NL. 0 )"); 275 | 276 | // Defining a new word: 277 | TEST_PARSER(0, R"( {(# -- #) 3 *} "thrice" define 0 )"); 278 | TEST_PARSER(72, R"( 8 thrice Thrice )"); 279 | 280 | // Define a typical recursive factorial function: 281 | TEST_PARSER(0, R"( {(# -- #) DUP 1 > IF DUP 1 - RECURSE * ELSE DROP 1 THEN} "factorial" define 0 )"); 282 | TEST_PARSER(120, R"( 5 factorial )"); 283 | auto fact = Compiler::activeVocabularies.lookup("factorial"); 284 | assert(fact); 285 | assert(fact->hasFlag(Word::Recursive)); 286 | 287 | // Define a tail-recursive form of factorial: 288 | // fact(a, n) -> fact(a * n, n - 1) when n > 1 289 | // -> a when n ≤ 1 290 | // n! -> fact(1, n) 291 | cout << '\n'; 292 | TEST_PARSER(0, R"( {(f# i# -- result#) DUP 1 > IF DUP ROT * SWAP 1 - RECURSE ELSE DROP THEN} "fact" define 0 )"); 293 | fact = Compiler::activeVocabularies.lookup("fact"); 294 | assert(fact); 295 | cout << "`fact` stack effect: "; 296 | printStackEffect(fact->stackEffect()); 297 | cout << "`fact` disassembly: "; 298 | printDisassembly(fact); 299 | cout << "\n"; 300 | assert(!fact->hasFlag(Word::Recursive)); 301 | assert(fact->stackEffect().max() == 2); 302 | 303 | TEST_PARSER(120, R"( 1 5 fact )"); 304 | 305 | // Define a tail-recursive form of triangle-number: 306 | cout << '\n'; 307 | TEST_PARSER(0, R"( {(f# i# -- result#) DUP 1 > IF DUP ROT + SWAP 1 - RECURSE ELSE DROP THEN} "tri" define 0 )"); 308 | auto tri = Compiler::activeVocabularies.lookup("tri"); 309 | assert(tri); 310 | cout << "`tri` stack effect: "; 311 | printStackEffect(tri->stackEffect()); 312 | cout << "`tri` disassembly: "; 313 | printDisassembly(tri); 314 | cout << "\n"; 315 | assert(!tri->hasFlag(Word::Recursive)); 316 | assert(tri->stackEffect().max() == 2); 317 | 318 | TEST_PARSER(15, R"( 1 5 tri )"); 319 | 320 | #ifdef NDEBUG 321 | cout << "Running performance test, `1 100000000 tri` 
...\n"; 322 | auto start = std::chrono::steady_clock::now(); 323 | auto result = _runParser(R"( 1 100000000 tri )"); 324 | assert(result.asDouble() == (1e8 * (1e8 + 1)) / 2); 325 | auto end = std::chrono::steady_clock::now(); 326 | std::chrono::duration diff = end - start; 327 | cout << "Got " << result << endl; 328 | cout << "Time to compute tri(1e8): " << diff.count() << " s; " << (diff.count() / 1e8 * 1e9) << " ns / iteration\n"; 329 | #endif 330 | 331 | garbageCollect(); 332 | assert(gc::object::instanceCount() == 0); 333 | 334 | cout << "\nTESTS PASSED❣️❣️❣️\n\n"; 335 | } 336 | -------------------------------------------------------------------------------- /src/values/gc.cc: -------------------------------------------------------------------------------- 1 | // 2 | // gc.cc 3 | // 4 | // Copyright (C) 2020 Jens Alfke. All Rights Reserved. 5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 
17 | // 18 | 19 | #include "gc.hh" 20 | #include "compiler.hh" // just for CompiledWord 21 | #include "value.hh" 22 | #include "word.hh" 23 | #include "core_words.hh" 24 | 25 | namespace tails::gc { 26 | using namespace std; 27 | using namespace tails; 28 | 29 | 30 | object* object::sFirst = nullptr; 31 | size_t object::sInstanceCount = 0; 32 | 33 | 34 | object::object(int type) 35 | :_next(intptr_t(sFirst) | (type & kTypeBits)) 36 | { 37 | assert(next() == sFirst); 38 | sFirst = this; 39 | ++sInstanceCount; 40 | } 41 | 42 | void object::scanStack(const Value *bottom, const Value *top) { 43 | if (bottom && top) { 44 | for (auto val = bottom; val <= top; ++val) 45 | val->mark(); 46 | } 47 | } 48 | 49 | 50 | void object::scanWord(const Word *word) { 51 | if (!word->isNative()) { 52 | for (const Instruction *pc = word->instruction().word; *pc != core_words::_RETURN; ++pc) { 53 | if (*pc == core_words::_LITERAL) 54 | (++pc)->literal.mark(); 55 | } 56 | } 57 | } 58 | 59 | 60 | pair object::sweep() { 61 | size_t freed = 0, kept = 0; 62 | object *next, *prev = nullptr, *o; 63 | bool fixLink = false; 64 | 65 | auto updateLink = [&] { 66 | if (fixLink) { 67 | if (prev) prev->setNext(o); else sFirst = o; 68 | fixLink = false; 69 | } 70 | }; 71 | 72 | for (o = first(); o; o = next) { 73 | next = o->next(); 74 | if (o->isMarked()) { 75 | o->unmark(); 76 | updateLink(); 77 | prev = o; 78 | ++kept; 79 | } else { 80 | // Free unmarked objects: 81 | o->collect(); 82 | fixLink = true; 83 | ++freed; 84 | } 85 | } 86 | updateLink(); 87 | assert(kept + freed == sInstanceCount); 88 | sInstanceCount -= freed; 89 | return {kept, freed}; 90 | } 91 | 92 | 93 | void object::collect() { 94 | switch (type()) { 95 | case kStringType: delete (String*)this; break; 96 | case kArrayType: delete (Array*)this; break; 97 | case kQuoteType: delete (Quote*)this; break; 98 | default: break; 99 | } 100 | } 101 | 102 | 103 | #pragma mark - STRING: 104 | 105 | 106 | String::String(size_t len) 107 | 
:object(kStringType) 108 | ,_len(uint32_t(len)) 109 | { 110 | assert(len < UINT32_MAX); 111 | _data[len] = 0; 112 | } 113 | 114 | 115 | String::String(std::string_view str) 116 | :String(str.size()) 117 | { 118 | memcpy(_data, str.data(), str.size()); 119 | } 120 | 121 | 122 | #pragma mark - ARRAY: 123 | 124 | 125 | void Array::mark() { 126 | if (object::mark()) { 127 | for (auto val : _array) 128 | val.mark(); 129 | } 130 | } 131 | 132 | 133 | #pragma mark - QUOTE: 134 | 135 | 136 | Quote::Quote(CompiledWord *word) 137 | :object(kQuoteType) 138 | ,_word(word) 139 | { } 140 | 141 | Quote::~Quote() = default; 142 | 143 | void Quote::mark() { 144 | if (object::mark()) 145 | scanWord(_word.get()); 146 | } 147 | 148 | } 149 | -------------------------------------------------------------------------------- /src/values/gc.hh: -------------------------------------------------------------------------------- 1 | // 2 | // gc.hh 3 | // 4 | // Copyright (C) 2020 Jens Alfke. All Rights Reserved. 5 | // 6 | 7 | #pragma once 8 | #include "value.hh" 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace tails { 15 | class Word; 16 | class CompiledWord; 17 | } 18 | 19 | namespace tails::gc { 20 | 21 | /// Abstract base class of garbage collected objects (referenced by Values.) 22 | /// This class hierarchy doesn't use C++ virtual methods; instead the subclass is indicated by 23 | /// two bits in the `_next` pointer. (The only time the subclass needs to be determined this 24 | /// way is when freeing an object; otherwise the Value that points to the object already knows 25 | /// the type.) 26 | class object { 27 | public: 28 | /// Marks all objects found in the stack from `bottom` to `top` (inclusive.) 29 | static void scanStack(const Value *bottom, const Value *top); 30 | /// Marks all object literals found in a word. 31 | static void scanWord(const Word*); 32 | 33 | /// Frees all objects that have not been marked. 
34 | /// Returns the number still alive, and the number freed. 35 | static std::pair sweep(); 36 | 37 | static object* first() {return sFirst;} 38 | object* next() const {return (object*)(_next & ~kTagBits);} 39 | static size_t instanceCount() {return sInstanceCount;} 40 | 41 | int type() const {return _next & kTypeBits;} 42 | 43 | protected: 44 | object(int type); 45 | bool mark() {bool chg = !isMarked(); _next |= kMarkedBit; return chg;} 46 | void unmark() {_next &= ~kMarkedBit;} 47 | bool isMarked() const {return (_next & kMarkedBit) != 0;} 48 | void collect(); 49 | void setNext(object *o) {_next = (intptr_t(o) & ~kTagBits) | (_next & kTagBits);} 50 | 51 | enum { 52 | kTypeBits = 0x3, // Bits 0,1 indicate the object's subclass 53 | kStringType = 0x1, 54 | kArrayType = 0x2, 55 | kQuoteType = 0x3, 56 | kMarkedBit = 0x4, // Bit 2 is set when object is marked as live during GC 57 | kTagBits = kTypeBits | kMarkedBit 58 | }; 59 | 60 | private: 61 | static object* sFirst; // Start of linked list of all allocated objects 62 | static size_t sInstanceCount; 63 | 64 | intptr_t _next; // Pointer to next object, plus 3 tag bits 65 | }; 66 | 67 | 68 | /// A heap-allocated garbage-collected string. 69 | class String : public object { 70 | public: 71 | static String* make(size_t len) {return new (len) String(len);} 72 | static String* make(std::string_view s) {return new (s.size()) String(s);} 73 | const char* c_str() const {return _data;} 74 | std::string_view string_view() const {return std::string_view(_data, _len);} 75 | /// Marks this string as in use. 
76 | void mark() {object::mark();} 77 | 78 | static void operator delete(void *ptr) {::operator delete(ptr);} 79 | private: 80 | static void* operator new(size_t baseSize, size_t extra) { 81 | return ::operator new(baseSize + extra); 82 | } 83 | 84 | String(size_t len); 85 | String(std::string_view str); 86 | 87 | uint32_t _len; 88 | char _data[1]; // actual length is variable 89 | }; 90 | 91 | 92 | /// A heap-allocated garbage-collected array. 93 | class Array : public object { 94 | public: 95 | Array() :object(kArrayType) { } 96 | Array(std::vector&& a) :object(kArrayType), _array(std::move(a)) { } 97 | std::vector& array() {return _array;} 98 | /// Marks this array, and all objects in it, as in use. 99 | void mark(); 100 | private: 101 | std::vector _array; 102 | }; 103 | 104 | 105 | /// A heap-allocated garbage-collected anonymous Word. 106 | class Quote : public object { 107 | public: 108 | Quote(CompiledWord*); 109 | ~Quote(); 110 | const CompiledWord* word() const {return _word.get();} 111 | /// Marks this quote, and any objects it references as literals, as in use. 112 | void mark(); 113 | private: 114 | std::unique_ptr _word; 115 | }; 116 | 117 | } 118 | -------------------------------------------------------------------------------- /src/values/nan_tagged.hh: -------------------------------------------------------------------------------- 1 | // 2 | // nan_tagged.hh 3 | // 4 | // Copyright (C) 2020 Jens Alfke. All Rights Reserved. 5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 17 | // 18 | 19 | // Inspiration and details from: 20 | // https://www.npopov.com/2012/02/02/Pointer-magic-for-efficient-dynamic-value-representations.html 21 | 22 | #pragma once 23 | #include "platform.hh" 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include // for memcpy() 29 | 30 | namespace tails { 31 | 32 | struct slice { 33 | const void *data; 34 | size_t size; 35 | }; 36 | 37 | /** A self-describing 8-byte value that can store a double, a pointer, or six bytes of inline 38 | data; and can identify which it's holding at any time. 39 | (By virtue of storing doubles, it can also store exact integers up to ±2^51.) 40 | Uses the so-called "NaN tagging" or "Nan boxing" trick that's used by several dynamic 41 | language runtimes, such as LuaJIT and both WebKit's and Mozilla's JavaScript VMs. 42 | 43 | Theory of operation: 44 | - All non-NaN double values represent themselves. 45 | - "Quiet" NaNs with the leading bits 0x7ff8 or 0xfff8 are special: 46 | - If the sign bit is set, the lower 48 bits are a pointer, which will be extended to 47 | 64 bits. (No current mainstream CPUs use more than 48 bits of address space.) 48 | - Otherwise the lower 48 bits are 6 bytes of inline data. 49 | - Two tag bits are available; you could use them to distinguish between four types of 50 | pointers or inline data, for instance. */ 51 | template 52 | class NanTagged { 53 | public: 54 | /** How many bytes of inline data I can hold. 
*/ 55 | static constexpr size_t kInlineCapacity = 6; 56 | 57 | constexpr NanTagged() noexcept :_bits(kPointerType) { } 58 | constexpr NanTagged(std::nullptr_t) noexcept :_bits(kPointerType) { } 59 | constexpr NanTagged(double d) noexcept {setDouble(d);} 60 | NanTagged(const TO *ptr) noexcept {setPointer(ptr);} 61 | constexpr NanTagged(std::initializer_list b) noexcept {setInline(b);} 62 | 63 | bool operator== (NanTagged n) const noexcept _pure {return _bits == n._bits;} 64 | bool operator!= (NanTagged n) const noexcept _pure {return _bits != n._bits;} 65 | 66 | // Type testing: 67 | 68 | constexpr bool isDouble() const noexcept _pure {return (_bits & kMagicBits) != kMagicBits;} 69 | constexpr bool isPointer() const noexcept _pure {return (_bits & kTypeMask) == kPointerType;} 70 | constexpr bool isInline() const noexcept _pure {return (_bits & kTypeMask) == kInlineType;} 71 | 72 | constexpr bool isNullPointer() const noexcept _pure {return _bits == kPointerType;} 73 | 74 | // Getters: 75 | 76 | /// Returns the `double` this stores, or an `NaN` if it's not holding a double. 77 | constexpr double asDouble() const noexcept _pure {return _asDouble;} 78 | /// Returns the `double` this stores, or 0.0 if it's not holding a double. 79 | constexpr double asDoubleOrZero() const noexcept _pure{return isDouble() ? _asDouble : 0.0;} 80 | /// Returns the pointer this stores, or nullptr if it's not holding a pointer. 81 | constexpr const TO* asPointer() const noexcept _pure {return isPointer() ? pointerValue() : nullptr;} 82 | /// Returns the inline data this stores, or `{nullptr,0}` if it's not inline. 83 | constexpr slice asInline() const noexcept _pure {return isInline() ? 
inlineValue() : slice();} 84 | 85 | 86 | // Pointer & inline values have two free tag bits: 87 | constexpr bool tag1() const noexcept _pure {return (_bits & kTagBit1) != 0; } 88 | constexpr bool tag2() const noexcept _pure {return (_bits & kTagBit2) != 0; } 89 | constexpr int tags() const noexcept _pure {return int((_bits >> 48) & 0x03);} 90 | 91 | // Setters: 92 | 93 | void setDouble(double d) noexcept { 94 | // We can't accept a double that's a NaN, because it could match our magic bit pattern 95 | // and be mistaken for a pointer or inline. Therefore we turn any NaN value into null. 96 | if (isnan(d)) 97 | setPointer(nullptr); 98 | else 99 | _asDouble = d; 100 | } 101 | 102 | void setPointer(const TO *p) noexcept { 103 | _bits = (uint64_t)p | kPointerType; 104 | } 105 | 106 | /// Makes this an inline value, containing all zeroes, and returns a pointer to the 107 | /// inline storage so you can write to it. 108 | void* setInline() noexcept {_bits = kMagicBits; return &_bytes[kInlineOffset];} 109 | 110 | /// Makes this an inline value, copying the input bytes. 111 | /// \note The input length is not preserved; \ref asInline will return all 6 bytes. 
112 | void setInline(slice bytes) noexcept { 113 | assert(bytes.size <= kInlineCapacity); 114 | setInline(); 115 | ::memcpy(&_bytes[0], bytes.data, bytes.size); 116 | } 117 | 118 | void setInline(std::initializer_list inlineBytes) noexcept { 119 | setInline({inlineBytes.begin(), inlineBytes.size()}); 120 | } 121 | 122 | void setTag1(bool b) noexcept {if (b) _bits |= kTagBit1; else _bits &= ~kTagBit1;} 123 | void setTag2(bool b) noexcept {if (b) _bits |= kTagBit2; else _bits &= ~kTagBit2;} 124 | 125 | void setTags(int t) noexcept { 126 | _bits = (_bits & ~(kTagBit1 | kTagBit2)) | (uint64_t(t & 0x03) << 48); 127 | } 128 | 129 | protected: 130 | NanTagged(void**) { } // no-op initializer for subclass constructors to call 131 | const TO* pointerValue() const noexcept _pure {return (TO*)(_bits & kPtrBits);} 132 | slice inlineValue() const noexcept _pure {return {&_bytes[kInlineOffset], kInlineCapacity};} 133 | 134 | private: 135 | static constexpr uint64_t kSignBit = 0x8000000000000000; // Sign bit of a double 136 | static constexpr uint64_t kQNaNBits = 0x7ff8000000000000; // Bits set in a 'quiet' NaN 137 | static constexpr uint64_t kMagicBits= 0x7ffc000000000000; // Bits that indicate non-double 138 | static constexpr uint64_t kTagBit1 = 0x0001000000000000; // Two pointer tag bits 139 | static constexpr uint64_t kTagBit2 = 0x0002000000000000; // Two pointer tag bits 140 | static constexpr uint64_t kPtrBits = 0x0000FFFFFFFFFFFF; // Bits available to pointers 141 | 142 | static constexpr uint64_t kTypeMask = kMagicBits | kSignBit; // Bits involved in tagging 143 | static constexpr uint64_t kPointerType = kMagicBits | kSignBit; // Tag bits in a pointer 144 | static constexpr uint64_t kInlineType = kMagicBits; // Tag bits in an inline 145 | 146 | 147 | // In little-endian the 51 free bits are at the start, in big-endian at the end. 
148 | #ifdef __BIG_ENDIAN__ 149 | static constexpr auto kInlineOffset = 2; 150 | #else 151 | static constexpr auto kInlineOffset = 0; 152 | #endif 153 | 154 | union { 155 | double _asDouble; 156 | uint64_t _bits; 157 | uint8_t _bytes[sizeof(double)]; 158 | }; 159 | }; 160 | 161 | 162 | // Sanity checking: 163 | static_assert(sizeof(double) == 8); 164 | static_assert(sizeof(void*) <= 8); 165 | } 166 | -------------------------------------------------------------------------------- /src/values/value.cc: -------------------------------------------------------------------------------- 1 | // 2 | // value.cc 3 | // 4 | // Copyright (C) 2021 Jens Alfke. All Rights Reserved. 5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 17 | // 18 | 19 | #include "value.hh" 20 | #include "gc.hh" 21 | #include "compiler.hh" // just for CompiledWord 22 | #include "io.hh" 23 | #include 24 | #include 25 | #include 26 | 27 | namespace tails { 28 | using namespace std; 29 | 30 | /** 31 | ### Value data representation: 32 | 33 | `Value` is a subclass of `NanTagged` (see nan_tagged.hh), which magically allows numbers, 34 | pointers and inline data to be stored in 64 bits. It exposes a `double`, a pointer, 35 | two tag bits, and an "inline" flag. 36 | 37 | - A number is represented as a regular `double` value. (This includes exact storage of integers 38 | up to ±2^51.) 39 | - A string has `kStringTag`. 
If up to 6 bytes long it can be stored inline; the length is 40 | determined by the number of trailing zero bytes. Otherwise it points to a gc::String object. 41 | - An array has `kArrayTag` and points to a `gc::Array` object. (It's never inline.) 42 | - A quotation has `kQuoteTag` and points to a `gc::Quote` object. (It's never inline.) 43 | - Null is a singleton value that's tagged as a String but has a null pointer. 44 | */ 45 | 46 | 47 | Value::Value(const char* str) 48 | :Value(str, (str ? strlen(str) : 0)) 49 | { } 50 | 51 | 52 | Value::Value(const char* str, size_t len) 53 | :NanTagged((void**)0) 54 | { 55 | if (str == nullptr) { 56 | assert(len == 0); 57 | setPointer(nullptr); 58 | } else { 59 | char *dst = allocString(len); 60 | memcpy(dst, str, len); 61 | } 62 | } 63 | 64 | 65 | Value::Value(std::initializer_list arrayItems) 66 | :NanTagged((void**)0) 67 | { 68 | vector array(arrayItems); 69 | setPointer(new gc::Array(std::move(array))); 70 | setTags(kArrayTag); 71 | } 72 | 73 | 74 | Value::Value(vector &&array) 75 | :NanTagged((void**)0) 76 | { 77 | setPointer(new gc::Array(std::move(array))); 78 | setTags(kArrayTag); 79 | } 80 | 81 | 82 | Value::Value(CompiledWord *word) 83 | :NanTagged(word) 84 | { 85 | assert(word); 86 | setPointer(new gc::Quote(word)); 87 | setTags(kQuoteTag); 88 | } 89 | 90 | 91 | Value::Type Value::type() const { 92 | if (isDouble()) 93 | return ANumber; 94 | else if (isNullPointer()) 95 | return ANull; 96 | else 97 | return Type(int(AString) + tags()); 98 | } 99 | 100 | 101 | const char* Value::typeName(Type type) { 102 | static constexpr const char* kNames[5] = { 103 | "null", "number", "string", "array", "quotation" 104 | }; 105 | return kNames[type]; 106 | } 107 | 108 | 109 | // Makes me a string with space for a string `len` bytes long. 110 | // Returns a pointer to the storage. 
111 | char* Value::allocString(size_t len) { 112 | if (len <= NanTagged::kInlineCapacity) { 113 | return (char*)setInline(); 114 | } else { 115 | auto heapStr = gc::String::make(len); 116 | setPointer(heapStr); 117 | return (char*)heapStr->c_str(); 118 | } 119 | } 120 | 121 | 122 | string_view Value::asString() const { 123 | if (tags() == kStringTag && !isDouble()) { 124 | if (isInline()) { 125 | // The inline string ends before the first 0 byte, else at the end of the inline data. 126 | auto str = (const char*)asInline().data; 127 | size_t len = 0; 128 | while (str[len] != 0 && len < NanTagged::kInlineCapacity) 129 | ++len; 130 | return string_view(str, len); 131 | } else if (!isNull()) { 132 | return ((gc::String*)asPointer())->string_view(); 133 | } 134 | } 135 | return string_view(); 136 | } 137 | 138 | 139 | vector* Value::asArray() const { 140 | if (tags() == kArrayTag) 141 | return &((gc::Array*)asPointer())->array(); 142 | return nullptr; 143 | } 144 | 145 | 146 | const Word* Value::asQuote() const { 147 | if (tags() == kQuoteTag) 148 | return ((gc::Quote*)asPointer())->word(); 149 | return nullptr; 150 | } 151 | 152 | 153 | void Value::mark() const { 154 | switch (tags()) { 155 | case kStringTag: 156 | if (!isDouble() && !isInline()) 157 | ((gc::String*)asPointer())->mark(); 158 | break; 159 | case kArrayTag: 160 | ((gc::Array*)asPointer())->mark(); 161 | break; 162 | case kQuoteTag: 163 | ((gc::Quote*)asPointer())->mark(); 164 | default: 165 | break; 166 | } 167 | } 168 | 169 | 170 | Value::operator bool() const { 171 | if (isDouble()) 172 | return asDouble() != 0; 173 | else 174 | return !isNull(); 175 | } 176 | 177 | bool Value::operator== (const Value &v) const { 178 | if (NanTagged::operator==(v)) 179 | return true; 180 | Type myType = type(); 181 | if (myType == ANull || myType == ANumber || v.type() != myType) 182 | return false; 183 | else if (myType == AString) 184 | return asString() == v.asString(); 185 | else if (myType == AnArray) 186 | return 
*asArray() == *v.asArray(); 187 | else 188 | return false; 189 | } 190 | 191 | 192 | int Value::cmp(Value v) const { 193 | Type myType = type(), vType = v.type(); 194 | if (myType != vType) 195 | return int(myType) - int(vType); 196 | switch (myType) { 197 | case ANull: 198 | return 0; 199 | case ANumber: 200 | return _cmp(asDouble(), v.asDouble()); 201 | case AString: 202 | return asString().compare(v.asString()); 203 | case AnArray: { 204 | const vector *a = asArray(), *b = v.asArray(); 205 | auto ia = a->begin(), ib = b->begin(); 206 | for (size_t n = min(a->size(), b->size()); n > 0; --n, ++ia, ++ib) { 207 | if (int c = ia->cmp(*ib); c != 0) 208 | return c; 209 | } 210 | return _cmp(a->size(), b->size()); 211 | } 212 | case AQuote: 213 | return _cmp(asQuote(), v.asQuote()); // arbitrary ordering by address 214 | } 215 | abort(); // unreachable, but GCC doesn't know that :p 216 | } 217 | 218 | 219 | Value Value::length() const { 220 | if (isString()) 221 | return asString().size(); 222 | else if (isArray()) 223 | return asArray()->size(); 224 | else 225 | return NullValue; 226 | } 227 | 228 | 229 | Value Value::operator+ (Value v) const { 230 | if (isDouble() || v.isDouble()) { 231 | // Addition: 232 | return Value(asDouble() + v.asDouble()); 233 | } else if (isString() && v.isString()) { 234 | // String concatenation: 235 | auto str1 = asString(), str2 = v.asString(); 236 | if (str1.size() == 0) 237 | return v; 238 | else if (str2.size() == 0) 239 | return *this; 240 | else { 241 | Value result; 242 | char *dst = result.allocString(str1.size() + str2.size()); 243 | memcpy(dst, str1.data(), str1.size()); 244 | memcpy(dst + str1.size(), str2.data(), str2.size()); 245 | return result; 246 | } 247 | } else if (isArray()) { 248 | // Add item to array: 249 | vector newArray(*asArray()); 250 | Value newVal(std::move(newArray)); 251 | newVal.asArray()->push_back(v); 252 | return newVal; 253 | } else { 254 | return NullValue; 255 | } 256 | } 257 | 258 | 259 | // 
Numeric-only operations don't need type checking. If either value is non-numeric, then 260 | // `asDouble` returns a NaN by definition, and the Value constructor changes that to `null`. 261 | 262 | Value Value::operator- (Value v) const { 263 | return Value(asDouble() - v.asDouble()); 264 | } 265 | 266 | 267 | Value Value::operator* (Value v) const { 268 | return Value(asDouble() * v.asDouble()); 269 | } 270 | 271 | 272 | Value Value::operator/ (Value v) const { 273 | return Value(asDouble() / v.asDouble()); 274 | } 275 | 276 | 277 | Value Value::operator% (Value v) const { 278 | // Modulo only operates on integers, and the denominator can't be zero: 279 | if (isDouble() && v.isDouble()) { 280 | if (int denom = v.asInt(); denom != 0) 281 | return Value(asInt() % denom); 282 | } 283 | return NullValue; 284 | } 285 | 286 | 287 | static std::ostream& operator<< (std::ostream &out, const vector &array) { 288 | out << '['; 289 | int n = 0; 290 | for (auto value : array) { 291 | if (n++ > 0) 292 | out << ", "; 293 | out << value; 294 | } 295 | out << ']'; 296 | return out; 297 | } 298 | 299 | 300 | std::ostream& operator<< (std::ostream &out, Value value) { 301 | switch (value.type()) { 302 | case Value::ANull: return out << "null"; 303 | case Value::ANumber: return out << value.asDouble(); 304 | case Value::AString: return out << std::quoted(value.asString()); 305 | case Value::AnArray: return out << *value.asArray(); 306 | case Value::AQuote: return out << "{(" << value.asQuote()->stackEffect() << ")}"; 307 | } 308 | return out; 309 | } 310 | 311 | } 312 | -------------------------------------------------------------------------------- /src/values/value.hh: -------------------------------------------------------------------------------- 1 | // 2 | // value.hh 3 | // 4 | // Copyright (C) 2021 Jens Alfke. All Rights Reserved. 
5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 17 | // 18 | 19 | #pragma once 20 | #include "platform.hh" 21 | #include "nan_tagged.hh" 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | 28 | namespace tails { 29 | 30 | class Word; 31 | class CompiledWord; 32 | 33 | /// Type of values stored on the stack. 34 | /// 35 | /// This is a more complex implementation that can store numbers, strings, arrays, and 36 | /// "quotations" (anonymous words, aka lambdas.) This all still fits in 64 bits thanks to the 37 | /// magic of NaN Tagging. 
38 | class Value : private NanTagged { 39 | public: 40 | constexpr Value() :NanTagged(nullptr) { } 41 | constexpr Value(nullptr_t) :Value() { } 42 | 43 | constexpr Value(double n) :NanTagged(n) { } 44 | constexpr Value(int n) :Value(double(n)) { } 45 | constexpr Value(size_t n) :Value(double(n)) { } 46 | 47 | Value(const char* str); 48 | Value(const char* str, size_t len); 49 | 50 | Value(std::initializer_list arrayItems); 51 | Value(std::vector&&); 52 | 53 | explicit Value(CompiledWord*); 54 | 55 | enum Type { 56 | ANull, 57 | ANumber, 58 | AString, 59 | AnArray, 60 | AQuote, 61 | }; 62 | 63 | Type type() const; 64 | static const char* typeName(Type); 65 | 66 | constexpr bool isNull() const {return NanTagged::isNullPointer();} 67 | constexpr bool isDouble() const {return NanTagged::isDouble();} 68 | constexpr bool isString() const {return (asPointer() || isInline()) && tags() == kStringTag;} 69 | constexpr bool isArray() const {return asPointer() && tags() == kArrayTag;} 70 | constexpr bool isQuote() const {return asPointer() && tags() == kQuoteTag;} 71 | 72 | constexpr double asNumber() const {return NanTagged::asDouble();} 73 | constexpr double asDouble() const {return NanTagged::asDouble();} 74 | constexpr int asInt() const {return int(asDoubleOrZero());} 75 | std::string_view asString() const; 76 | std::vector* asArray() const; 77 | const Word* asQuote() const; 78 | 79 | /// 'Truthiness' -- any Value except 0 and null is considered truthy. 80 | explicit operator bool() const; 81 | /// Equality comparison 82 | bool operator== (const Value &v) const; 83 | /// 3-way comparison, like the C++20 `<=>` operator. 84 | int cmp(Value v) const; 85 | 86 | // Arithmetic operators. `+` is overloaded to concatenate strings and arrays. 
87 | Value operator+ (Value v) const; 88 | Value operator- (Value v) const; 89 | Value operator* (Value v) const; 90 | Value operator/ (Value v) const; 91 | Value operator% (Value v) const; 92 | 93 | /// Returns the length of a string or array; not valid for other types. 94 | Value length() const; 95 | 96 | /// Marks this value as in use during garbage collection. (See `gc.hh` for main GC API.) 97 | void mark() const; 98 | 99 | private: 100 | enum { kStringTag = 0, kArrayTag = 1, kQuoteTag = 2, }; 101 | 102 | char* allocString(size_t len); 103 | }; 104 | 105 | constexpr Value NullValue; 106 | 107 | static inline bool operator!= (const Value &a, const Value &b) {return !(a == b);} 108 | static inline bool operator> (const Value &a, const Value &b) {return a.cmp(b) > 0;} 109 | static inline bool operator>= (const Value &a, const Value &b) {return a.cmp(b) >= 0;} 110 | static inline bool operator< (const Value &a, const Value &b) {return a.cmp(b) < 0;} 111 | static inline bool operator<= (const Value &a, const Value &b) {return a.cmp(b) <= 0;} 112 | } 113 | --------------------------------------------------------------------------------