├── .clang-format ├── .clang-tidy ├── .dockerignore ├── .github └── workflows │ ├── ci.yml │ └── stage.yml ├── .gitignore ├── LICENSE ├── README.md ├── cli.c ├── compiler.py ├── compiler_tests.py ├── examples ├── 0_home │ ├── a.scrap │ ├── combinators.scrap │ └── factorial.scrap ├── 10_types │ ├── a.scrap │ ├── b.scrap │ ├── c.scrap │ ├── d.scrap │ ├── e.scrap │ ├── f.scrap │ └── g.scrap ├── 11_platforms │ └── web │ │ ├── Dockerfile │ │ ├── handler.scrap │ │ ├── web.c │ │ ├── web.h │ │ └── webplatform.py ├── 1_numbers │ ├── a.scrap │ ├── b.scrap │ └── c.scrap ├── 2_text │ ├── a.scrap │ ├── b.scrap │ └── c.scrap ├── 3_bytes │ ├── a.scrap │ └── b.scrap ├── 4_hole │ └── a.scrap ├── 5_variables │ ├── a.scrap │ ├── b.scrap │ ├── c.scrap │ └── d.scrap ├── 6_lists │ └── a.scrap ├── 7_records │ ├── a.scrap │ └── b.scrap ├── 8_operators │ ├── a.scrap │ ├── b.scrap │ └── c.scrap └── 9_functions │ ├── a.scrap │ ├── b.scrap │ ├── c.scrap │ ├── d.scrap │ ├── e.scrap │ ├── f.scrap │ ├── g.scrap │ ├── h.scrap │ └── i.scrap ├── fly.toml ├── hooks └── pre-commit ├── pyproject.toml ├── runtime.c ├── scrapscript.py ├── scrapscript_tests.py └── util ├── Dockerfile ├── build-com ├── compilerepl.html ├── index.html ├── repl.html ├── scrapscript.js └── style.css /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Google 2 | AlignEscapedNewlinesLeft: false 3 | DerivePointerAlignment: false 4 | PointerAlignment: Left 5 | IncludeBlocks: Regroup 6 | IncludeCategories: 7 | # C system headers 8 | - Regex: '^<.*\.h?>' 9 | Priority: 1 10 | # Scrapscript headers 11 | - Regex: '.*' 12 | Priority: 2 13 | -------------------------------------------------------------------------------- /.clang-tidy: -------------------------------------------------------------------------------- 1 | Checks: > 2 | -*, 3 | bugprone-argument-comment, 4 | google-readability-casting, 5 | google-readability-todo, 6 | modernize-use-nullptr, 7 | 
readability-braces-around-statements, 8 | readability-else-after-return, 9 | readability-identifier-naming, 10 | readability-inconsistent-declaration-parameter-name, 11 | readability-static-accessed-through-instance, 12 | FormatStyle: file 13 | CheckOptions: 14 | - { key: readability-identifier-naming.ClassCase, value: CamelCase } 15 | - { key: readability-identifier-naming.ClassMemberCase, value: lower_case } 16 | - { key: readability-identifier-naming.ClassMemberSuffix, value: '_' } 17 | - { key: readability-identifier-naming.GlobalConstantCase, value: CamelCase } 18 | - { key: readability-identifier-naming.GlobalConstantPrefix, value: 'k' } 19 | - { key: readability-identifier-naming.LocalVariableCase, value: lower_case } 20 | - { key: readability-identifier-naming.MethodCase, value: camelBack } 21 | - { key: readability-identifier-naming.NamespaceCase, value: lower_case } 22 | - { key: readability-identifier-naming.ParameterCase, value: lower_case } 23 | - { key: readability-braces-around-statements.ShortStatementLines, value: 1 } 24 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | fly.toml 2 | .git 3 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Run tests 2 | 3 | on: 4 | push: 5 | branches: [ trunk ] 6 | pull_request: 7 | branches: [ trunk ] 8 | 9 | 10 | env: 11 | REGISTRY: ghcr.io 12 | IMAGE_NAME: ${{ github.repository }} 13 | 14 | 15 | jobs: 16 | run_lint: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v4 20 | - name: Install uv 21 | uses: astral-sh/setup-uv@v5 22 | - name: Install Python 23 | run: uv python install 3.10 24 | - run: uv sync --only-dev 25 | - run: uv run ruff check scrapscript.py 26 | - run: uv run ruff format --check scrapscript.py 27 
| - run: uv run mypy --strict scrapscript.py 28 | run_interpreter_unit_tests: 29 | strategy: 30 | matrix: 31 | PYTHON: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] 32 | runs-on: ubuntu-latest 33 | steps: 34 | - uses: actions/checkout@v4 35 | - name: Install uv 36 | uses: astral-sh/setup-uv@v5 37 | - name: Install Python 38 | run: uv python install ${{matrix.PYTHON}} 39 | - name: Pin Python 40 | run: uv python pin ${{matrix.PYTHON}} 41 | - name: Run interpreter tests 42 | run: uv run python scrapscript_tests.py 43 | run_compiler_unit_tests: 44 | strategy: 45 | matrix: 46 | PYTHON: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] 47 | CC: [gcc, clang, tcc] 48 | USE_STATIC_HEAP: ["-DSTATIC_HEAP", ""] 49 | runs-on: ubuntu-latest 50 | steps: 51 | - uses: actions/checkout@v4 52 | - name: Install uv 53 | uses: astral-sh/setup-uv@v5 54 | - name: Update local package lists 55 | run: sudo apt update 56 | - name: Install Python 57 | run: uv python install ${{matrix.PYTHON}} 58 | - name: Pin Python 59 | run: uv python pin ${{matrix.PYTHON}} 60 | - name: Install other deps 61 | run: sudo apt install --yes ${{matrix.CC}} valgrind 62 | - name: Run compiler tests 63 | run: CC=${{matrix.CC}} CFLAGS="${{matrix.USE_STATIC_HEAP}}" uv run python compiler_tests.py 64 | - name: Run compiler tests -DNDEBUG 65 | run: CC=${{matrix.CC}} CFLAGS="${{matrix.USE_STATIC_HEAP}} -DNDEBUG" uv run python compiler_tests.py 66 | - name: Run compiler tests with ASAN 67 | run: CC=${{matrix.CC}} CFLAGS="-fsanitize=address ${{matrix.USE_STATIC_HEAP}}" uv run python compiler_tests.py 68 | - name: Run compiler tests with UBSAN 69 | run: CC=${{matrix.CC}} CFLAGS="-fsanitize=undefined ${{matrix.USE_STATIC_HEAP}}" uv run python compiler_tests.py 70 | - name: Run compiler tests with Valgrind 71 | run: CC=${{matrix.CC}} CFLAGS="${{matrix.USE_STATIC_HEAP}}" USE_VALGRIND=1 uv run python compiler_tests.py 72 | run_unit_tests: 73 | needs: [run_interpreter_unit_tests, run_compiler_unit_tests] 74 | # 
Fake, unnecessary 75 | runs-on: ubuntu-latest 76 | # Fake, unnecessary 77 | steps: 78 | - uses: actions/checkout@v4 79 | build_docker_image: 80 | runs-on: ubuntu-latest 81 | permissions: 82 | contents: read 83 | packages: write 84 | steps: 85 | - uses: actions/checkout@v4 86 | - name: Log in to the Container registry 87 | uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 88 | with: 89 | registry: ${{ env.REGISTRY }} 90 | username: ${{ github.actor }} 91 | password: ${{ secrets.GITHUB_TOKEN }} 92 | - name: Extract metadata (tags, labels) for Docker 93 | id: meta 94 | uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 95 | with: 96 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} 97 | - name: Build and push Docker image 98 | uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 99 | with: 100 | context: . 101 | push: true 102 | tags: ${{ steps.meta.outputs.tags }} 103 | labels: ${{ steps.meta.outputs.labels }} 104 | target: main 105 | file: util/Dockerfile 106 | deploy: 107 | runs-on: ubuntu-latest 108 | if: | 109 | github.repository == 'tekknolagi/scrapscript' && 110 | github.ref == format('refs/heads/{0}', github.event.repository.default_branch) 111 | needs: [run_lint, run_unit_tests] 112 | steps: 113 | - uses: actions/checkout@v4 114 | - uses: superfly/flyctl-actions/setup-flyctl@master 115 | - run: flyctl deploy --remote-only 116 | env: 117 | FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} 118 | -------------------------------------------------------------------------------- /.github/workflows/stage.yml: -------------------------------------------------------------------------------- 1 | name: Stage app for PR preview 2 | on: 3 | pull_request: 4 | types: [opened, reopened, synchronize, closed] 5 | 6 | jobs: 7 | staging_app: 8 | runs-on: ubuntu-latest 9 | 10 | # Only run one deployment at a time per PR. 
11 | concurrency: 12 | group: scrapscript-pr-${{ github.event.number }} 13 | 14 | # Create a GitHub deployment environment per staging app so it shows up 15 | # in the pull request UI. 16 | environment: 17 | name: scrapscript-pr-${{ github.event.number }} 18 | url: ${{ steps.deploy.outputs.url }}/repl 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | 23 | - name: Deploy 24 | id: deploy 25 | uses: superfly/fly-pr-review-apps@1.2.1 26 | with: 27 | name: scrapscript-pr-${{ github.event.number }} 28 | region: ewr 29 | org: personal 30 | env: 31 | FLY_API_TOKEN: ${{ secrets.FLY_API_DEPLOY_TOKEN }} 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | 4 | # Distribution / packaging 5 | .Python 6 | build/ 7 | develop-eggs/ 8 | dist/ 9 | downloads/ 10 | eggs/ 11 | .eggs/ 12 | lib64/ 13 | parts/ 14 | sdist/ 15 | var/ 16 | wheels/ 17 | share/python-wheels/ 18 | *.egg-info/ 19 | .installed.cfg 20 | *.egg 21 | 22 | # Unit test / coverage reports 23 | htmlcov/ 24 | .tox/ 25 | .nox/ 26 | .coverage 27 | .coverage.* 28 | .cache 29 | nosetests.xml 30 | coverage.xml 31 | *.cover 32 | *.py,cover 33 | .hypothesis/ 34 | .pytest_cache/ 35 | cover/ 36 | 37 | # Environments 38 | .env 39 | .venv 40 | env/ 41 | venv/ 42 | ENV/ 43 | env.bak/ 44 | venv.bak/ 45 | 46 | # mypy 47 | .mypy_cache/ 48 | .dmypy.json 49 | dmypy.json 50 | 51 | # System 52 | .DS_Store 53 | 54 | # Scrapscript 55 | .scrap-history -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2023 Max Bernstein , 4 | Chris Gregory , 5 | and contributors (see https://github.com/tekknolagi/scrapscript/contributors) 6 | 7 | Permission is hereby granted, free of charge, to any person 
obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Scrapscript Interpreter 2 | 3 | See [scrapscript.org](https://scrapscript.org/) for some more information. Keep 4 | in mind that the syntax on the website will change a little bit in the coming 5 | weeks to match this repository. 6 | 7 | Take a look inside [scrapscript.py](scrapscript.py) and all of its tests to get 8 | an idea for how the language works. 9 | 10 | ## Usage 11 | 12 | We support python3.8+. 
13 | 14 | ```bash 15 | # With a file 16 | python3 scrapscript.py eval examples/0_home/factorial.scrap 17 | 18 | # With a string literal 19 | python3 scrapscript.py apply "1 + 2" 20 | 21 | # With a REPL 22 | python3 scrapscript.py repl 23 | ``` 24 | 25 | or with [Cosmopolitan](https://justine.lol/cosmopolitan/index.html): 26 | 27 | ```bash 28 | ./util/build-com 29 | 30 | # With a file 31 | ./scrapscript.com eval examples/0_home/factorial.scrap 32 | 33 | # With a string literal 34 | ./scrapscript.com apply "1 + 2" 35 | 36 | # With a REPL 37 | ./scrapscript.com repl 38 | ``` 39 | 40 | (if you have an exec format error and use Zsh, either upgrade Zsh or prefix 41 | with `sh`) 42 | 43 | or with Docker: 44 | 45 | ```bash 46 | # With a file (mount your local directory) 47 | docker run --mount type=bind,source="$(pwd)",target=/mnt -i -t ghcr.io/tekknolagi/scrapscript:trunk eval /mnt/examples/0_home/factorial.scrap 48 | 49 | # With a string literal 50 | docker run -i -t ghcr.io/tekknolagi/scrapscript:trunk apply "1 + 2" 51 | 52 | # With a REPL 53 | docker run -i -t ghcr.io/tekknolagi/scrapscript:trunk repl 54 | ``` 55 | 56 | ### The experimental compiler: 57 | 58 | #### Normal ELF 59 | 60 | ```bash 61 | ./scrapscript.py compile some.scrap # produces output.c 62 | ./scrapscript.py compile some.scrap --compile # produces a.out 63 | ``` 64 | 65 | #### Cosmopolitan 66 | 67 | ```bash 68 | CC=~/Downloads/cosmos/bin/cosmocc ./scrapscript.py compile some.scrap --compile # produces a.out 69 | ``` 70 | 71 | #### Wasm 72 | 73 | ```bash 74 | CC=/opt/wasi-sdk/bin/clang \ 75 | CFLAGS=-D_WASI_EMULATED_MMAN \ 76 | LDFLAGS=-lwasi-emulated-mman \ 77 | ./scrapscript.py compile some.scrap --compile # produces a.out 78 | ``` 79 | 80 | ## Development Workflow 81 | 82 | ### Running Tests 83 | 84 | ```bash 85 | python scrapscript_tests.py 86 | ``` 87 | 88 | ### Type Checking the Python Sources 89 | ```bash 90 | mypy --strict scrapscript.py 91 | ``` 92 | 93 | ### Formatting the Python Sources 94 | 
@dataclasses.dataclass
class CompiledFunction:
    """A single C function under construction.

    Attributes are documented with comments on the fields below. Every
    instance starts its body with the handle-scope prologue, so code emitted
    afterwards can rely on ``HANDLES();`` having already been written.
    """

    # C identifier of the generated function.
    name: str
    # C parameter names, in declaration order; each is a `struct object*`.
    params: typing.List[str]
    # Names stored in the closure; the compiler fills this with the sorted
    # free variables of the source function.
    fields: typing.List[str] = dataclasses.field(default_factory=list)
    # Lines of C source making up the function body.
    code: typing.List[str] = dataclasses.field(default_factory=list)

    def __post_init__(self) -> None:
        """Seed the body with `HANDLES();` and one `GC_PROTECT` per parameter."""
        prologue = ["HANDLES();"]
        # The parameters are raw pointers and must be updated on GC
        prologue.extend(f"GC_PROTECT({param});" for param in self.params)
        self.code.extend(prologue)

    def decl(self) -> str:
        """Return the C prototype (without a trailing semicolon or body)."""
        formals = [f"struct object* {param}" for param in self.params]
        return "struct object* " + self.name + "(" + ", ".join(formals) + ")"
gensym(self, stem: str = "tmp") -> str: 103 | self.gensym_counter += 1 104 | return f"{stem}_{self.gensym_counter-1}" 105 | 106 | def _emit(self, line: str) -> None: 107 | self.function.code.append(line) 108 | 109 | def _debug(self, line: str) -> None: 110 | if not self.debug: 111 | return 112 | self._emit("#ifndef NDEBUG") 113 | self._emit(line) 114 | self._emit("#endif") 115 | 116 | def _handle(self, name: str, exp: str) -> str: 117 | # TODO(max): Liveness analysis to avoid unnecessary handles 118 | self._emit(f"OBJECT_HANDLE({name}, {exp});") 119 | return name 120 | 121 | def _guard(self, cond: str, msg: Optional[str] = None) -> None: 122 | if msg is None: 123 | msg = f"assertion {cond!s} failed" 124 | self._emit(f"if (!({cond})) {{") 125 | self._emit(f'fprintf(stderr, "{msg}\\n");') 126 | self._emit("abort();") 127 | self._emit("}") 128 | 129 | def _guard_int(self, exp: Object, c_name: str) -> None: 130 | if type_of(exp) != IntType: 131 | self._guard(f"is_num({c_name})") 132 | 133 | def _guard_str(self, exp: Object, c_name: str) -> None: 134 | if type_of(exp) != StringType: 135 | self._guard(f"is_string({c_name})") 136 | 137 | def _mktemp(self, exp: str) -> str: 138 | temp = self.gensym() 139 | return self._handle(temp, exp) 140 | 141 | def compile_assign(self, env: Env, exp: Assign) -> Env: 142 | assert isinstance(exp.name, Var) 143 | name = exp.name.name 144 | if isinstance(exp.value, Function): 145 | # Named function 146 | value = self.compile_function(env, exp.value, name) 147 | return {**env, name: value} 148 | if isinstance(exp.value, MatchFunction): 149 | # Named match function 150 | value = self.compile_match_function(env, exp.value, name) 151 | return {**env, name: value} 152 | value = self.compile(env, exp.value) 153 | return {**env, name: value} 154 | 155 | def make_compiled_function(self, arg: str, exp: Object, name: Optional[str]) -> CompiledFunction: 156 | assert isinstance(exp, (Function, MatchFunction)) 157 | free = free_in(exp) 158 | if name is 
def try_match(self, env: Env, arg: str, pattern: Object, fallthrough: str) -> Env:
    """Emit C code that tests `arg` against `pattern`.

    Every failed test emits a ``goto`` to the `fallthrough` label.

    Args:
        env: Current compile-time environment; only passed through to
            recursive calls.
        arg: Name of the C expression/variable holding the value to match.
        pattern: Pattern AST node (Int, Hole, Variant, String, Var, List or
            Record).
        fallthrough: C label to jump to when the pattern does not match.

    Returns:
        The variable bindings introduced by the pattern, mapping each
        scrapscript name to the C name holding its value.

    Raises:
        NotImplementedError: For named record spreads and for pattern node
            types not listed above.
    """
    # TODO(max): Give `arg` an AST node so we can track its inferred type
    # and make use of that in pattern matching
    if isinstance(pattern, Int):
        self._emit(f"if (!is_num_equal_word({arg}, {pattern.value})) {{ goto {fallthrough}; }}")
        return {}
    if isinstance(pattern, Hole):
        self._emit(f"if (!is_hole({arg})) {{ goto {fallthrough}; }}")
        return {}
    if isinstance(pattern, Variant):
        self.variant_tag(pattern.tag)  # register it for the big enum
        if isinstance(pattern.value, Hole):
            # This is an optimization for immediate variants but it's not
            # necessary; the non-Hole case would work just fine.
            self._emit(f"if ({arg} != mk_immediate_variant(Tag_{pattern.tag})) {{ goto {fallthrough}; }}")
            return {}
        self._emit(f"if (!is_variant({arg})) {{ goto {fallthrough}; }}")
        self._emit(f"if (variant_tag({arg}) != Tag_{pattern.tag}) {{ goto {fallthrough}; }}")
        return self.try_match(env, self._mktemp(f"variant_value({arg})"), pattern.value, fallthrough)

    if isinstance(pattern, String):
        value = pattern.value
        literal = json.dumps(value)
        # Lengths handed to the runtime are counted in UTF-8 bytes, not code
        # points, matching how _emit_small_string packs string constants.
        # NOTE(review): assumes mksmallstring/string_equal_cstr_len take a
        # byte count and that the C compiler encodes the literal as UTF-8 --
        # confirm against runtime.c.
        num_bytes = len(value.encode("utf-8"))
        if num_bytes < 8:
            self._emit(f"if ({arg} != mksmallstring({literal}, {num_bytes})) {{ goto {fallthrough}; }}")
            return {}
        self._emit(f"if (!is_string({arg})) {{ goto {fallthrough}; }}")
        self._emit(
            f"if (!string_equal_cstr_len({arg}, {literal}, {num_bytes})) {{ goto {fallthrough}; }}"
        )
        return {}
    if isinstance(pattern, Var):
        # A variable matches anything and simply names the value.
        return {pattern.name: arg}
    if isinstance(pattern, List):
        self._emit(f"if (!is_list({arg})) {{ goto {fallthrough}; }}")
        updates = {}
        the_list = arg
        for pattern_item in pattern.items:
            if isinstance(pattern_item, Spread):
                # A spread swallows the remainder of the list, so no length
                # check follows; a named spread binds that remainder.
                if pattern_item.name:
                    updates[pattern_item.name] = the_list
                return updates
            # Not enough elements
            self._emit(f"if (is_empty_list({the_list})) {{ goto {fallthrough}; }}")
            list_item = self._mktemp(f"list_first({the_list})")
            updates.update(self.try_match(env, list_item, pattern_item, fallthrough))
            the_list = self._mktemp(f"list_rest({the_list})")
        # Too many elements
        self._emit(f"if (!is_empty_list({the_list})) {{ goto {fallthrough}; }}")
        return updates
    if isinstance(pattern, Record):
        self._emit(f"if (!is_record({arg})) {{ goto {fallthrough}; }}")
        updates = {}
        for key, pattern_value in pattern.data.items():
            if isinstance(pattern_value, Spread):
                if pattern_value.name:
                    raise NotImplementedError("named record spread not yet supported")
                # Anonymous spread: extra fields are allowed, so skip the
                # exact field-count check below.
                return updates
            key_idx = self.record_key(key)
            record_value = self._mktemp(f"record_get({arg}, {key_idx})")
            # TODO(max): If the key is present in the type, don't emit this
            # check
            self._emit(f"if ({record_value} == NULL) {{ goto {fallthrough}; }}")
            updates.update(self.try_match(env, record_value, pattern_value, fallthrough))
        self._emit(f"if (record_num_fields({arg}) != {len(pattern.data)}) {{ goto {fallthrough}; }}")
        return updates
    raise NotImplementedError("try_match", pattern)
def _emit_const(self, exp: Object) -> str:
    """Return a C expression for the compile-time constant `exp`.

    Immediates (holes, small ints, small strings, immediate variants, the
    empty list) become inline expressions. Heap-shaped constants are added
    to the constant heap via `_const_obj` and referenced by pointer.
    Closed functions are compiled through `compile_function` with an empty
    environment.

    Args:
        exp: AST node for which `_is_const` returns True.

    Returns:
        C expression text evaluating to the constant.

    Raises:
        AssertionError: If `exp` is not a constant.
        NotImplementedError: If `exp` is a constant kind with no emitter.
    """
    assert self._is_const(exp), f"not a constant {exp}"
    if isinstance(exp, Hole):
        return "hole()"
    if isinstance(exp, Int):
        # TODO(max): Bignum
        return f"_mksmallint({exp.value})"
    if isinstance(exp, List):
        items = [self._emit_const(item) for item in exp.items]
        # Build the cons chain back to front so the first item ends up at
        # the head of the list.
        result = "empty_list()"
        for item in reversed(items):
            result = self._const_cons(item, result)
        return result
    if isinstance(exp, String):
        # Sizes are counted in UTF-8 bytes, not code points: the small-string
        # encoder asserts on the encoded length, and a heap string's .size
        # must cover every byte of its data.
        # NOTE(review): assumes the C compiler encodes the escapes produced
        # by json.dumps as UTF-8 -- confirm for all supported compilers.
        num_bytes = len(exp.value.encode("utf-8"))
        if num_bytes < 8:
            return self._emit_small_string(exp.value)
        return self._const_obj(
            "heap_string", "TAG_STRING", f".size={num_bytes}, .data={json.dumps(exp.value)}"
        )
    if isinstance(exp, Variant):
        self.variant_tag(exp.tag)  # register the tag
        if isinstance(exp.value, Hole):
            return f"mk_immediate_variant(Tag_{exp.tag})"
        value = self._emit_const(exp.value)
        return self._const_obj("variant", "TAG_VARIANT", f".tag=Tag_{exp.tag}, .value={value}")
    if isinstance(exp, Record):
        values = {self.record_key(key): self._emit_const(value) for key, value in exp.data.items()}
        fields = ",\n".join(f"{{.key={key}, .value={value} }}" for key, value in values.items())
        return self._const_obj("record", "TAG_RECORD", f".size={len(values)}, .fields={{ {fields} }}")
    if isinstance(exp, Function):
        assert len(free_in(exp)) == 0, "only constant functions can be constified"
        return self.compile_function({}, exp, name=None)
    raise NotImplementedError(f"const {exp}")
self.compile(env, exp.right) 374 | if exp.op == BinopKind.ADD: 375 | self._debug("collect(heap);") 376 | self._guard_int(exp.left, left) 377 | self._guard_int(exp.right, right) 378 | return self._mktemp(f"num_add({left}, {right})") 379 | if exp.op == BinopKind.MUL: 380 | self._debug("collect(heap);") 381 | self._guard_int(exp.left, left) 382 | self._guard_int(exp.right, right) 383 | return self._mktemp(f"num_mul({left}, {right})") 384 | if exp.op == BinopKind.SUB: 385 | self._debug("collect(heap);") 386 | self._guard_int(exp.left, left) 387 | self._guard_int(exp.right, right) 388 | return self._mktemp(f"num_sub({left}, {right})") 389 | if exp.op == BinopKind.LIST_CONS: 390 | self._debug("collect(heap);") 391 | return self._mktemp(f"list_cons({left}, {right})") 392 | if exp.op == BinopKind.STRING_CONCAT: 393 | self._debug("collect(heap);") 394 | self._guard_str(exp.left, left) 395 | self._guard_str(exp.right, right) 396 | return self._mktemp(f"string_concat({left}, {right})") 397 | raise NotImplementedError(f"binop {exp.op}") 398 | if isinstance(exp, Where): 399 | assert isinstance(exp.binding, Assign) 400 | res_env = self.compile_assign(env, exp.binding) 401 | new_env = {**env, **res_env} 402 | return self.compile(new_env, exp.body) 403 | if isinstance(exp, Var): 404 | var_value = env.get(exp.name) 405 | if var_value is None: 406 | raise NameError(f"name '{exp.name}' is not defined") 407 | return var_value 408 | if isinstance(exp, Apply): 409 | callee = self.compile(env, exp.func) 410 | arg = self.compile(env, exp.arg) 411 | return self._mktemp(f"closure_call({callee}, {arg})") 412 | if isinstance(exp, List): 413 | items = [self.compile(env, item) for item in exp.items] 414 | result = self._mktemp("empty_list()") 415 | for item in reversed(items): 416 | result = self._mktemp(f"list_cons({item}, {result})") 417 | self._debug("collect(heap);") 418 | return result 419 | if isinstance(exp, Record): 420 | values: Dict[str, str] = {} 421 | for key, value_exp in 
exp.data.items(): 422 | values[key] = self.compile(env, value_exp) 423 | keys = tuple(sorted(exp.data.keys())) 424 | builder = self.record_builder(keys) 425 | return self._mktemp(f"{builder.name}({', '.join(values[key] for key in keys)})") 426 | if isinstance(exp, Access): 427 | assert isinstance(exp.at, Var), f"only Var access is supported, got {type(exp.at)}" 428 | record = self.compile(env, exp.obj) 429 | key_idx = self.record_key(exp.at.name) 430 | # Check if the record is a record 431 | self._guard(f"is_record({record})", "not a record") 432 | value = self._mktemp(f"record_get({record}, {key_idx})") 433 | self._guard(f"{value} != NULL", f"missing key {exp.at.name!s}") 434 | return value 435 | if isinstance(exp, Function): 436 | # Anonymous function 437 | return self.compile_function(env, exp, name=None) 438 | if isinstance(exp, MatchFunction): 439 | # Anonymous match function 440 | return self.compile_match_function(env, exp, name=None) 441 | raise NotImplementedError(f"exp {type(exp)} {exp}") 442 | 443 | 444 | def compile_to_string(program: Object, debug: bool) -> str: 445 | main_fn = CompiledFunction("scrap_main", params=[]) 446 | compiler = Compiler(main_fn) 447 | compiler.debug = debug 448 | result = compiler.compile({}, program) 449 | main_fn.code.append(f"return {result};") 450 | 451 | f = io.StringIO() 452 | constants = [ 453 | ("uword", "kKiB", 1024), 454 | ("uword", "kMiB", "kKiB * kKiB"), 455 | ("uword", "kGiB", "kKiB * kKiB * kKiB"), 456 | ("uword", "kPageSize", "4 * kKiB"), 457 | ("uword", "kSmallIntTagBits", 1), 458 | ("uword", "kPrimaryTagBits", 3), 459 | ("uword", "kObjectAlignmentLog2", 3), # bits 460 | ("uword", "kObjectAlignment", "1ULL << kObjectAlignmentLog2"), 461 | ("uword", "kImmediateTagBits", 5), 462 | ("uword", "kSmallIntTagMask", "(1ULL << kSmallIntTagBits) - 1"), 463 | ("uword", "kPrimaryTagMask", "(1ULL << kPrimaryTagBits) - 1"), 464 | ("uword", "kImmediateTagMask", "(1ULL << kImmediateTagBits) - 1"), 465 | ("uword", "kWordSize", 
"sizeof(word)"), 466 | ("uword", "kMaxSmallStringLength", "kWordSize - 1"), 467 | ("uword", "kBitsPerByte", 8), 468 | # Up to the five least significant bits are used to tag the object's layout. 469 | # The three low bits make up a primary tag, used to differentiate gc_obj 470 | # from immediate objects. All even tags map to SmallInt, which is 471 | # optimized by checking only the lowest bit for parity. 472 | ("uword", "kSmallIntTag", 0), # 0b****0 473 | ("uword", "kHeapObjectTag", 1), # 0b**001 474 | ("uword", "kEmptyListTag", 5), # 0b00101 475 | ("uword", "kHoleTag", 7), # 0b00111 476 | ("uword", "kSmallStringTag", 13), # 0b01101 477 | ("uword", "kVariantTag", 15), # 0b01111 478 | # TODO(max): Fill in 21 479 | # TODO(max): Fill in 23 480 | # TODO(max): Fill in 29 481 | # TODO(max): Fill in 31 482 | ("uword", "kBitsPerPointer", "kBitsPerByte * kWordSize"), 483 | ("word", "kSmallIntBits", "kBitsPerPointer - kSmallIntTagBits"), 484 | ("word", "kSmallIntMinValue", "-(((word)1) << (kSmallIntBits - 1))"), 485 | ("word", "kSmallIntMaxValue", "(((word)1) << (kSmallIntBits - 1)) - 1"), 486 | ] 487 | for type_, name, value in constants: 488 | print(f"#define {name} ({type_})({value})", file=f) 489 | # The runtime is in the same directory as this file 490 | dirname = os.path.dirname(__file__) 491 | with open(os.path.join(dirname, "runtime.c"), "r") as runtime: 492 | print(runtime.read(), file=f) 493 | print("#define OBJECT_HANDLE(name, exp) GC_HANDLE(struct object*, name, exp)", file=f) 494 | if compiler.record_keys: 495 | print("const char* record_keys[] = {", file=f) 496 | for key in compiler.record_keys: 497 | print(f'"{key}",', file=f) 498 | print("};", file=f) 499 | print("enum {", file=f) 500 | for key, idx in compiler.record_keys.items(): 501 | print(f"Record_{key} = {idx},", file=f) 502 | print("};", file=f) 503 | else: 504 | # Pacify the C compiler 505 | print("const char* record_keys[] = { NULL };", file=f) 506 | if compiler.variant_tags: 507 | print("const char* 
variant_names[] = {", file=f) 508 | for key in compiler.variant_tags: 509 | print(f'"{key}",', file=f) 510 | print("};", file=f) 511 | print("enum {", file=f) 512 | for key, idx in compiler.variant_tags.items(): 513 | print(f"Tag_{key} = {idx},", file=f) 514 | print("};", file=f) 515 | else: 516 | # Pacify the C compiler 517 | print("const char* variant_names[] = { NULL };", file=f) 518 | # Declare all functions 519 | for function in compiler.functions: 520 | print(function.decl() + ";", file=f) 521 | # Emit the const heap 522 | print("#define ptrto(obj) ((struct object*)((uword)&(obj) + 1))", file=f) 523 | for line in compiler.const_heap: 524 | print(line, file=f) 525 | for function in compiler.functions: 526 | print(f"{function.decl()} {{", file=f) 527 | for line in function.code: 528 | print(line, file=f) 529 | print("}", file=f) 530 | return f.getvalue() 531 | -------------------------------------------------------------------------------- /compiler_tests.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | import subprocess 4 | 5 | from scrapscript import env_get_split, discover_cflags, parse, tokenize 6 | from compiler import compile_to_string 7 | 8 | 9 | def compile_to_binary(source: str, memory: int, debug: bool) -> str: 10 | import shlex 11 | import subprocess 12 | import sysconfig 13 | import tempfile 14 | 15 | cc = env_get_split("CC", shlex.split(sysconfig.get_config_var("CC"))) 16 | cflags = discover_cflags(cc, debug) 17 | cflags += [f"-DMEMORY_SIZE={memory}"] 18 | program = parse(tokenize(source)) 19 | c_code = compile_to_string(program, debug) 20 | with tempfile.NamedTemporaryFile(mode="w", suffix=".c", delete=False) as c_file: 21 | c_file.write(c_code) 22 | # The platform is in the same directory as this file 23 | dirname = os.path.dirname(__file__) 24 | with open(os.path.join(dirname, "cli.c"), "r") as f: 25 | c_file.write(f.read()) 26 | with tempfile.NamedTemporaryFile(mode="w", 
suffix=".out", delete=False) as out_file: 27 | subprocess.run([*cc, *cflags, "-o", out_file.name, c_file.name], check=True) 28 | return out_file.name 29 | 30 | 31 | class CompilerEndToEndTests(unittest.TestCase): 32 | def _run(self, code: str) -> str: 33 | use_valgrind = bool(os.environ.get("USE_VALGRIND", False)) 34 | binary = compile_to_binary(code, memory=4096, debug=True) 35 | if use_valgrind: 36 | cmd = ["valgrind", binary] 37 | else: 38 | cmd = [binary] 39 | result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True) 40 | return result.stdout 41 | 42 | def test_int(self) -> None: 43 | self.assertEqual(self._run("1"), "1\n") 44 | 45 | def test_small_string(self) -> None: 46 | self.assertEqual(self._run('"hello"'), '"hello"\n') 47 | 48 | def test_small_string_concat(self) -> None: 49 | self.assertEqual(self._run('"abc" ++ "def"'), '"abcdef"\n') 50 | 51 | def test_const_large_string(self) -> None: 52 | self.assertEqual(self._run('"hello world"'), '"hello world"\n') 53 | 54 | def test_heap_string_concat(self) -> None: 55 | self.assertEqual(self._run('"hello world" ++ " and goodbye"'), '"hello world and goodbye"\n') 56 | 57 | def test_const_list(self) -> None: 58 | self.assertEqual( 59 | self._run("""[1, "2", [3, 4], {a=1}, #foo ()]"""), 60 | """[1, "2", [3, 4], {a = 1}, #foo ()]\n""", 61 | ) 62 | 63 | def test_add(self) -> None: 64 | self.assertEqual(self._run("1 + 2"), "3\n") 65 | 66 | def test_sub(self) -> None: 67 | self.assertEqual(self._run("1 - 2"), "-1\n") 68 | 69 | def test_mul(self) -> None: 70 | self.assertEqual(self._run("2 * 3"), "6\n") 71 | 72 | def test_list(self) -> None: 73 | self.assertEqual(self._run("[1, 2, 3]"), "[1, 2, 3]\n") 74 | 75 | def test_list_concat(self) -> None: 76 | self.assertEqual(self._run("0 >+ [1, 2, 3]"), "[0, 1, 2, 3]\n") 77 | 78 | def test_var(self) -> None: 79 | self.assertEqual(self._run("a . 
a = 1"), "1\n") 80 | 81 | def test_record(self) -> None: 82 | self.assertEqual(self._run("{a = 1, b = 2}"), "{a = 1, b = 2}\n") 83 | 84 | def test_record_builder(self) -> None: 85 | self.assertEqual(self._run("f 1 2 . f = x -> y -> {a = x, b = y}"), "{a = 1, b = 2}\n") 86 | 87 | def test_record_access(self) -> None: 88 | self.assertEqual(self._run("rec@a . rec = {a = 1, b = 2}"), "1\n") 89 | 90 | def test_record_builder_access(self) -> None: 91 | self.assertEqual(self._run("(f 1 2)@a . f = x -> y -> {a = x, b = y}"), "1\n") 92 | 93 | def test_hole(self) -> None: 94 | self.assertEqual(self._run("()"), "()\n") 95 | 96 | def test_variant(self) -> None: 97 | self.assertEqual(self._run("# foo 123"), "#foo 123\n") 98 | 99 | def test_variant_builder(self) -> None: 100 | self.assertEqual(self._run("f 123 . f = x -> # foo x"), "#foo 123\n") 101 | 102 | def test_function(self) -> None: 103 | self.assertEqual(self._run("f 1 . f = x -> x + 1"), "2\n") 104 | 105 | def test_anonymous_function_as_value(self) -> None: 106 | self.assertEqual(self._run("x -> x"), "\n") 107 | 108 | def test_anonymous_function(self) -> None: 109 | self.assertEqual(self._run("((x -> x + 1) 1)"), "2\n") 110 | 111 | def test_match_int(self) -> None: 112 | self.assertEqual(self._run("f 3 . f = | 1 -> 2 | 3 -> 4"), "4\n") 113 | 114 | def test_match_list(self) -> None: 115 | self.assertEqual(self._run("f [4, 5] . f = | [1, 2] -> 3 | [4, 5] -> 6"), "6\n") 116 | 117 | def test_match_list_spread(self) -> None: 118 | self.assertEqual(self._run("f [4, 5] . f = | [_, ...xs] -> xs"), "[5]\n") 119 | 120 | def test_match_record(self) -> None: 121 | self.assertEqual(self._run("f {a = 4, b = 5} . f = | {a = 1, b = 2} -> 3 | {a = 4, b = 5} -> 6"), "6\n") 122 | 123 | def test_match_record_too_few_keys(self) -> None: 124 | self.assertEqual(self._run("f {a = 4, b = 5} . 
f = | {a = _} -> 3 | {a = _, b = _} -> 6"), "6\n") 125 | 126 | def test_match_record_spread(self) -> None: 127 | self.assertEqual(self._run("f {a=1, b=2, c=3} . f = | {a=a, ...} -> a"), "1\n") 128 | 129 | @unittest.skip("TODO") 130 | def test_match_record_spread_named(self) -> None: 131 | self.assertEqual(self._run("f {a=1, b=2, c=3} . f = | {a=1, ...rest} -> rest"), "[5]\n") 132 | 133 | def test_match_hole(self) -> None: 134 | self.assertEqual(self._run("f () . f = | 1 -> 3 | () -> 4"), "4\n") 135 | 136 | def test_match_immediate_variant(self) -> None: 137 | self.assertEqual(self._run("f #foo () . f = | # bar 1 -> 3 | # foo () -> 4"), "4\n") 138 | 139 | def test_match_heap_variant(self) -> None: 140 | self.assertEqual(self._run("f #bar 1 . f = | # bar 1 -> 3 | # foo () -> 4"), "3\n") 141 | 142 | @unittest.skipIf("STATIC_HEAP" in os.environ.get("CFLAGS", ""), "Can't grow heap in static heap mode") 143 | def test_heap_growth(self) -> None: 144 | self.assertEqual( 145 | self._run( 146 | """ 147 | countdown 1000 148 | . countdown = 149 | | 0 -> [] 150 | | n -> n >+ countdown (n - 1) 151 | """ 152 | ), 153 | "[" + ", ".join(str(i) for i in range(1000, 0, -1)) + "]\n", 154 | ) 155 | 156 | 157 | if __name__ == "__main__": 158 | unittest.main() 159 | -------------------------------------------------------------------------------- /examples/0_home/a.scrap: -------------------------------------------------------------------------------- 1 | greet <| person:ron 3 2 | 3 | . greet :: person -> text = 4 | | :cowboy -> "howdy" 5 | | :ron n -> "hi " ++ a ++ "ron" , a = text/repeat n "a" 6 | | :parent :m -> "hey mom" 7 | | :parent :f -> "greetings father" 8 | | :friend n -> "yo" |> list/repeat n |> string/join " " 9 | | :stranger "felicia" -> "bye" 10 | | :stranger name -> "hello " ++ name 11 | 12 | . 
person = 13 | : cowboy 14 | : ron int 15 | : parent s , s = (: m : f) 16 | : friend int 17 | : stranger text 18 | -------------------------------------------------------------------------------- /examples/0_home/combinators.scrap: -------------------------------------------------------------------------------- 1 | Z factr 5 2 | 3 | . S = x -> y -> z -> x -> z (y z) 4 | . K = x -> y -> x 5 | . I = x -> x 6 | 7 | . A = x -> y -> y 8 | . B = x -> y -> z -> x (y z) 9 | . C = x -> y -> z -> x z y 10 | . M = x -> x x 11 | . T = x -> y -> y x 12 | . W = x -> y -> x y y 13 | 14 | . Y = f -> (x -> f (x x)) (x -> f (x x)) 15 | . Z = f -> (x -> f (v -> (x x) v)) (x -> f (v -> (x x) v)) 16 | 17 | . factr = facti -> 18 | | 0 -> 1 19 | | n -> (mult n) (facti (n - 1)) 20 | . mult = x -> y -> x * y -------------------------------------------------------------------------------- /examples/0_home/factorial.scrap: -------------------------------------------------------------------------------- 1 | factorial 5 2 | . factorial = 3 | | 0 -> 1 4 | | n -> n * factorial (n - 1) -------------------------------------------------------------------------------- /examples/10_types/a.scrap: -------------------------------------------------------------------------------- 1 | scoop:chocolate 2 | . scoop = 3 | : vanilla 4 | : chocolate 5 | : strawberry -------------------------------------------------------------------------------- /examples/10_types/b.scrap: -------------------------------------------------------------------------------- 1 | t:point 3 4 2 | . t = : point int int -------------------------------------------------------------------------------- /examples/10_types/c.scrap: -------------------------------------------------------------------------------- 1 | tuple:triplet 1.0 "A" '2B 2 | . 
tuple = x -> y -> z -> 3 | : pair x y 4 | : triplet x y z -------------------------------------------------------------------------------- /examples/10_types/d.scrap: -------------------------------------------------------------------------------- 1 | typ:fun (n -> x * 2) 2 | . typ = : fun (int => int) -------------------------------------------------------------------------------- /examples/10_types/e.scrap: -------------------------------------------------------------------------------- 1 | hand:left 5 |> 2 | | :l n -> n * 2 3 | | :r n -> n * 3 4 | . hand = 5 | : l int 6 | : r int -------------------------------------------------------------------------------- /examples/10_types/f.scrap: -------------------------------------------------------------------------------- 1 | t 2 | . t = 3 | : a a1 a2 a3 4 | , a1 = text 5 | , a2 = (: x : y : z) 6 | , a3 = text 7 | : b int b2 8 | , b2 = int 9 | : c byte byte byte -------------------------------------------------------------------------------- /examples/10_types/g.scrap: -------------------------------------------------------------------------------- 1 | | :a { x = :l 0 } -> () 2 | | :a { x = _ } -> () 3 | | :b { x = :l 1 } -> () 4 | | :b { x = _ } -> () 5 | | :c { x = :l 2 } -> () 6 | | :c { x = - } -> () 7 | | _ -> () -------------------------------------------------------------------------------- /examples/11_platforms/web/Dockerfile: -------------------------------------------------------------------------------- 1 | # Set things up 2 | FROM alpine:latest as build 3 | RUN apk add clang python3 4 | ARG VER=3.4.0 5 | RUN wget https://cosmo.zip/pub/cosmocc/cosmocc-$VER.zip 6 | WORKDIR cosmo 7 | RUN unzip ../cosmocc-$VER.zip 8 | COPY scrapscript.py . 9 | COPY compiler.py . 10 | COPY runtime.c . 11 | COPY examples/11_platforms/web/ . 
12 | RUN CC=bin/cosmocc python3 compiler.py --compile --platform web.c handler.scrap 13 | RUN bin/assimilate a.out 14 | RUN du -sh a.out 15 | 16 | # Set up the container 17 | FROM scratch as server 18 | COPY --from=build /cosmo/a.out . 19 | EXPOSE 8000 20 | ENTRYPOINT ["./a.out"] 21 | 22 | # Deploy from the repo root with 23 | # fly deploy --dockerfile examples/11_platforms/web/Dockerfile --local-only --config examples/11_platforms/web/fly.toml 24 | -------------------------------------------------------------------------------- /examples/11_platforms/web/handler.scrap: -------------------------------------------------------------------------------- 1 | | "/" -> (status 200 <| page "you're on the index") 2 | | "/about" -> (status 200 <| page "you're on the about page") 3 | | _ -> notfound 4 | 5 | . notfound = (status 404 <| page "not found") 6 | . status = code -> body -> { code = code, body = body } 7 | . page = body -> "" ++ body ++ "" 8 | 9 | -------------------------------------------------------------------------------- /examples/11_platforms/web/web.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #define WBY_IMPLEMENTATION 5 | #include "web.h" 6 | 7 | // $ CFLAGS="-I examples/11_platforms/web/" ./compiler.py --compile --platform examples/11_platforms/web/web.c examples/11_platforms/web/handler.scrap 8 | // or 9 | // $ ./compiler.py --platform examples/11_platforms/web/web.c examples/11_platforms/web/handler.scrap 10 | // $ cc -I examples/11_platforms/web/ output.c 11 | 12 | static int 13 | dispatch(struct wby_con *connection, void *userdata) 14 | { 15 | HANDLES(); 16 | GC_HANDLE(struct object*, handler, *(struct object**)userdata); 17 | GC_HANDLE(struct object*, url, mkstring(heap, connection->request.uri, strlen(connection->request.uri))); 18 | GC_HANDLE(struct object*, response, closure_call(handler, url)); 19 | assert(is_record(response)); 20 | GC_HANDLE(struct object*, code, 
record_get(response, Record_code)); 21 | assert(is_num(code)); 22 | GC_HANDLE(struct object*, body, record_get(response, Record_body)); 23 | assert(is_string(body)); 24 | 25 | wby_response_begin(connection, num_value(code), string_length(body), NULL, 0); 26 | // TODO(max): Copy into buffer or strdup 27 | wby_write(connection, as_heap_string(body)->data, string_length(body)); 28 | wby_response_end(connection); 29 | fprintf(stderr, "%ld %s\n", num_value(code), connection->request.uri); 30 | return num_value(code) == 200; 31 | } 32 | 33 | int main(int argc, const char * argv[]) 34 | { 35 | /* boot scrapscript */ 36 | #ifdef STATIC_HEAP 37 | char memory[MEMORY_SIZE] = {0}; 38 | struct space space = make_space(memory, MEMORY_SIZE); 39 | #else 40 | struct space space = make_space(MEMORY_SIZE); 41 | #endif 42 | init_heap(heap, space); 43 | HANDLES(); 44 | GC_HANDLE(struct object*, handler, scrap_main()); 45 | assert(is_closure(handler)); 46 | 47 | /* setup config */ 48 | struct wby_config config; 49 | memset(&config, 0, sizeof(config)); 50 | config.address = "0.0.0.0"; 51 | config.port = 8000; 52 | config.connection_max = 8; 53 | config.request_buffer_size = 2048; 54 | config.io_buffer_size = 8192; 55 | config.dispatch = dispatch; 56 | config.userdata = &handler; 57 | 58 | /* compute and allocate needed memory and start server */ 59 | struct wby_server server; 60 | size_t needed_memory; 61 | wby_init(&server, &config, &needed_memory); 62 | void *memory = calloc(needed_memory, 1); 63 | printf("serving at http://%s:%d\n", config.address, config.port); 64 | wby_start(&server, memory); 65 | while (1) { 66 | wby_update(&server); 67 | } 68 | wby_stop(&server); 69 | free(memory); 70 | destroy_space(space); 71 | } 72 | 73 | -------------------------------------------------------------------------------- /examples/11_platforms/web/web.h: -------------------------------------------------------------------------------- 1 | /* 2 | web.h - BSD LICENSE - Andreas Fredriksson 3 | 4 | 
ABOUT: 5 | This is a web server intended for debugging tools inside a 6 | program with a continuously running main loop. It's intended to be used when 7 | all you need is something tiny and performance isn't a key concern. 8 | NOTE: this is a single header port of Andreas Fredriksson 9 | Webby(https://github.com/deplinenoise/webby). 10 | 11 | Features 12 | - Single header library to be easy to embed into your code. 13 | - No dynamic memory allocations -- server memory is completely fixed 14 | - No threading, all I/O and serving happens on the calling thread 15 | - Supports socket keep-alives 16 | - Supports the 100-Continue scheme for file uploading 17 | - Basic support for WebSockets is available. 18 | 19 | Because request/response I/O is synchronous on the calling thread, performance 20 | will suffer when you are serving data. For the use-cases wby is intended for, 21 | this is fine. You can still run wby in a background thread at your 22 | discretion if this is a problem. 23 | 24 | DEFINES: 25 | WBY_IMPLEMENTATION 26 | Generates the implementation of the library into the included file. 27 | If not provided the library is in header only mode and can be included 28 | in other headers or source files without problems. But only ONE file 29 | should hold the implementation. 30 | 31 | WBY_STATIC 32 | The generated implementation will stay private inside implementation 33 | file and all internal symbols and functions will only be visible inside 34 | that file. 35 | 36 | WBY_ASSERT 37 | WBY_USE_ASSERT 38 | If you define WBY_USE_ASSERT without defining WBY_ASSERT, web.h 39 | will use assert.h and assert(). Otherwise it will use your assert 40 | method. If you do not define WBY_USE_ASSERT no additional checks 41 | will be added. This is the only C standard library function used 42 | by web. 43 | 44 | WBY_UINT_PTR 45 | If your compiler is C99 you do not need to define this. 46 | Otherwise, web will try default assignments for them 47 | and validate them at compile time.
If they are incorrect, you will 48 | get compile errors and will need to define them yourself. 49 | 50 | You can define this to 'size_t' if you use the standard library, 51 | otherwise it needs to be able to hold the maximum addressable memory 52 | space. If you do not define this it will default to unsigned long. 53 | 54 | 55 | LICENSE: (BSD) 56 | Copyright (c) 2016, Andreas Fredriksson, Micha Mettke 57 | All rights reserved. 58 | 59 | Redistribution and use in source and binary forms, with or without 60 | modification, are permitted provided that the following conditions are met: 61 | 62 | 1. Redistributions of source code must retain the above copyright notice, this 63 | list of conditions and the following disclaimer. 64 | 2. Redistributions in binary form must reproduce the above copyright notice, 65 | this list of conditions and the following disclaimer in the documentation 66 | and/or other materials provided with the distribution. 67 | 68 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 69 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 70 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 71 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 72 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 73 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 74 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 75 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 76 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 77 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
78 | 79 | CONTRIBUTORS: 80 | Andreas Fredriksson (implementation) 81 | Micha Mettke (single header conversion) 82 | 83 | USAGE: 84 | Request handling 85 | When you configure the server, you give it a function pointer to your 86 | dispatcher. The dispatcher is called by wby when a request has been fully 87 | read into memory and is ready for processing. The socket the request came in on 88 | has then been switched to blocking mode, and you're free to read any request 89 | data using `wby_read()` (if present, check `content_length`) and then write 90 | your response. 91 | There are two ways to generate a response; explicit size or chunked. 92 | 93 | When you know the size of the data 94 | When you know in advance how big the response is going to be, you should pass 95 | that size in bytes to `wby_response_begin()` (it will be sent as the 96 | Content-Length header). You then call `wby_write()` to push that data out, and 97 | finally `wby_response_end()` to finalize the response and prepare the socket 98 | for a new request. 99 | 100 | When the response size is dynamic 101 | Sometimes you want to generate an arbitrary amount of text in the response, and 102 | you don't know how much that will be. Rather than buffering everything in RAM, 103 | you can use chunked encoding. First call `wby_response_begin()` as normal, but 104 | pass it -1 for the content length. This triggers sending the 105 | `Transfer-Encoding: chunked` header. You then call `wby_write()` as desired 106 | until the response is complete. When you're done, call `wby_response_end()` to finish up. 
107 | 108 | EXAMPLES: 109 | for a actual working example please look inside tests/wby_test.c */ 110 | #if 0 111 | /* request and websocket handling callback */ 112 | static int dispatch(struct wby_con *connection, void *pArg); 113 | static int websocket_connect(struct wby_con *conn, void *pArg); 114 | static void websocket_connected(struct wby_con *con, void *pArg); 115 | static int websocket_frame(struct wby_con *conn, const struct wby_frame *frame, void *pArg); 116 | static void websocket_closed(struct wby_con *connection, void *pArg); 117 | 118 | int main(int argc, const char * argv[]) 119 | { 120 | /* setup config */ 121 | struct wby_config config; 122 | memset(config, 0, sizeof(config)); 123 | config.address = "127.0.0.1"; 124 | config.port = 8888; 125 | config.connection_max = 8; 126 | config.request_buffer_size = 2048; 127 | config.io_buffer_size = 8192; 128 | config.dispatch = dispatch; 129 | config.ws_connect = websocket_connect; 130 | config.ws_connected = websocket_connected; 131 | config.ws_frame = websocket_frame; 132 | config.ws_closed = websocket_closed; 133 | 134 | /* compute and allocate needed memory and start server */ 135 | struct wby_server server; 136 | size_t needed_memory; 137 | wby_server_init(&server, &config, &needed_memory); 138 | void *memory = calloc(needed_memory, 1); 139 | wby_server_start(&server, memory); 140 | while (1) { 141 | wby_server_update(&server); 142 | 143 | } 144 | wby_server_stop(&server); 145 | free(memory); 146 | } 147 | #endif 148 | /* =============================================================== 149 | * 150 | * HEADER 151 | * 152 | * =============================================================== */ 153 | #ifndef WBY_H_ 154 | #define WBY_H_ 155 | 156 | #ifdef __cplusplus 157 | extern "C" { 158 | #endif 159 | 160 | #ifdef WBY_STATIC 161 | #define WBY_API static 162 | #else 163 | #define WBY_API extern 164 | #endif 165 | 166 | #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 19901L) 167 | #include 168 | #ifndef 
WBY_UINT_PTR 169 | #define WBY_UINT_PTR uintptr_t 170 | #endif 171 | #else 172 | #ifndef WBY_UINT_PTR 173 | 174 | #if defined(__i386__) || (!defined(_WIN64) && defined(_WIN32)) 175 | #define WBY_UINT_PTR unsigned long 176 | #else 177 | #define WBY_UINT_PTR unsigned long long 178 | #endif 179 | 180 | #endif 181 | #endif 182 | typedef unsigned char wby_byte; 183 | typedef WBY_UINT_PTR wby_size; 184 | typedef WBY_UINT_PTR wby_ptr; 185 | 186 | #define WBY_OK (0) 187 | #define WBY_FLAG(x) (1 << (x)) 188 | 189 | #ifndef WBY_MAX_HEADERS 190 | #define WBY_MAX_HEADERS 64 191 | #endif 192 | 193 | struct wby_header { 194 | const char *name; 195 | const char *value; 196 | }; 197 | 198 | /* A HTTP request. */ 199 | struct wby_request { 200 | const char *method; 201 | /* The method of the request, e.g. "GET", "POST" and so on */ 202 | const char *uri; 203 | /* The URI that was used. */ 204 | const char *http_version; 205 | /* The used HTTP version */ 206 | const char *query_params; 207 | /* The query parameters passed in the URL, or NULL if none were passed. */ 208 | int content_length; 209 | /* The number of bytes of request body that are available via WebbyRead() */ 210 | int header_count; 211 | /* The number of headers */ 212 | struct wby_header headers[WBY_MAX_HEADERS]; 213 | /* Request headers */ 214 | }; 215 | 216 | /* Connection state, as published to the serving callback. */ 217 | struct wby_con { 218 | struct wby_request request; 219 | /* The request being served. Read-only. */ 220 | void *user_data; 221 | /* User data. Read-write. wby doesn't care about this. 
*/ 222 | }; 223 | 224 | struct wby_frame { 225 | wby_byte flags; 226 | wby_byte opcode; 227 | wby_byte header_size; 228 | wby_byte padding_; 229 | wby_byte mask_key[4]; 230 | int payload_length; 231 | }; 232 | 233 | enum wby_websock_flags { 234 | WBY_WSF_FIN = WBY_FLAG(0), 235 | WBY_WSF_MASKED = WBY_FLAG(1) 236 | }; 237 | 238 | enum wby_websock_operation { 239 | WBY_WSOP_CONTINUATION = 0, 240 | WBY_WSOP_TEXT_FRAME = 1, 241 | WBY_WSOP_BINARY_FRAME = 2, 242 | WBY_WSOP_CLOSE = 8, 243 | WBY_WSOP_PING = 9, 244 | WBY_WSOP_PONG = 10 245 | }; 246 | 247 | /* Configuration data required for starting a server. */ 248 | typedef void(*wby_log_f)(const char *msg); 249 | struct wby_config { 250 | void *userdata; 251 | /* userdata which will be passed */ 252 | const char *address; 253 | /* The bind address. Must be a textual IP address. */ 254 | unsigned short port; 255 | /* The port to listen to. */ 256 | unsigned int connection_max; 257 | /* Maximum number of simultaneous connections. */ 258 | wby_size request_buffer_size; 259 | /* The size of the request buffer. This must be big enough to contain all 260 | * headers and the request line sent by the client. 2-4k is a good size for 261 | * this buffer. */ 262 | wby_size io_buffer_size; 263 | /* The size of the I/O buffer, used when writing the reponse. 4k is a good 264 | * choice for this buffer.*/ 265 | wby_log_f log; 266 | /* Optional callback function that receives debug log text (without newlines). */ 267 | int(*dispatch)(struct wby_con *con, void *userdata); 268 | /* Request dispatcher function. This function is called when the request 269 | * structure is ready. 270 | * If you decide to handle the request, call wby_response_begin(), 271 | * wby_write() and wby_response_end() and then return 0. Otherwise, return a 272 | * non-zero value to have Webby send back a 404 response. */ 273 | int(*ws_connect)(struct wby_con*, void *userdata); 274 | /*WebSocket connection dispatcher. 
Called when an incoming request wants to 275 | * update to a WebSocket connection. 276 | * Return 0 to allow the connection. 277 | * Return 1 to ignore the connection.*/ 278 | void (*ws_connected)(struct wby_con*, void *userdata); 279 | /* Called when a WebSocket connection has been established.*/ 280 | void (*ws_closed)(struct wby_con*, void *userdata); 281 | /*Called when a WebSocket connection has been closed.*/ 282 | int (*ws_frame)(struct wby_con*, const struct wby_frame *frame, void *userdata); 283 | /*Called when a WebSocket data frame is incoming. 284 | * Call wby_read() to read the payload data. 285 | * Return non-zero to close the connection.*/ 286 | }; 287 | 288 | struct wby_connection; 289 | struct wby_server { 290 | struct wby_config config; 291 | /* server configuration */ 292 | wby_size memory_size; 293 | /* minimum required memory */ 294 | wby_ptr socket; 295 | /* server socket */ 296 | wby_size con_count; 297 | /* number of active connection */ 298 | struct wby_connection *con; 299 | /* connections */ 300 | #ifdef _WIN32 301 | int windows_socket_initialized; 302 | /* whether WSAStartup had to be called on Windows */ 303 | #endif 304 | }; 305 | 306 | WBY_API void wby_init(struct wby_server*, const struct wby_config*, 307 | wby_size *needed_memory); 308 | /* this function clears the server and calculates the needed memory to run 309 | Input: 310 | - filled server configuration data to calculate the needed memory 311 | Output: 312 | - needed memory for the server to run 313 | */ 314 | WBY_API int wby_start(struct wby_server*, void *memory); 315 | /* this function starts running the server in the specificed memory space. Size 316 | * must be at least big enough as determined in the wby_server_init(). 
317 | Input: 318 | - allocated memory space to create the server into 319 | */ 320 | WBY_API void wby_update(struct wby_server*); 321 | /* updates the server by being called frequently (at least once a frame) */ 322 | WBY_API void wby_stop(struct wby_server*); 323 | /* stops and shutdown the server */ 324 | WBY_API int wby_response_begin(struct wby_con*, int status_code, int content_length, 325 | const struct wby_header headers[], int header_count); 326 | /* this function begins a response 327 | Input: 328 | - HTTP status code to send. (Normally 200). 329 | - size in bytes you intend to write, or -1 for chunked encoding 330 | - array of HTTP headers to transmit (can be NULL if header_count == 0) 331 | - number of headers in the array 332 | Output: 333 | - returns 0 on success, non-zero on error. 334 | */ 335 | WBY_API void wby_response_end(struct wby_con*); 336 | /* this function finishes a response. When you're done writing the response 337 | * body, call this function. this makes sure chunked encoding is terminated 338 | * correctly and that the connection is setup for reuse. */ 339 | WBY_API int wby_read(struct wby_con*, void *ptr, wby_size len); 340 | /* this function reads data from the request body. Only read what the client 341 | * has provided (via content_length) parameter, or you will end up blocking 342 | * forever. 343 | Input: 344 | - pointer to a memory block that will be filled 345 | - size of the memory block to fill 346 | */ 347 | WBY_API int wby_write(struct wby_con*, const void *ptr, wby_size len); 348 | /* this function writes a response data to the connection. If you're not using 349 | * chunked encoding, be careful not to send more than the specified content 350 | * length. You can call this function multiple times as long as the total 351 | * number of bytes matches up with the content length.
352 | Input: 353 | - pointer to a memory block that will be send 354 | - size of the memory block to send 355 | */ 356 | WBY_API int wby_frame_begin(struct wby_con*, int opcode); 357 | /* this function begins an outgoing websocket frame */ 358 | WBY_API int wby_frame_end(struct wby_con*); 359 | /* this function finishes an outgoing websocket frame */ 360 | WBY_API int wby_find_query_var(const char *buf, const char *name, char *dst, wby_size dst_len); 361 | /* this function is a helper function to lookup a query parameter given a URL 362 | * encoded string. Returns the size of the returned data, or -1 if the query 363 | * var wasn't found. */ 364 | WBY_API const char* wby_find_header(struct wby_con*, const char *name); 365 | /* this convenience function to find a header in a request. Returns the value 366 | * of the specified header, or NULL if its was not present. */ 367 | 368 | #ifdef __cplusplus 369 | } 370 | #endif 371 | #endif /* WBY_H_ */ 372 | /* =============================================================== 373 | * 374 | * IMPLEMENTATION 375 | * 376 | * ===============================================================*/ 377 | #ifdef WBY_IMPLEMENTATION 378 | 379 | typedef int wby__check_ptr_size[(sizeof(void*) == sizeof(WBY_UINT_PTR)) ? 
1 : -1]; 380 | #define WBY_LEN(a) (sizeof(a)/sizeof((a)[0])) 381 | #define WBY_UNUSED(a) ((void)(a)) 382 | 383 | #ifdef WBY_USE_ASSERT 384 | #ifndef WBY_ASSERT 385 | #include 386 | #define WBY_ASSERT(expr) assert(expr) 387 | #endif 388 | #else 389 | #define WBY_ASSERT(expr) 390 | #endif 391 | 392 | #include 393 | #include 394 | #include 395 | #include 396 | #include 397 | #include 398 | 399 | #define WBY_SOCK(s) ((wby_socket)(s)) 400 | #define WBY_INTERN static 401 | #define WBY_GLOBAL static 402 | #define WBY_STORAGE static 403 | 404 | /* =============================================================== 405 | * UTIL 406 | * ===============================================================*/ 407 | struct wby_buffer { 408 | wby_size used; 409 | /* current buffer size */ 410 | wby_size max; 411 | /* buffer capacity */ 412 | wby_byte *data; 413 | /* pointer inside a global buffer */ 414 | }; 415 | 416 | WBY_INTERN void 417 | wby_dbg(wby_log_f log, const char *fmt, ...) 418 | { 419 | char buffer[1024]; 420 | va_list args; 421 | if (!log) return; 422 | 423 | va_start(args, fmt); 424 | vsnprintf(buffer, sizeof buffer, fmt, args); 425 | va_end(args); 426 | buffer[(sizeof buffer)-1] = '\0'; 427 | log(buffer); 428 | } 429 | 430 | WBY_INTERN int 431 | wb_peek_request_size(const wby_byte *buf, int len) 432 | { 433 | int i; 434 | int max = len - 3; 435 | for (i = 0; i < max; ++i) { 436 | if ('\r' != buf[i + 0]) continue; 437 | if ('\n' != buf[i + 1]) continue; 438 | if ('\r' != buf[i + 2]) continue; 439 | if ('\n' != buf[i + 3]) continue; 440 | /* OK; we have CRLFCRLF which indicates the end of the header section */ 441 | return i + 4; 442 | } 443 | return -1; 444 | } 445 | 446 | WBY_INTERN char* 447 | wby_skipws(char *p) 448 | { 449 | for (;;) { 450 | char ch = *p; 451 | if (' ' == ch || '\t' == ch) ++p; 452 | else break; 453 | } 454 | return p; 455 | } 456 | 457 | #define WBY_TOK_SKIPWS WBY_FLAG(0) 458 | WBY_INTERN int 459 | wby_tok_inplace(char *buf, const char* separator, char 
*tokens[], int max, int flags) 460 | { 461 | char *b = buf; 462 | char *e = buf; 463 | int token_count = 0; 464 | int separator_len = (int)strlen(separator); 465 | while (token_count < max) { 466 | if (flags & WBY_TOK_SKIPWS) 467 | b = wby_skipws(b); 468 | if (NULL != (e = strstr(b, separator))) { 469 | int len = (int) (e - b); 470 | if (len > 0) 471 | tokens[token_count++] = b; 472 | *e = '\0'; 473 | b = e + separator_len; 474 | } else { 475 | tokens[token_count++] = b; 476 | break; 477 | } 478 | } 479 | return token_count; 480 | } 481 | 482 | WBY_INTERN wby_size 483 | wby_make_websocket_header(wby_byte buffer[10], wby_byte opcode, 484 | int payload_len, int fin) 485 | { 486 | buffer[0] = (wby_byte)((fin ? 0x80 : 0x00) | opcode); 487 | if (payload_len < 126) { 488 | buffer[1] = (wby_byte)(payload_len & 0x7f); 489 | return 2; 490 | } else if (payload_len < 65536) { 491 | buffer[1] = 126; 492 | buffer[2] = (wby_byte)(payload_len >> 8); 493 | buffer[3] = (wby_byte)payload_len; 494 | return 4; 495 | } else { 496 | buffer[1] = 127; 497 | /* Ignore high 32-bits. I didn't want to require 64-bit types and typdef hell in the API. */ 498 | buffer[2] = buffer[3] = buffer[4] = buffer[5] = 0; 499 | buffer[6] = (wby_byte)(payload_len >> 24); 500 | buffer[7] = (wby_byte)(payload_len >> 16); 501 | buffer[8] = (wby_byte)(payload_len >> 8); 502 | buffer[9] = (wby_byte)payload_len; 503 | return 10; 504 | } 505 | } 506 | 507 | WBY_INTERN int 508 | wby_read_buffered_data(int *data_left, struct wby_buffer* buffer, 509 | char **dest_ptr, wby_size *dest_len) 510 | { 511 | int offset, read_size; 512 | int left = *data_left; 513 | int len; 514 | if (left == 0) 515 | return 0; 516 | 517 | len = (int) *dest_len; 518 | offset = (int)buffer->used - left; 519 | read_size = (len > left) ? 
left : len; 520 | memcpy(*dest_ptr, buffer->data + offset, (wby_size)read_size); 521 | 522 | (*dest_ptr) += read_size; 523 | (*dest_len) -= (wby_size) read_size; 524 | (*data_left) -= read_size; 525 | return read_size; 526 | } 527 | 528 | /* --------------------------------------------------------------- 529 | * SOCKET 530 | * ---------------------------------------------------------------*/ 531 | #ifdef _WIN32 532 | #include 533 | #pragma comment(lib, "Ws2_32.lib") 534 | typedef SOCKET wby_socket; 535 | typedef int wby_socklen; 536 | typedef char wby_sockopt; 537 | 538 | #if defined(__GNUC__) 539 | #define WBY_ALIGN(x) __attribute__((aligned(x))) 540 | #else 541 | #define WBY_ALIGN(x) __declspec(align(x)) 542 | #endif 543 | 544 | #define WBY_INVALID_SOCKET INVALID_SOCKET 545 | #define snprintf _snprintf 546 | 547 | WBY_INTERN int 548 | wby_socket_error(void) 549 | { 550 | return WSAGetLastError(); 551 | } 552 | 553 | #if !defined(__GNUC__) 554 | WBY_INTERN int 555 | strcasecmp(const char *a, const char *b) 556 | { 557 | return _stricmp(a, b); 558 | } 559 | 560 | WBY_INTERN int 561 | strncasecmp(const char *a, const char *b, wby_size len) 562 | { 563 | return _strnicmp(a, b, len); 564 | } 565 | #endif 566 | 567 | WBY_INTERN int 568 | wby_socket_set_blocking(wby_socket socket, int blocking) 569 | { 570 | u_long val = !blocking; 571 | return ioctlsocket(socket, FIONBIO, &val); 572 | } 573 | 574 | WBY_INTERN int 575 | wby_socket_is_valid(wby_socket socket) 576 | { 577 | return (INVALID_SOCKET != socket); 578 | } 579 | 580 | WBY_INTERN void 581 | wby_socket_close(wby_socket socket) 582 | { 583 | closesocket(socket); 584 | } 585 | 586 | WBY_INTERN int 587 | wby_socket_is_blocking_error(int error) 588 | { 589 | return WSAEWOULDBLOCK == error; 590 | } 591 | 592 | #else /* UNIX */ 593 | 594 | #include 595 | #include 596 | #include 597 | #include 598 | #include 599 | #include 600 | #include 601 | #include 602 | #include 603 | #include 604 | 605 | typedef int wby_socket; 606 
| typedef socklen_t wby_socklen; 607 | typedef int wby_sockopt; 608 | 609 | #define WBY_ALIGN(x) __attribute__((aligned(x))) 610 | #define WBY_INVALID_SOCKET (-1) 611 | 612 | WBY_INTERN int 613 | wby_socket_error(void) 614 | { 615 | return errno; 616 | } 617 | 618 | WBY_INTERN int 619 | wby_socket_is_valid(wby_socket socket) 620 | { 621 | return (socket > 0); 622 | } 623 | 624 | WBY_INTERN void 625 | wby_socket_close(wby_socket socket) 626 | { 627 | close(socket); 628 | } 629 | 630 | WBY_INTERN int 631 | wby_socket_is_blocking_error(int error) 632 | { 633 | return (EAGAIN == error); 634 | } 635 | 636 | WBY_INTERN int 637 | wby_socket_set_blocking(wby_socket socket, int blocking) 638 | { 639 | int flags = fcntl(socket, F_GETFL, 0); 640 | if (flags < 0) return flags; 641 | flags = blocking ? (flags & ~O_NONBLOCK) : (flags | O_NONBLOCK); 642 | return fcntl(socket, F_SETFL, flags); 643 | } 644 | #endif 645 | 646 | WBY_INTERN int 647 | wby_socket_config_incoming(wby_socket socket) 648 | { 649 | wby_sockopt off = 0; 650 | int err; 651 | if ((err = wby_socket_set_blocking(socket, 0)) != WBY_OK) return err; 652 | setsockopt(socket, SOL_SOCKET, SO_LINGER, (const char*) &off, sizeof(int)); 653 | return 0; 654 | } 655 | 656 | WBY_INTERN int 657 | wby_socket_send(wby_socket socket, const wby_byte *buffer, int size) 658 | { 659 | while (size > 0) { 660 | long err = send(socket, (const char*)buffer, (wby_size)size, 0); 661 | if (err <= 0) return 1; 662 | buffer += err; 663 | size -= (int)err; 664 | } 665 | return 0; 666 | } 667 | 668 | /* Read as much as possible without blocking while there is buffer space. 
*/ 669 | enum {WBY_FILL_OK, WBY_FILL_ERROR, WBY_FILL_FULL}; 670 | WBY_INTERN int 671 | wby_socket_recv(wby_socket socket, struct wby_buffer *buf, wby_log_f log) 672 | { 673 | long err; 674 | int buf_left; 675 | for (;;) { 676 | buf_left = (int)buf->max - (int)buf->used; 677 | wby_dbg(log, "buffer space left = %d", buf_left); 678 | if (buf_left == 0) 679 | return WBY_FILL_FULL; 680 | 681 | /* Read what we can into the current buffer space. */ 682 | err = recv(socket, (char*)buf->data + buf->used, (wby_size)buf_left, 0); 683 | if (err < 0) { 684 | int sock_err = wby_socket_error(); 685 | if (wby_socket_is_blocking_error(sock_err)) { 686 | return WBY_FILL_OK; 687 | } else { 688 | /* Read error. Give up. */ 689 | wby_dbg(log, "read error %d - connection dead", sock_err); 690 | return WBY_FILL_ERROR; 691 | } 692 | } else if (err == 0) { 693 | /* The peer has closed the connection. */ 694 | wby_dbg(log, "peer has closed the connection"); 695 | return WBY_FILL_ERROR; 696 | } else buf->used += (wby_size)err; 697 | } 698 | } 699 | 700 | WBY_INTERN int 701 | wby_socket_flush(wby_socket socket, struct wby_buffer *buf) 702 | { 703 | if (buf->used > 0){ 704 | if (wby_socket_send(socket, buf->data, (int)buf->used) != WBY_OK) 705 | return 1; 706 | } 707 | buf->used = 0; 708 | return 0; 709 | } 710 | 711 | /* --------------------------------------------------------------- 712 | * URL 713 | * ---------------------------------------------------------------*/ 714 | /* URL-decode input buffer into destination buffer. 715 | * 0-terminate the destination buffer. Return the length of decoded data. 716 | * form-url-encoded data differs from URI encoding in a way that it 717 | * uses '+' as character for space, see RFC 1866 section 8.2.1 718 | * http://ftp.ics.uci.edu/pub/ietf/html/rfc1866.txt 719 | * 720 | * This bit of code was taken from mongoose. 
721 | */ 722 | WBY_INTERN wby_size 723 | wby_url_decode(const char *src, wby_size src_len, char *dst, wby_size dst_len, 724 | int is_form_url_encoded) 725 | { 726 | int a, b; 727 | wby_size i, j; 728 | #define HEXTOI(x) (isdigit(x) ? x - '0' : x - 'W') 729 | for (i = j = 0; i < src_len && j < dst_len - 1; i++, j++) { 730 | if (src[i] == '%' && 731 | isxdigit(*(const wby_byte*)(src + i + 1)) && 732 | isxdigit(*(const wby_byte*)(src + i + 2))) 733 | { 734 | a = tolower(*(const wby_byte*)(src + i + 1)); 735 | b = tolower(*(const wby_byte*)(src + i + 2)); 736 | dst[j] = (char)((HEXTOI(a) << 4) | HEXTOI(b)); 737 | i += 2; 738 | } else if (is_form_url_encoded && src[i] == '+') { 739 | dst[j] = ' '; 740 | } else dst[j] = src[i]; 741 | } 742 | #undef HEXTOI 743 | dst[j] = '\0'; /* Null-terminate the destination */ 744 | return j; 745 | } 746 | 747 | /* Pulled from mongoose */ 748 | WBY_API int 749 | wby_find_query_var(const char *buf, const char *name, char *dst, wby_size dst_len) 750 | { 751 | const char *p, *e, *s; 752 | wby_size name_len; 753 | int len; 754 | wby_size buf_len = strlen(buf); 755 | 756 | name_len = strlen(name); 757 | e = buf + buf_len; 758 | len = -1; 759 | dst[0] = '\0'; 760 | 761 | /* buf is "var1=val1&var2=val2...". 
Find variable first */ 762 | for (p = buf; p != NULL && p + name_len < e; p++) { 763 | if ((p == buf || p[-1] == '&') && p[name_len] == '=' && 764 | strncasecmp(name, p, name_len) == 0) 765 | { 766 | /* Point p to variable value */ 767 | p += name_len + 1; 768 | /* Point s to the end of the value */ 769 | s = (const char *) memchr(p, '&', (wby_size)(e - p)); 770 | if (s == NULL) s = e; 771 | WBY_ASSERT(s >= p); 772 | /* Decode variable into destination buffer */ 773 | if ((wby_size) (s - p) < dst_len) 774 | len = (int)wby_url_decode(p, (wby_size)(s - p), dst, dst_len, 1); 775 | break; 776 | } 777 | } 778 | return len; 779 | } 780 | 781 | /* --------------------------------------------------------------- 782 | * BASE64 783 | * ---------------------------------------------------------------*/ 784 | #define WBY_BASE64_QUADS_BEFORE_LINEBREAK 19 785 | 786 | WBY_INTERN wby_size 787 | wby_base64_bufsize(wby_size input_size) 788 | { 789 | wby_size triplets = (input_size + 2) / 3; 790 | wby_size base_size = 4 * triplets; 791 | wby_size line_breaks = 2 * (triplets / WBY_BASE64_QUADS_BEFORE_LINEBREAK); 792 | wby_size null_termination = 1; 793 | return base_size + line_breaks + null_termination; 794 | } 795 | 796 | WBY_INTERN int 797 | wby_base64_encode(char* output, wby_size output_size, 798 | const wby_byte *input, wby_size input_size) 799 | { 800 | wby_size i = 0; 801 | int line_out = 0; 802 | WBY_STORAGE const char enc[] = 803 | "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 804 | "abcdefghijklmnopqrstuvwxyz" 805 | "0123456789+/="; 806 | if (output_size < wby_base64_bufsize(input_size)) 807 | return 1; 808 | 809 | while (i < input_size) { 810 | unsigned int idx_0, idx_1, idx_2, idx_3; 811 | unsigned int i0; 812 | 813 | i0 = (unsigned int)(input[i]) << 16; i++; 814 | i0 |= (unsigned int)(i < input_size ? input[i] : 0) << 8; i++; 815 | i0 |= (i < input_size ? 
input[i] : 0); i++; 816 | 817 | idx_0 = (i0 & 0xfc0000) >> 18; i0 <<= 6; 818 | idx_1 = (i0 & 0xfc0000) >> 18; i0 <<= 6; 819 | idx_2 = (i0 & 0xfc0000) >> 18; i0 <<= 6; 820 | idx_3 = (i0 & 0xfc0000) >> 18; 821 | 822 | if (i - 1 > input_size) idx_2 = 64; 823 | if (i > input_size) idx_3 = 64; 824 | 825 | *output++ = enc[idx_0]; 826 | *output++ = enc[idx_1]; 827 | *output++ = enc[idx_2]; 828 | *output++ = enc[idx_3]; 829 | 830 | if (++line_out == WBY_BASE64_QUADS_BEFORE_LINEBREAK) { 831 | *output++ = '\r'; 832 | *output++ = '\n'; 833 | } 834 | } 835 | *output = '\0'; 836 | return 0; 837 | } 838 | 839 | /* --------------------------------------------------------------- 840 | * SHA1 841 | * ---------------------------------------------------------------*/ 842 | struct wby_sha1 { 843 | unsigned int state[5]; 844 | unsigned int msg_size[2]; 845 | wby_size buf_used; 846 | wby_byte buffer[64]; 847 | }; 848 | 849 | WBY_INTERN unsigned int 850 | wby_sha1_rol(unsigned int value, unsigned int bits) 851 | { 852 | return ((value) << bits) | (value >> (32 - bits)); 853 | } 854 | 855 | WBY_INTERN void 856 | wby_sha1_hash_block(unsigned int state[5], const wby_byte *block) 857 | { 858 | int i; 859 | unsigned int a, b, c, d, e; 860 | unsigned int w[80]; 861 | 862 | /* Prepare message schedule */ 863 | for (i = 0; i < 16; ++i) { 864 | w[i] = (((unsigned int)block[(i*4)+0]) << 24) | 865 | (((unsigned int)block[(i*4)+1]) << 16) | 866 | (((unsigned int)block[(i*4)+2]) << 8) | 867 | (((unsigned int)block[(i*4)+3]) << 0); 868 | } 869 | 870 | for (i = 16; i < 80; ++i) 871 | w[i] = wby_sha1_rol(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16], 1); 872 | /* Initialize working variables */ 873 | a = state[0]; b = state[1]; c = state[2]; d = state[3]; e = state[4]; 874 | 875 | /* This is the core loop for each 20-word span. 
*/ 876 | #define SHA1_LOOP(start, end, func, constant) \ 877 | for (i = (start); i < (end); ++i) { \ 878 | unsigned int t = wby_sha1_rol(a, 5) + (func) + e + (constant) + w[i]; \ 879 | e = d; d = c; c = wby_sha1_rol(b, 30); b = a; a = t;} 880 | 881 | SHA1_LOOP( 0, 20, ((b & c) ^ (~b & d)), 0x5a827999) 882 | SHA1_LOOP(20, 40, (b ^ c ^ d), 0x6ed9eba1) 883 | SHA1_LOOP(40, 60, ((b & c) ^ (b & d) ^ (c & d)), 0x8f1bbcdc) 884 | SHA1_LOOP(60, 80, (b ^ c ^ d), 0xca62c1d6) 885 | #undef SHA1_LOOP 886 | 887 | /* Update state */ 888 | state[0] += a; state[1] += b; state[2] += c; state[3] += d; state[4] += e; 889 | } 890 | 891 | WBY_INTERN void 892 | wby_sha1_init(struct wby_sha1 *s) 893 | { 894 | s->state[0] = 0x67452301; 895 | s->state[1] = 0xefcdab89; 896 | s->state[2] = 0x98badcfe; 897 | s->state[3] = 0x10325476; 898 | s->state[4] = 0xc3d2e1f0; 899 | 900 | s->msg_size[0] = 0; 901 | s->msg_size[1] = 0; 902 | s->buf_used = 0; 903 | } 904 | 905 | WBY_INTERN void 906 | wby_sha1_update(struct wby_sha1 *s, const void *data_, wby_size size) 907 | { 908 | const char *data = (const char*)data_; 909 | unsigned int size_lo; 910 | unsigned int size_lo_orig; 911 | wby_size remain = size; 912 | 913 | while (remain > 0) { 914 | wby_size buf_space = sizeof(s->buffer) - s->buf_used; 915 | wby_size copy_size = (remain < buf_space) ? 
remain : buf_space; 916 | memcpy(s->buffer + s->buf_used, data, copy_size); 917 | 918 | s->buf_used += copy_size; 919 | data += copy_size; 920 | remain -= copy_size; 921 | 922 | if (s->buf_used == sizeof(s->buffer)) { 923 | wby_sha1_hash_block(s->state, s->buffer); 924 | s->buf_used = 0; 925 | } 926 | } 927 | 928 | size_lo = size_lo_orig = s->msg_size[1]; 929 | size_lo += (unsigned int)(size * 8); 930 | if (size_lo < size_lo_orig) 931 | s->msg_size[0] += 1; 932 | s->msg_size[1] = size_lo; 933 | } 934 | 935 | WBY_INTERN void 936 | wby_sha1_final(wby_byte digest[20], struct wby_sha1 *s) 937 | { 938 | wby_byte zero = 0x00; 939 | wby_byte one_bit = 0x80; 940 | wby_byte count_data[8]; 941 | int i; 942 | 943 | /* Generate size data in bit endian format */ 944 | for (i = 0; i < 8; ++i) { 945 | unsigned int word = s->msg_size[i >> 2]; 946 | count_data[i] = (wby_byte)(word >> ((3 - (i & 3)) * 8)); 947 | } 948 | 949 | /* Set trailing one-bit */ 950 | wby_sha1_update(s, &one_bit, 1); 951 | /* Emit null padding to to make room for 64 bits of size info in the last 512 bit block */ 952 | while (s->buf_used != 56) 953 | wby_sha1_update(s, &zero, 1); 954 | 955 | /* Write size in bits as last 64-bits */ 956 | wby_sha1_update(s, count_data, 8); 957 | /* Make sure we actually finalized our last block */ 958 | WBY_ASSERT(s->buf_used == 0); 959 | 960 | /* Generate digest */ 961 | for (i = 0; i < 20; ++i) { 962 | unsigned int word = s->state[i >> 2]; 963 | wby_byte byte = (wby_byte) ((word >> ((3 - (i & 3)) * 8)) & 0xff); 964 | digest[i] = byte; 965 | } 966 | } 967 | 968 | /* --------------------------------------------------------------- 969 | * CONNECTION 970 | * ---------------------------------------------------------------*/ 971 | #define WBY_WEBSOCKET_VERSION "13" 972 | WBY_GLOBAL const char wby_continue_header[] = "HTTP/1.1 100 Continue\r\n\r\n"; 973 | WBY_GLOBAL const wby_size wby_continue_header_len = sizeof(wby_continue_header) - 1; 974 | WBY_GLOBAL const char 
wby_websocket_guid[] = "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"; 975 | WBY_GLOBAL const wby_size wby_websocket_guid_len = sizeof(wby_websocket_guid) - 1; 976 | WBY_GLOBAL const wby_byte wby_websocket_pong[] = { 0x80, WBY_WSOP_PONG, 0x00 }; 977 | WBY_GLOBAL const struct wby_header wby_plain_text_headers[]={{"Content-Type","text/plain"}}; 978 | 979 | enum wby_connection_flags { 980 | WBY_CON_FLAG_ALIVE = WBY_FLAG(0), 981 | WBY_CON_FLAG_FRESH_CONNECTION = WBY_FLAG(1), 982 | WBY_CON_FLAG_CLOSE_AFTER_RESPONSE = WBY_FLAG(2), 983 | WBY_CON_FLAG_CHUNKED_RESPONSE = WBY_FLAG(3), 984 | WBY_CON_FLAG_WEBSOCKET = WBY_FLAG(4) 985 | }; 986 | 987 | enum wby_connection_state { 988 | WBY_CON_STATE_REQUEST, 989 | WBY_CON_STATE_SEND_CONTINUE, 990 | WBY_CON_STATE_SERVE, 991 | WBY_CON_STATE_WEBSOCKET 992 | }; 993 | 994 | struct wby_connection { 995 | struct wby_con public_data; 996 | unsigned short flags; 997 | unsigned short state; 998 | 999 | wby_ptr socket; 1000 | wby_log_f log; 1001 | 1002 | wby_size request_buffer_size; 1003 | struct wby_buffer header_buf; 1004 | struct wby_buffer io_buf; 1005 | wby_size io_buffer_size; 1006 | 1007 | int header_body_left; 1008 | int io_data_left; 1009 | int continue_data_left; 1010 | int body_bytes_read; 1011 | 1012 | struct wby_frame ws_frame; 1013 | wby_byte ws_opcode; 1014 | wby_size blocking_count; 1015 | }; 1016 | 1017 | WBY_INTERN int 1018 | wby_connection_set_blocking(struct wby_connection *conn) 1019 | { 1020 | if (conn->blocking_count == 0) { 1021 | if (wby_socket_set_blocking(WBY_SOCK(conn->socket), 1) != WBY_OK) { 1022 | wby_dbg(conn->log, "failed to switch connection to blocking"); 1023 | conn->flags &= (unsigned short)~WBY_CON_FLAG_ALIVE; 1024 | return -1; 1025 | } 1026 | } 1027 | ++conn->blocking_count; 1028 | return 0; 1029 | } 1030 | 1031 | WBY_INTERN int 1032 | wby_connection_set_nonblocking(struct wby_connection *conn) 1033 | { 1034 | wby_size count = conn->blocking_count; 1035 | if ((conn->flags & WBY_CON_FLAG_ALIVE) != 0 && count 
== 1) { 1036 | if (wby_socket_set_blocking(WBY_SOCK(conn->socket), 0) != WBY_OK) { 1037 | wby_dbg(conn->log, "failed to switch connection to non-blocking"); 1038 | conn->flags &= (unsigned short)~WBY_CON_FLAG_ALIVE; 1039 | return -1; 1040 | } 1041 | } 1042 | conn->blocking_count = count - 1; 1043 | return 0; 1044 | } 1045 | 1046 | WBY_INTERN void 1047 | wby_connection_reset(struct wby_connection *conn, wby_size request_buffer_size, 1048 | wby_size io_buffer_size) 1049 | { 1050 | conn->header_buf.used = 0; 1051 | conn->header_buf.max = request_buffer_size; 1052 | conn->io_buf.used = 0; 1053 | conn->io_buf.max = io_buffer_size; 1054 | conn->header_body_left = 0; 1055 | conn->io_data_left = 0; 1056 | conn->continue_data_left = 0; 1057 | conn->body_bytes_read = 0; 1058 | conn->state = WBY_CON_STATE_REQUEST; 1059 | conn->public_data.user_data = NULL; 1060 | conn->blocking_count = 0; 1061 | } 1062 | 1063 | WBY_INTERN void 1064 | wby_connection_close(struct wby_connection* connection) 1065 | { 1066 | if (WBY_SOCK(connection->socket) != WBY_INVALID_SOCKET) { 1067 | wby_socket_close(WBY_SOCK(connection->socket)); 1068 | connection->socket = (wby_ptr)WBY_INVALID_SOCKET; 1069 | } 1070 | connection->flags = 0; 1071 | } 1072 | 1073 | WBY_INTERN int 1074 | wby_connection_setup_request(struct wby_connection *connection, int request_size) 1075 | { 1076 | char* lines[WBY_MAX_HEADERS + 2]; 1077 | int line_count; 1078 | char* tok[16]; 1079 | char* query_params; 1080 | int tok_count; 1081 | 1082 | int i; 1083 | int header_count; 1084 | char *buf = (char*) connection->header_buf.data; 1085 | struct wby_request *req = &connection->public_data.request; 1086 | 1087 | /* Null-terminate the request envelope by overwriting the last CRLF with 00LF */ 1088 | buf[request_size - 2] = '\0'; 1089 | /* Split header into lines */ 1090 | line_count = wby_tok_inplace(buf, "\r\n", lines, WBY_LEN(lines), 0); 1091 | header_count = line_count - 2; 1092 | if (line_count < 1 || header_count > (int) 
WBY_LEN(req->headers)) 1093 | return 1; 1094 | 1095 | /* Parse request line */ 1096 | tok_count = wby_tok_inplace(lines[0], " ", tok, WBY_LEN(tok), 0); 1097 | if (3 != tok_count) return 1; 1098 | 1099 | req->method = tok[0]; 1100 | req->uri = tok[1]; 1101 | req->http_version = tok[2]; 1102 | req->content_length = 0; 1103 | 1104 | /* See if there are any query parameters */ 1105 | if ((query_params = (char*) strchr(req->uri, '?')) != NULL) { 1106 | req->query_params = query_params + 1; 1107 | *query_params = '\0'; 1108 | } else req->query_params = NULL; 1109 | 1110 | { 1111 | /* Decode the URI in place */ 1112 | wby_size uri_len = strlen(req->uri); 1113 | wby_url_decode(req->uri, uri_len, (char*)req->uri, uri_len + 1, 1); 1114 | } 1115 | 1116 | /* Parse headers */ 1117 | for (i = 0; i < header_count; ++i) { 1118 | tok_count = wby_tok_inplace(lines[i + 1], ":", tok, 2, WBY_TOK_SKIPWS); 1119 | if (tok_count != 2) return 1; 1120 | req->headers[i].name = tok[0]; 1121 | req->headers[i].value = tok[1]; 1122 | 1123 | if (!strcasecmp("content-length", tok[0])) { 1124 | req->content_length = (int)strtoul(tok[1], NULL, 10); 1125 | wby_dbg(connection->log, "request has body; content length is %d", req->content_length); 1126 | } else if (!strcasecmp("transfer-encoding", tok[0])) { 1127 | wby_dbg(connection->log, "cowardly refusing to handle Transfer-Encoding: %s", tok[1]); 1128 | return 1; 1129 | } 1130 | } 1131 | req->header_count = header_count; 1132 | return 0; 1133 | } 1134 | 1135 | WBY_INTERN int 1136 | wby_connection_send_websocket_upgrade(struct wby_connection* connection) 1137 | { 1138 | const char *hdr; 1139 | struct wby_sha1 sha; 1140 | wby_byte digest[20]; 1141 | char output_digest[64]; 1142 | struct wby_header headers[3]; 1143 | struct wby_con *conn = &connection->public_data; 1144 | if ((hdr = wby_find_header(conn, "Sec-WebSocket-Version")) == NULL) { 1145 | wby_dbg(connection->log, "Sec-WebSocket-Version header not present"); 1146 | return 1; 1147 | } 1148 | if 
(strcmp(hdr, WBY_WEBSOCKET_VERSION)) { 1149 | wby_dbg(connection->log,"WebSocket version %s not supported (we only do %s)", 1150 | hdr, WBY_WEBSOCKET_VERSION); 1151 | return 1; 1152 | } 1153 | if ((hdr = wby_find_header(conn, "Sec-WebSocket-Key")) == NULL) { 1154 | wby_dbg(connection->log, "Sec-WebSocket-Key header not present"); 1155 | return 1; 1156 | } 1157 | /* Compute SHA1 hash of Sec-Websocket-Key + the websocket guid as required by 1158 | * the RFC. 1159 | * 1160 | * This handshake is bullshit. It adds zero security. Just forces me to drag 1161 | * in SHA1 and create a base64 encoder. 1162 | */ 1163 | wby_sha1_init(&sha); 1164 | wby_sha1_update(&sha, hdr, strlen(hdr)); 1165 | wby_sha1_update(&sha, wby_websocket_guid, wby_websocket_guid_len); 1166 | wby_sha1_final(&digest[0], &sha); 1167 | if (wby_base64_encode(output_digest, sizeof output_digest, &digest[0], sizeof(digest)) != WBY_OK) 1168 | return 1; 1169 | 1170 | headers[0].name = "Upgrade"; 1171 | headers[0].value = "websocket"; 1172 | headers[1].name = "Connection"; 1173 | headers[1].value = "Upgrade"; 1174 | headers[2].name = "Sec-WebSocket-Accept"; 1175 | headers[2].value = output_digest; 1176 | wby_response_begin(&connection->public_data, 101, 0, headers, WBY_LEN(headers)); 1177 | wby_response_end(&connection->public_data); 1178 | return 0; 1179 | } 1180 | 1181 | WBY_INTERN int 1182 | wby_connection_push(struct wby_connection *conn, const void *data_, int len) 1183 | { 1184 | struct wby_buffer *buf = &conn->io_buf; 1185 | const wby_byte* data = (const wby_byte*)data_; 1186 | if (conn->state != WBY_CON_STATE_SERVE) { 1187 | wby_dbg(conn->log, "attempt to write in non-serve state"); 1188 | return 1; 1189 | } 1190 | if (len == 0) 1191 | return wby_socket_flush(WBY_SOCK(conn->socket), buf); 1192 | 1193 | while (len > 0) { 1194 | int buf_space = (int)buf->max - (int)buf->used; 1195 | int copy_size = len < buf_space ? 
len : buf_space; 1196 | memcpy(buf->data + buf->used, data, (wby_size)copy_size); 1197 | buf->used += (wby_size)copy_size; 1198 | 1199 | data += copy_size; 1200 | len -= copy_size; 1201 | if (buf->used == buf->max) { 1202 | if (wby_socket_flush(WBY_SOCK(conn->socket), buf) != WBY_OK) 1203 | return 1; 1204 | if ((wby_size)len >= buf->max) 1205 | return wby_socket_send(WBY_SOCK(conn->socket), data, len); 1206 | } 1207 | } 1208 | return 0; 1209 | } 1210 | 1211 | /* --------------------------------------------------------------- 1212 | * CON/REQUEST 1213 | * ---------------------------------------------------------------*/ 1214 | WBY_INTERN int 1215 | wby_con_discard_incoming_data(struct wby_con* conn, int count) 1216 | { 1217 | while (count > 0) { 1218 | char buffer[1024]; 1219 | int read_size = (int)(((wby_size)count > sizeof(buffer)) ? 1220 | sizeof(buffer) : (wby_size)count); 1221 | if (wby_read(conn, buffer, (wby_size)read_size) != WBY_OK) 1222 | return -1; 1223 | count -= read_size; 1224 | } 1225 | return 0; 1226 | } 1227 | 1228 | WBY_API const char* 1229 | wby_find_header(struct wby_con *conn, const char *name) 1230 | { 1231 | int i, count; 1232 | for (i = 0, count = conn->request.header_count; i < count; ++i) { 1233 | if (!strcasecmp(conn->request.headers[i].name, name)) 1234 | return conn->request.headers[i].value; 1235 | } 1236 | return NULL; 1237 | } 1238 | 1239 | WBY_INTERN int 1240 | wby_con_is_websocket_request(struct wby_con* conn) 1241 | { 1242 | const char *hdr; 1243 | if ((hdr = wby_find_header(conn, "Connection")) == NULL) return 0; 1244 | if (strcasecmp(hdr, "Upgrade")) return 0; 1245 | if ((hdr = wby_find_header(conn, "Upgrade")) == NULL) return 0; 1246 | if (strcasecmp(hdr, "websocket")) return 0; 1247 | return 1; 1248 | } 1249 | 1250 | WBY_INTERN int 1251 | wby_scan_websocket_frame(struct wby_frame *frame, const struct wby_buffer *buf) 1252 | { 1253 | wby_byte flags = 0; 1254 | unsigned int len = 0; 1255 | unsigned int opcode = 0; 1256 | 
wby_byte* data = buf->data; 1257 | wby_byte* data_max = data + buf->used; 1258 | 1259 | int i; 1260 | int len_bytes = 0; 1261 | int mask_bytes = 0; 1262 | wby_byte header0, header1; 1263 | if (buf->used < 2) 1264 | return -1; 1265 | 1266 | header0 = *data++; 1267 | header1 = *data++; 1268 | if (header0 & 0x80) 1269 | flags |= WBY_WSF_FIN; 1270 | if (header1 & 0x80) { 1271 | flags |= WBY_WSF_MASKED; 1272 | mask_bytes = 4; 1273 | } 1274 | 1275 | opcode = header0 & 0xf; 1276 | len = header1 & 0x7f; 1277 | if (len == 126) 1278 | len_bytes = 2; 1279 | else if (len == 127) 1280 | len_bytes = 8; 1281 | if (data + len_bytes + mask_bytes > data_max) 1282 | return -1; 1283 | 1284 | /* Read big endian length from length bytes (if greater than 125) */ 1285 | len = len_bytes == 0 ? len : 0; 1286 | for (i = 0; i < len_bytes; ++i) { 1287 | /* This will totally overflow for 64-bit values. I don't care. 1288 | * If you're transmitting more than 4 GB of data using Webby, 1289 | * seek help. */ 1290 | len <<= 8; 1291 | len |= *data++; 1292 | } 1293 | 1294 | /* Read mask word if present */ 1295 | for (i = 0; i < mask_bytes; ++i) 1296 | frame->mask_key[i] = *data++; 1297 | frame->header_size = (wby_byte) (data - buf->data); 1298 | frame->flags = flags; 1299 | frame->opcode = (wby_byte) opcode; 1300 | frame->payload_length = (int)len; 1301 | return 0; 1302 | } 1303 | 1304 | WBY_API int 1305 | wby_frame_begin(struct wby_con *conn_pub, int opcode) 1306 | { 1307 | struct wby_connection *conn = (struct wby_connection*)conn_pub; 1308 | conn->ws_opcode = (wby_byte) opcode; 1309 | /* Switch socket to blocking mode */ 1310 | return wby_connection_set_blocking(conn); 1311 | } 1312 | 1313 | WBY_API int 1314 | wby_frame_end(struct wby_con *conn_pub) 1315 | { 1316 | wby_byte header[10]; 1317 | wby_size header_size; 1318 | struct wby_connection *conn = (struct wby_connection*) conn_pub; 1319 | header_size = wby_make_websocket_header(header, conn->ws_opcode, 0, 1); 1320 | if 
(wby_socket_send(WBY_SOCK(conn->socket), header, (int) header_size) != WBY_OK) 1321 | conn->flags &= (unsigned short)~WBY_CON_FLAG_ALIVE; 1322 | /* Switch socket to non-blocking mode */ 1323 | return wby_connection_set_nonblocking(conn); 1324 | } 1325 | 1326 | WBY_API int 1327 | wby_read(struct wby_con *conn, void *ptr_, wby_size len) 1328 | { 1329 | struct wby_connection* conn_prv = (struct wby_connection*)conn; 1330 | char *ptr = (char*) ptr_; 1331 | int count; 1332 | 1333 | int start_pos = conn_prv->body_bytes_read; 1334 | if (conn_prv->header_body_left > 0) { 1335 | count = wby_read_buffered_data(&conn_prv->header_body_left, &conn_prv->header_buf, &ptr, &len); 1336 | conn_prv->body_bytes_read += count; 1337 | } 1338 | 1339 | /* Read buffered websocket data */ 1340 | if (conn_prv->io_data_left > 0) { 1341 | count = wby_read_buffered_data(&conn_prv->io_data_left, &conn_prv->io_buf, &ptr, &len); 1342 | conn_prv->body_bytes_read += count; 1343 | } 1344 | 1345 | while (len > 0) { 1346 | long err = recv(WBY_SOCK(conn_prv->socket), ptr, (wby_size)len, 0); 1347 | if (err < 0) { 1348 | conn_prv->flags &= (unsigned short)~WBY_CON_FLAG_ALIVE; 1349 | return (int)err; 1350 | } 1351 | len -= (wby_size)err; 1352 | ptr += (wby_size)err; 1353 | conn_prv->body_bytes_read += (int)err; 1354 | } 1355 | 1356 | if ((conn_prv->flags & WBY_CON_FLAG_WEBSOCKET) && (conn_prv->ws_frame.flags & WBY_WSF_MASKED)) { 1357 | /* XOR outgoing data with websocket ofuscation key */ 1358 | int i, end_pos = conn_prv->body_bytes_read; 1359 | const wby_byte *mask = conn_prv->ws_frame.mask_key; 1360 | ptr = (char*) ptr_; /* start over */ 1361 | for (i = start_pos; i < end_pos; ++i) { 1362 | wby_byte byte = (wby_byte)*ptr; 1363 | *ptr++ = (char)(byte ^ mask[i & 3]); 1364 | } 1365 | } 1366 | return 0; 1367 | } 1368 | 1369 | WBY_API int 1370 | wby_write(struct wby_con *conn, const void *ptr, wby_size len) 1371 | { 1372 | struct wby_connection *conn_priv = (struct wby_connection*) conn; 1373 | if 
(conn_priv->flags & WBY_CON_FLAG_WEBSOCKET) { 1374 | wby_byte header[10]; 1375 | wby_size header_size; 1376 | header_size = wby_make_websocket_header(header, conn_priv->ws_opcode, (int)len, 0); 1377 | 1378 | /* Overwrite opcode to be continuation packages from here on out */ 1379 | conn_priv->ws_opcode = WBY_WSOP_CONTINUATION; 1380 | if (wby_socket_send(WBY_SOCK(conn_priv->socket), header, (int)header_size) != WBY_OK) { 1381 | conn_priv->flags &= (unsigned short)~WBY_CON_FLAG_ALIVE; 1382 | return -1; 1383 | } 1384 | if (wby_socket_send(WBY_SOCK(conn_priv->socket),(const wby_byte*)ptr, (int)len) != WBY_OK) { 1385 | conn_priv->flags &= (unsigned short)~WBY_CON_FLAG_ALIVE; 1386 | return -1; 1387 | } 1388 | return 0; 1389 | } else if (conn_priv->flags & WBY_CON_FLAG_CHUNKED_RESPONSE) { 1390 | char chunk_header[128]; 1391 | int header_len = snprintf(chunk_header, sizeof chunk_header, "%x\r\n", (int) len); 1392 | wby_connection_push(conn_priv, chunk_header, header_len); 1393 | wby_connection_push(conn_priv, ptr, (int)len); 1394 | return wby_connection_push(conn_priv, "\r\n", 2); 1395 | } else return wby_connection_push(conn_priv, ptr, (int) len); 1396 | } 1397 | 1398 | WBY_INTERN int 1399 | wby_printf(struct wby_con* conn, const char* fmt, ...) 
1400 | { 1401 | int len; 1402 | char buffer[1024]; 1403 | va_list args; 1404 | va_start(args, fmt); 1405 | len = vsnprintf(buffer, sizeof buffer, fmt, args); 1406 | va_end(args); 1407 | return wby_write(conn, buffer, (wby_size)len); 1408 | } 1409 | 1410 | /* --------------------------------------------------------------- 1411 | * RESPONSE 1412 | * ---------------------------------------------------------------*/ 1413 | #define WBY_STATUS_MAP(STATUS)\ 1414 | STATUS(100, "Continue")\ 1415 | STATUS(101, "Switching Protocols")\ 1416 | STATUS(200, "OK")\ 1417 | STATUS(201, "Created")\ 1418 | STATUS(202, "Accepted")\ 1419 | STATUS(203, "Non-Authoritative Information")\ 1420 | STATUS(204, "No Content")\ 1421 | STATUS(205, "Reset Content")\ 1422 | STATUS(206, "Partial Content")\ 1423 | STATUS(300, "Multiple Choices")\ 1424 | STATUS(301, "Moved Permanently")\ 1425 | STATUS(302, "Found")\ 1426 | STATUS(303, "See Other")\ 1427 | STATUS(304, "Not Modified")\ 1428 | STATUS(305, "Use Proxy")\ 1429 | STATUS(307, "Temporary Redirect")\ 1430 | STATUS(400, "Bad Request")\ 1431 | STATUS(401, "Unauthorized")\ 1432 | STATUS(402, "Payment Required")\ 1433 | STATUS(403, "Forbidden")\ 1434 | STATUS(404, "Not Found")\ 1435 | STATUS(405, "Method Not Allowed")\ 1436 | STATUS(406, "Not Acceptable")\ 1437 | STATUS(407, "Proxy Authentication Required")\ 1438 | STATUS(408, "Request Time-out")\ 1439 | STATUS(409, "Conflict")\ 1440 | STATUS(410, "Gone")\ 1441 | STATUS(411, "Length Required")\ 1442 | STATUS(412, "Precondition Failed")\ 1443 | STATUS(413, "Request Entity Too Large")\ 1444 | STATUS(414, "Request-URI Too Large")\ 1445 | STATUS(415, "Unsupported Media Type")\ 1446 | STATUS(416, "Requested range not satisfiable")\ 1447 | STATUS(417, "Expectation Failed")\ 1448 | STATUS(500, "Internal Server Error")\ 1449 | STATUS(501, "Not Implemented")\ 1450 | STATUS(502, "Bad Gateway")\ 1451 | STATUS(503, "Service Unavailable")\ 1452 | STATUS(504, "Gateway Time-out")\ 1453 | STATUS(505, "HTTP Version 
not supported") 1454 | 1455 | WBY_GLOBAL const short wby_status_nums[] = { 1456 | #define WBY_STATUS(id, name) id, 1457 | WBY_STATUS_MAP(WBY_STATUS) 1458 | #undef WBY_STATUS 1459 | }; 1460 | WBY_GLOBAL const char* wby_status_text[] = { 1461 | #define WBY_STATUS(id, name) name, 1462 | WBY_STATUS_MAP(WBY_STATUS) 1463 | #undef WBY_STATUS 1464 | }; 1465 | 1466 | WBY_INTERN const char* 1467 | wby_response_status_text(int status_code) 1468 | { 1469 | int i; 1470 | for (i = 0; i < (int) WBY_LEN(wby_status_nums); ++i) { 1471 | if (wby_status_nums[i] == status_code) 1472 | return wby_status_text[i]; 1473 | } 1474 | return "Unknown"; 1475 | } 1476 | 1477 | WBY_API int 1478 | wby_response_begin(struct wby_con *conn_pub, int status_code, int content_length, 1479 | const struct wby_header *headers, int header_count) 1480 | { 1481 | int i = 0; 1482 | struct wby_connection *conn = (struct wby_connection *)conn_pub; 1483 | if (conn->body_bytes_read < (int)conn->public_data.request.content_length) { 1484 | int body_left = conn->public_data.request.content_length - (int)conn->body_bytes_read; 1485 | if (wby_con_discard_incoming_data(conn_pub, body_left) != WBY_OK) { 1486 | conn->flags &= (unsigned short)~WBY_CON_FLAG_ALIVE; 1487 | return -1; 1488 | } 1489 | } 1490 | 1491 | wby_printf(conn_pub, "HTTP/1.1 %d %s\r\n", status_code, wby_response_status_text(status_code)); 1492 | if (content_length >= 0) 1493 | wby_printf(conn_pub, "Content-Length: %d\r\n", content_length); 1494 | else wby_printf(conn_pub, "Transfer-Encoding: chunked\r\n"); 1495 | wby_printf(conn_pub, "Server: wby\r\n"); 1496 | 1497 | for (i = 0; i < header_count; ++i) { 1498 | if (!strcasecmp(headers[i].name, "Connection")) { 1499 | if (!strcasecmp(headers[i].value, "close")) 1500 | conn->flags |= WBY_CON_FLAG_CLOSE_AFTER_RESPONSE; 1501 | } 1502 | wby_printf(conn_pub, "%s: %s\r\n", headers[i].name, headers[i].value); 1503 | } 1504 | 1505 | if (!(conn->flags & WBY_CON_FLAG_CLOSE_AFTER_RESPONSE)) { 1506 | /* See if the 
client wants us to close the connection. */ 1507 | const char* connection_header = wby_find_header(conn_pub, "Connection"); 1508 | if (connection_header && !strcasecmp("close", connection_header)) { 1509 | conn->flags |= WBY_CON_FLAG_CLOSE_AFTER_RESPONSE; 1510 | wby_printf(conn_pub, "Connection: close\r\n"); 1511 | } 1512 | } 1513 | wby_printf(conn_pub, "\r\n"); 1514 | if (content_length < 0) 1515 | conn->flags |= WBY_CON_FLAG_CHUNKED_RESPONSE; 1516 | return 0; 1517 | } 1518 | 1519 | WBY_API void 1520 | wby_response_end(struct wby_con *conn) 1521 | { 1522 | struct wby_connection *conn_priv = (struct wby_connection*) conn; 1523 | if (conn_priv->flags & WBY_CON_FLAG_CHUNKED_RESPONSE) { 1524 | /* Write final chunk */ 1525 | wby_connection_push(conn_priv, "0\r\n\r\n", 5); 1526 | conn_priv->flags &= (unsigned short)~WBY_CON_FLAG_CHUNKED_RESPONSE; 1527 | } 1528 | /* Flush buffers */ 1529 | wby_connection_push(conn_priv, "", 0); 1530 | 1531 | /* Close connection when Content-Length is zero that maybe HTTP/1.0. 
*/ 1532 | if (conn->request.content_length == 0 && !wby_con_is_websocket_request(conn)) 1533 | wby_connection_close(conn_priv); 1534 | } 1535 | 1536 | /* --------------------------------------------------------------- 1537 | * SERVER 1538 | * ---------------------------------------------------------------*/ 1539 | /* Pointer to Integer type conversion for pointer alignment */ 1540 | #if defined(__PTRDIFF_TYPE__) /* This case should work for GCC*/ 1541 | # define WBY_UINT_TO_PTR(x) ((void*)(__PTRDIFF_TYPE__)(x)) 1542 | # define WBY_PTR_TO_UINT(x) ((wby_size)(__PTRDIFF_TYPE__)(x)) 1543 | #elif !defined(__GNUC__) /* works for compilers other than LLVM */ 1544 | # define WBY_UINT_TO_PTR(x) ((void*)&((char*)0)[x]) 1545 | # define WBY_PTR_TO_UINT(x) ((wby_size)(((char*)x)-(char*)0)) 1546 | #elif defined(WBY_USE_FIXED_TYPES) /* used if we have */ 1547 | # define WBY_UINT_TO_PTR(x) ((void*)(uintptr_t)(x)) 1548 | # define WBY_PTR_TO_UINT(x) ((uintptr_t)(x)) 1549 | #else /* generates warning but works */ 1550 | # define WBY_UINT_TO_PTR(x) ((void*)(x)) 1551 | # define WBY_PTR_TO_UINT(x) ((wby_size)(x)) 1552 | #endif 1553 | 1554 | /* simple pointer math */ 1555 | #define WBY_PTR_ADD(t, p, i) ((t*)((void*)((wby_byte*)(p) + (i)))) 1556 | #define WBY_ALIGN_PTR(x, mask)\ 1557 | (WBY_UINT_TO_PTR((WBY_PTR_TO_UINT((wby_byte*)(x) + (mask-1)) & ~(mask-1)))) 1558 | 1559 | /* pointer alignment */ 1560 | #ifdef __cplusplus 1561 | template struct wby_alignof; 1562 | template struct wby_helper{enum {value = size_diff};}; 1563 | template struct wby_helper{enum {value = wby_alignof::value};}; 1564 | template struct wby_alignof{struct Big {T x; char c;}; enum { 1565 | diff = sizeof(Big) - sizeof(T), value = wby_helper::value};}; 1566 | #define WBY_ALIGNOF(t) (wby_alignof::value); 1567 | #else 1568 | #define WBY_ALIGNOF(t) ((char*)(&((struct {char c; t _h;}*)0)->_h) - (char*)0) 1569 | #endif 1570 | 1571 | WBY_API void 1572 | wby_init(struct wby_server *srv, const struct wby_config *cfg, 
wby_size *needed_memory) 1573 | { 1574 | WBY_STORAGE const wby_size wby_conn_align = WBY_ALIGNOF(struct wby_connection); 1575 | WBY_ASSERT(srv); 1576 | WBY_ASSERT(cfg); 1577 | WBY_ASSERT(needed_memory); 1578 | 1579 | memset(srv, 0, sizeof(*srv)); 1580 | srv->config = *cfg; 1581 | WBY_ASSERT(cfg->dispatch); 1582 | 1583 | *needed_memory = 0; 1584 | *needed_memory += cfg->connection_max * sizeof(struct wby_connection); 1585 | *needed_memory += cfg->connection_max * cfg->request_buffer_size; 1586 | *needed_memory += cfg->connection_max * cfg->io_buffer_size; 1587 | *needed_memory += wby_conn_align; 1588 | srv->memory_size = *needed_memory; 1589 | } 1590 | 1591 | WBY_API int 1592 | wby_start(struct wby_server *server, void *memory) 1593 | { 1594 | wby_size i; 1595 | wby_socket sock; 1596 | wby_sockopt on = 1; 1597 | wby_byte *buffer = (wby_byte*)memory; 1598 | struct sockaddr_in bind_addr; 1599 | WBY_STORAGE const wby_size wby_conn_align = WBY_ALIGNOF(struct wby_connection); 1600 | 1601 | WBY_ASSERT(server); 1602 | WBY_ASSERT(memory); 1603 | memset(buffer, 0, server->memory_size); 1604 | 1605 | /* setup sever memory */ 1606 | server->socket = (wby_ptr)WBY_INVALID_SOCKET; 1607 | server->con = (struct wby_connection*)WBY_ALIGN_PTR(buffer, wby_conn_align); 1608 | buffer += ((wby_byte*)server->con - buffer); 1609 | buffer += server->config.connection_max * sizeof(struct wby_connection); 1610 | 1611 | for (i = 0; i < server->config.connection_max; ++i) { 1612 | server->con[i].log = server->config.log; 1613 | server->con[i].header_buf.data = buffer; 1614 | buffer += server->config.request_buffer_size; 1615 | server->con[i].io_buf.data = buffer; 1616 | server->con[i].request_buffer_size = server->config.request_buffer_size; 1617 | server->con[i].io_buffer_size = server->config.io_buffer_size; 1618 | buffer += server->config.io_buffer_size; 1619 | } 1620 | WBY_ASSERT((wby_size)(buffer - (wby_byte*)memory) <= server->memory_size); 1621 | 1622 | /* server socket setup */ 1623 | 
sock = (wby_ptr)socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 1624 | #ifdef _WIN32 1625 | if (sock == INVALID_SOCKET && WSAGetLastError() == WSANOTINITIALISED) { 1626 | /* Make sure WSAStartup has been called. */ 1627 | wby_dbg(server->config.log, "Calling WSAStartup."); 1628 | WSADATA wsaData; 1629 | WSAStartup(MAKEWORD(2, 2), &wsaData); 1630 | sock = (wby_ptr)socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 1631 | server->windows_socket_initialized = 1; 1632 | } 1633 | #endif 1634 | wby_dbg(server->config.log, "Server socket = %d", (int)sock); 1635 | if (!wby_socket_is_valid(sock)) { 1636 | wby_dbg(server->config.log, "failed to initialized server socket: %d", wby_socket_error()); 1637 | goto error; 1638 | } 1639 | 1640 | setsockopt(WBY_SOCK(sock), SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); 1641 | #ifdef __APPLE__ /* Don't generate SIGPIPE when writing to dead socket, we check all writes. */ 1642 | signal(SIGPIPE, SIG_IGN); 1643 | #endif 1644 | if (wby_socket_set_blocking(sock, 0) != WBY_OK) goto error; 1645 | 1646 | /* bind server socket */ 1647 | wby_dbg(server->config.log, "binding to %s:%d", server->config.address, server->config.port); 1648 | memset(&bind_addr, 0, sizeof(bind_addr)); 1649 | bind_addr.sin_family = AF_INET; 1650 | bind_addr.sin_port = htons((unsigned short)server->config.port); 1651 | bind_addr.sin_addr.s_addr = inet_addr(server->config.address); 1652 | if (bind(sock, (struct sockaddr*) &bind_addr, sizeof(bind_addr)) != WBY_OK) { 1653 | wby_dbg(server->config.log, "bind() failed: %d", wby_socket_error()); 1654 | wby_dbg(server->config.log, "bind() failed: %s", strerror(wby_socket_error())); 1655 | goto error; 1656 | } 1657 | 1658 | /* set server socket to listening */ 1659 | if (listen(sock, SOMAXCONN) != WBY_OK) { 1660 | wby_dbg(server->config.log, "listen() failed: %d", wby_socket_error()); 1661 | wby_socket_close(WBY_SOCK(sock)); 1662 | goto error; 1663 | } 1664 | server->socket = (wby_ptr)sock; 1665 | wby_dbg(server->config.log, "server 
initialized: %s", strerror(errno)); 1666 | return 0; 1667 | 1668 | error: 1669 | if (wby_socket_is_valid(WBY_SOCK(sock))) 1670 | wby_socket_close(WBY_SOCK(sock)); 1671 | return -1; 1672 | } 1673 | 1674 | WBY_API void 1675 | wby_stop(struct wby_server *srv) 1676 | { 1677 | #ifdef _WIN32 1678 | if (srv->windows_socket_initialized) { 1679 | WSACleanup(); 1680 | } 1681 | #endif 1682 | wby_size i; 1683 | wby_socket_close(WBY_SOCK(srv->socket)); 1684 | for (i = 0; i < srv->con_count; ++i) 1685 | wby_socket_close(WBY_SOCK(srv->con[i].socket)); 1686 | } 1687 | 1688 | WBY_INTERN int 1689 | wby_on_incoming(struct wby_server *srv) 1690 | { 1691 | wby_size connection_index; 1692 | char WBY_ALIGN(8) client_addr[64]; 1693 | struct wby_connection* connection; 1694 | wby_socklen client_addr_len = sizeof(client_addr); 1695 | wby_socket fd; 1696 | 1697 | /* Make sure we have space for a new connection */ 1698 | connection_index = srv->con_count; 1699 | if (connection_index == srv->config.connection_max) { 1700 | wby_dbg(srv->config.log, "out of connection slots"); 1701 | return 1; 1702 | } 1703 | 1704 | /* Accept the incoming connection. 
*/ 1705 | fd = accept(WBY_SOCK(srv->socket), (struct sockaddr*)&client_addr[0], &client_addr_len); 1706 | if (!wby_socket_is_valid(fd)) { 1707 | int err = wby_socket_error(); 1708 | if (!wby_socket_is_blocking_error(err)) 1709 | wby_dbg(srv->config.log, "accept() failed: %d", err); 1710 | return 1; 1711 | } 1712 | 1713 | connection = &srv->con[connection_index]; 1714 | wby_connection_reset(connection, srv->config.request_buffer_size, srv->config.io_buffer_size); 1715 | connection->flags = WBY_CON_FLAG_FRESH_CONNECTION; 1716 | srv->con_count = connection_index + 1; 1717 | 1718 | /* Configure socket */ 1719 | if (wby_socket_config_incoming(fd) != WBY_OK) { 1720 | wby_socket_close(fd); 1721 | return 1; 1722 | } 1723 | 1724 | /* OK, keep this connection */ 1725 | wby_dbg(srv->config.log, "tagging connection %d as alive", connection_index); 1726 | connection->flags |= WBY_CON_FLAG_ALIVE; 1727 | connection->socket = (wby_ptr)fd; 1728 | return 0; 1729 | } 1730 | 1731 | WBY_INTERN void 1732 | wby_update_connection(struct wby_server *srv, struct wby_connection* connection) 1733 | { 1734 | /* This is no longer a fresh connection. Only read from it when select() says 1735 | * so in the future. */ 1736 | connection->flags &= (unsigned short)~WBY_CON_FLAG_FRESH_CONNECTION; 1737 | for (;;) 1738 | { 1739 | switch (connection->state) { 1740 | default: break; 1741 | case WBY_CON_STATE_REQUEST: { 1742 | const char *expect_header; 1743 | int request_size; 1744 | int result = wby_socket_recv(WBY_SOCK(connection->socket), 1745 | &connection->header_buf, srv->config.log); 1746 | if (WBY_FILL_ERROR == result) { 1747 | connection->flags &= (unsigned short)~WBY_CON_FLAG_ALIVE; 1748 | return; 1749 | } 1750 | 1751 | /* Scan to see if the buffer has a complete HTTP request header package. */ 1752 | request_size = wb_peek_request_size(connection->header_buf.data, 1753 | (int)connection->header_buf.used); 1754 | if (request_size < 0) { 1755 | /* Nothing yet. 
*/ 1756 | if (connection->header_buf.max == connection->header_buf.used) { 1757 | wby_dbg(srv->config.log, "giving up as buffer is full"); 1758 | /* Give up, we can't fit the request in our buffer. */ 1759 | connection->flags &= (unsigned short)~WBY_CON_FLAG_ALIVE; 1760 | } 1761 | return; 1762 | } 1763 | wby_dbg(srv->config.log, "peek request size: %d", request_size); 1764 | 1765 | 1766 | /* Set up request data. */ 1767 | if (wby_connection_setup_request(connection, request_size) != WBY_OK) { 1768 | wby_dbg(srv->config.log, "failed to set up request"); 1769 | connection->flags &= (unsigned short)~WBY_CON_FLAG_ALIVE; 1770 | return; 1771 | } 1772 | 1773 | /* Remember how much of the remaining buffer is body data. */ 1774 | connection->header_body_left = (int)connection->header_buf.used - request_size; 1775 | /* If the client expects a 100 Continue, send one now. */ 1776 | if (NULL != (expect_header = wby_find_header(&connection->public_data, "Expect"))) { 1777 | if (!strcasecmp(expect_header, "100-continue")) { 1778 | wby_dbg(srv->config.log, "connection expects a 100 Continue header.. 
making him happy"); 1779 | connection->continue_data_left = (int)wby_continue_header_len; 1780 | connection->state = WBY_CON_STATE_SEND_CONTINUE; 1781 | } else { 1782 | wby_dbg(srv->config.log, "unrecognized Expected header %s", expect_header); 1783 | connection->state = WBY_CON_STATE_SERVE; 1784 | } 1785 | } else connection->state = WBY_CON_STATE_SERVE; 1786 | } break; /* WBY_REQUEST */ 1787 | 1788 | case WBY_CON_STATE_SEND_CONTINUE: { 1789 | int left = connection->continue_data_left; 1790 | int offset = (int)wby_continue_header_len - left; 1791 | long written = 0; 1792 | 1793 | written = send(WBY_SOCK(connection->socket), wby_continue_header + offset, (wby_size)left, 0); 1794 | wby_dbg(srv->config.log, "continue write: %d bytes", written); 1795 | if (written < 0) { 1796 | wby_dbg(srv->config.log, "failed to write 100-continue header"); 1797 | connection->flags &= (unsigned short)~WBY_CON_FLAG_ALIVE; 1798 | return; 1799 | } 1800 | left -= (int)written; 1801 | connection->continue_data_left = left; 1802 | if (left == 0) 1803 | connection->state = WBY_CON_STATE_SERVE; 1804 | } break; /* WBY_SEND_cONTINUE */ 1805 | 1806 | case WBY_CON_STATE_SERVE: { 1807 | /* Clear I/O buffer for output */ 1808 | connection->io_buf.used = 0; 1809 | /* Switch socket to blocking mode. 
*/ 1810 | if (wby_connection_set_blocking(connection) != WBY_OK) 1811 | return; 1812 | 1813 | /* Figure out if this is a request to upgrade to WebSockets */ 1814 | if (wby_con_is_websocket_request(&connection->public_data)) { 1815 | wby_dbg(srv->config.log, "received a websocket upgrade request"); 1816 | if (!srv->config.ws_connect || 1817 | srv->config.ws_connect(&connection->public_data, srv->config.userdata) != WBY_OK) 1818 | { 1819 | wby_dbg(srv->config.log, "user callback failed connection attempt"); 1820 | wby_response_begin(&connection->public_data, 400, -1, 1821 | wby_plain_text_headers, WBY_LEN(wby_plain_text_headers)); 1822 | wby_printf(&connection->public_data, "WebSockets not supported at %s\r\n", 1823 | connection->public_data.request.uri); 1824 | wby_response_end(&connection->public_data); 1825 | } else { 1826 | /* OK, let's try to upgrade the connection to WebSockets */ 1827 | if (wby_connection_send_websocket_upgrade(connection) != WBY_OK) { 1828 | wby_dbg(srv->config.log, "websocket upgrade failed"); 1829 | wby_response_begin(&connection->public_data, 400, -1, 1830 | wby_plain_text_headers, WBY_LEN(wby_plain_text_headers)); 1831 | wby_printf(&connection->public_data, "WebSockets couldn't not be enabled\r\n"); 1832 | wby_response_end(&connection->public_data); 1833 | } else { 1834 | /* OK, we're now a websocket */ 1835 | connection->flags |= WBY_CON_FLAG_WEBSOCKET; 1836 | wby_dbg(srv->config.log, "connection %d upgraded to websocket", 1837 | (int)(connection - srv->con)); 1838 | srv->config.ws_connected(&connection->public_data, srv->config.userdata); 1839 | } 1840 | } 1841 | } else if (srv->config.dispatch(&connection->public_data, srv->config.userdata) != 0) { 1842 | static const struct wby_header headers[] = {{ "Content-Type", "text/plain" }}; 1843 | wby_response_begin(&connection->public_data, 404, -1, headers, WBY_LEN(headers)); 1844 | wby_printf(&connection->public_data, "No handler for %s\r\n", 1845 | connection->public_data.request.uri); 
1846 | wby_response_end(&connection->public_data); 1847 | } 1848 | 1849 | /* Back to non-blocking mode, can make the socket die. */ 1850 | wby_connection_set_nonblocking(connection); 1851 | /* Ready for another request, unless we should close the connection. */ 1852 | if (connection->flags & WBY_CON_FLAG_ALIVE) { 1853 | if (connection->flags & WBY_CON_FLAG_CLOSE_AFTER_RESPONSE) { 1854 | connection->flags &= (unsigned short)~WBY_CON_FLAG_ALIVE; 1855 | return; 1856 | } else { 1857 | /* Reset connection for next request. */ 1858 | wby_connection_reset(connection, srv->config.request_buffer_size, 1859 | srv->config.io_buffer_size); 1860 | if (!(connection->flags & WBY_CON_FLAG_WEBSOCKET)) { 1861 | /* Loop back to request state */ 1862 | connection->state = WBY_CON_STATE_REQUEST; 1863 | } else { 1864 | /* Clear I/O buffer for input */ 1865 | connection->io_buf.used = 0; 1866 | /* Go to the web socket serving state */ 1867 | connection->state = WBY_CON_STATE_WEBSOCKET; 1868 | } 1869 | } 1870 | } 1871 | } break; /* WBY_SERVE */ 1872 | 1873 | case WBY_CON_STATE_WEBSOCKET: { 1874 | /* In this state, we're trying to read a websocket frame into the I/O 1875 | * buffer. Once we have enough data, we call the websocket frame 1876 | * callback and let the client read the data through WebbyRead. 
*/ 1877 | if (WBY_FILL_ERROR == wby_socket_recv(WBY_SOCK(connection->socket), 1878 | &connection->io_buf, srv->config.log)) { 1879 | /* Give up on this connection */ 1880 | connection->flags &= (unsigned short)~WBY_CON_FLAG_ALIVE; 1881 | return; 1882 | } 1883 | 1884 | if (wby_scan_websocket_frame(&connection->ws_frame, &connection->io_buf) != WBY_OK) 1885 | return; 1886 | 1887 | connection->body_bytes_read = 0; 1888 | connection->io_data_left = (int)connection->io_buf.used - connection->ws_frame.header_size; 1889 | wby_dbg(srv->config.log, "%d bytes of incoming websocket data buffered", 1890 | (int)connection->io_data_left); 1891 | 1892 | /* Switch socket to blocking mode */ 1893 | if (wby_connection_set_blocking(connection) != WBY_OK) 1894 | return; 1895 | 1896 | switch (connection->ws_frame.opcode) 1897 | { 1898 | case WBY_WSOP_CLOSE: 1899 | wby_dbg(srv->config.log, "received websocket close request"); 1900 | connection->flags &= (unsigned short)~WBY_CON_FLAG_ALIVE; 1901 | return; 1902 | 1903 | case WBY_WSOP_PING: 1904 | wby_dbg(srv->config.log, "received websocket ping request"); 1905 | if (wby_socket_send(WBY_SOCK(connection->socket), wby_websocket_pong, 1906 | sizeof wby_websocket_pong)){ 1907 | connection->flags &= (unsigned short)~WBY_CON_FLAG_ALIVE; 1908 | return; 1909 | } 1910 | break; 1911 | 1912 | default: 1913 | /* Dispatch frame to user handler. */ 1914 | if (srv->config.ws_frame(&connection->public_data, 1915 | &connection->ws_frame, srv->config.userdata) != WBY_OK) { 1916 | connection->flags &= (unsigned short)~WBY_CON_FLAG_ALIVE; 1917 | return; 1918 | } 1919 | } 1920 | 1921 | /* Discard any data the client didn't read to retain the socket state. 
*/ 1922 | if (connection->body_bytes_read < connection->ws_frame.payload_length) { 1923 | int size = connection->ws_frame.payload_length - connection->body_bytes_read; 1924 | if (wby_con_discard_incoming_data(&connection->public_data, size) != WBY_OK) { 1925 | connection->flags &= (unsigned short)~WBY_CON_FLAG_ALIVE; 1926 | return; 1927 | } 1928 | } 1929 | 1930 | /* Back to non-blocking mode */ 1931 | if (wby_connection_set_nonblocking(connection) != WBY_OK) 1932 | return; 1933 | 1934 | wby_connection_reset(connection, srv->config.request_buffer_size, srv->config.io_buffer_size); 1935 | connection->state = WBY_CON_STATE_WEBSOCKET; 1936 | } break; /* WBY_WEBSOCKET */ 1937 | } /* switch */ 1938 | } /* for */ 1939 | } 1940 | 1941 | WBY_API void 1942 | wby_update(struct wby_server *srv) 1943 | { 1944 | int err; 1945 | wby_size i, count; 1946 | wby_socket max_socket; 1947 | fd_set read_fds, write_fds, except_fds; 1948 | struct timeval timeout; 1949 | 1950 | /* Build set of sockets to check for events */ 1951 | FD_ZERO(&read_fds); 1952 | FD_ZERO(&write_fds); 1953 | FD_ZERO(&except_fds); 1954 | max_socket = 0; 1955 | 1956 | /* Only accept incoming connections if we have space */ 1957 | if (srv->con_count < srv->config.connection_max) { 1958 | FD_SET(srv->socket, &read_fds); 1959 | FD_SET(srv->socket, &except_fds); 1960 | max_socket = WBY_SOCK(srv->socket); 1961 | } 1962 | 1963 | for (i = 0, count = srv->con_count; i < count; ++i) { 1964 | wby_socket socket = WBY_SOCK(srv->con[i].socket); 1965 | FD_SET(socket, &read_fds); 1966 | FD_SET(socket, &except_fds); 1967 | if (srv->con[i].state == WBY_CON_STATE_SEND_CONTINUE) 1968 | FD_SET(socket, &write_fds); 1969 | if (socket > max_socket) 1970 | max_socket = socket; 1971 | } 1972 | 1973 | timeout.tv_sec = 0; 1974 | timeout.tv_usec = 5; 1975 | err = select((int)(max_socket + 1), &read_fds, &write_fds, &except_fds, &timeout); 1976 | if (err < 0) { 1977 | wby_dbg(srv->config.log, "failed to select"); 1978 | return; 1979 | } 1980 | 
1981 | /* Handle incoming connections */ 1982 | if (FD_ISSET(WBY_SOCK(srv->socket), &read_fds)) { 1983 | do { 1984 | wby_dbg(srv->config.log, "awake on incoming"); 1985 | err = wby_on_incoming(srv); 1986 | } while (err == 0); 1987 | } 1988 | 1989 | /* Handle incoming connection data */ 1990 | for (i = 0, count = srv->con_count; i < count; ++i) { 1991 | struct wby_connection *conn = &srv->con[i]; 1992 | if (FD_ISSET(WBY_SOCK(conn->socket), &read_fds) || 1993 | FD_ISSET(WBY_SOCK(conn->socket), &write_fds) || 1994 | conn->flags & WBY_CON_FLAG_FRESH_CONNECTION) 1995 | { 1996 | wby_dbg(srv->config.log, "reading from connection %d", (int)i); /* i is wby_size; %d needs an int */ 1997 | wby_update_connection(srv, conn); 1998 | } 1999 | } 2000 | 2001 | /* Close stale connections & compact connection array. */ 2002 | for (i = 0; i < srv->con_count; ) { 2003 | struct wby_connection *connection = &srv->con[i]; 2004 | if (!(connection->flags & WBY_CON_FLAG_ALIVE)) { 2005 | wby_size remain; 2006 | wby_dbg(srv->config.log, "closing connection %d (%08x)", (int)i, (unsigned)connection->flags); /* casts match the %d / %x conversions */ 2007 | if (connection->flags & WBY_CON_FLAG_WEBSOCKET) 2008 | srv->config.ws_closed(&connection->public_data, srv->config.userdata); 2009 | remain = srv->con_count - (wby_size)i - 1; 2010 | wby_connection_close(connection); 2011 | memmove(&srv->con[i], &srv->con[i + 1], remain*sizeof(srv->con[i])); /* slide the later slots down over the closed one; i is not advanced so the moved-in slot is examined next */ 2012 | --srv->con_count; 2013 | } else ++i; 2014 | } 2015 | } 2016 | 2017 | #endif /* WBY_IMPLEMENTATION */ 2018 | -------------------------------------------------------------------------------- /examples/11_platforms/web/webplatform.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3.10 2 | # Run from the root of the repository with 3 | # $ PYTHONPATH=.
python3 examples/11_platforms/web/webplatform.py 4 | import http.server 5 | import os 6 | import socketserver 7 | 8 | from scrapscript import eval_exp, Apply, Record, parse, tokenize, String, Int 9 | 10 | HANDLER_FILE_NAME = "handler.scrap" 11 | PLATFORM_DIR_NAME = os.path.dirname(os.path.realpath(__file__)) 12 | with open(os.path.join(PLATFORM_DIR_NAME, HANDLER_FILE_NAME), "r") as f: 13 | HANDLER = eval_exp({}, parse(tokenize(f.read()))) 14 | 15 | 16 | class WebPlatform(http.server.SimpleHTTPRequestHandler): 17 | def handle_request(self) -> None: 18 | result = eval_exp({}, Apply(HANDLER, String(self.path))) 19 | assert isinstance(result, Record) 20 | assert "code" in result.data 21 | assert isinstance(result.data["code"], Int) 22 | assert "body" in result.data 23 | assert isinstance(result.data["body"], String) 24 | self.send_response(result.data["code"].value) 25 | # TODO(max): Move content-type into scrapscript code 26 | # TODO(max): Serve scrap objects over the wire as 27 | # application/scrapscript 28 | self.send_header("Content-type", "text/html") 29 | self.end_headers() 30 | self.wfile.write(result.data["body"].value.encode("utf-8")) 31 | 32 | def do_GET(self) -> None: 33 | self.handle_request() 34 | 35 | 36 | server = socketserver.TCPServer 37 | server.allow_reuse_address = True 38 | with server(("", 8000), WebPlatform) as httpd: 39 | host, port = httpd.server_address 40 | print(f"serving at http://{host!s}:{port}") 41 | httpd.serve_forever() 42 | -------------------------------------------------------------------------------- /examples/1_numbers/a.scrap: -------------------------------------------------------------------------------- 1 | 1 + 1 -------------------------------------------------------------------------------- /examples/1_numbers/b.scrap: -------------------------------------------------------------------------------- 1 | 1.0 + 1.0 -------------------------------------------------------------------------------- /examples/1_numbers/c.scrap: 
-------------------------------------------------------------------------------- 1 | 1.0 + to-float 1 -------------------------------------------------------------------------------- /examples/2_text/a.scrap: -------------------------------------------------------------------------------- 1 | "hello" ++ " " ++ "world" -------------------------------------------------------------------------------- /examples/2_text/b.scrap: -------------------------------------------------------------------------------- 1 | "hello" ++ " 2 | " ++ "world" -------------------------------------------------------------------------------- /examples/2_text/c.scrap: -------------------------------------------------------------------------------- 1 | "🐸" -------------------------------------------------------------------------------- /examples/3_bytes/a.scrap: -------------------------------------------------------------------------------- 1 | bytes/to-utf8-text `aGVsbG8gd29ybGQ=` -------------------------------------------------------------------------------- /examples/3_bytes/b.scrap: -------------------------------------------------------------------------------- 1 | bytes/to-utf8-text <| `aGVsbG8gd29ybGQ=` +< '21 -------------------------------------------------------------------------------- /examples/4_hole/a.scrap: -------------------------------------------------------------------------------- 1 | () -------------------------------------------------------------------------------- /examples/5_variables/a.scrap: -------------------------------------------------------------------------------- 1 | x . x = 100 -------------------------------------------------------------------------------- /examples/5_variables/b.scrap: -------------------------------------------------------------------------------- 1 | 200 + (x . 
x = 150) -------------------------------------------------------------------------------- /examples/5_variables/c.scrap: -------------------------------------------------------------------------------- 1 | a + b + c 2 | . a = 1 3 | . b = 2 4 | . c = 3 -------------------------------------------------------------------------------- /examples/5_variables/d.scrap: -------------------------------------------------------------------------------- 1 | a + b + c 2 | . a = 1 3 | . b = x + y 4 | , x = 1 5 | , y = 1 6 | . c = 3 -------------------------------------------------------------------------------- /examples/6_lists/a.scrap: -------------------------------------------------------------------------------- 1 | [1, 2, 3] +< 4 -------------------------------------------------------------------------------- /examples/7_records/a.scrap: -------------------------------------------------------------------------------- 1 | rec@a 2 | . rec = { a = 1, b = "x" } -------------------------------------------------------------------------------- /examples/7_records/b.scrap: -------------------------------------------------------------------------------- 1 | { a = 2, c = 'FF, ..g } 2 | . g = { a = 1, b = "x", c = '00 } -------------------------------------------------------------------------------- /examples/8_operators/a.scrap: -------------------------------------------------------------------------------- 1 | 1 |> f . f = a -> a + 1 -------------------------------------------------------------------------------- /examples/8_operators/b.scrap: -------------------------------------------------------------------------------- 1 | h 2 2 | . h = f >> g 3 | . f = a -> a + 1 4 | . 
g = b -> b * 2 -------------------------------------------------------------------------------- /examples/8_operators/c.scrap: -------------------------------------------------------------------------------- 1 | ((a -> a + 1) >> (b -> b * 2)) 3 -------------------------------------------------------------------------------- /examples/9_functions/a.scrap: -------------------------------------------------------------------------------- 1 | f 1 2 2 | . f = a -> b -> a + b -------------------------------------------------------------------------------- /examples/9_functions/b.scrap: -------------------------------------------------------------------------------- 1 | f "b" 2 | . f = 3 | | "a" -> 1 4 | | "b" -> 2 5 | | "c" -> 3 6 | | x -> 0 -------------------------------------------------------------------------------- /examples/9_functions/c.scrap: -------------------------------------------------------------------------------- 1 | f "b" 2 | . f = | "a" -> 1 | "b" -> 2 | "c" -> 3 | x -> 0 -------------------------------------------------------------------------------- /examples/9_functions/d.scrap: -------------------------------------------------------------------------------- 1 | (f >> (x -> x) >> g) 7 2 | . f = | 7 -> "cat" 3 | | 4 -> "dog" 4 | | _ -> "shark" 5 | . g = | "cat" -> "kitten" 6 | | "dog" -> "puppy" 7 | | a -> "baby " ++ a -------------------------------------------------------------------------------- /examples/9_functions/e.scrap: -------------------------------------------------------------------------------- 1 | (f >> (x -> x) >> g) 7 2 | . f = | 7 -> "cat" ++ m 3 | , m = "?" 4 | | 4 -> "dog" ++ n 5 | , n = "!" 6 | | _ -> "shark" 7 | . 
g = | "cat" -> "kitten" 8 | | "dog" -> "puppy" 9 | | a -> "baby " ++ a -------------------------------------------------------------------------------- /examples/9_functions/f.scrap: -------------------------------------------------------------------------------- 1 | (x -> x) (y -> y) -------------------------------------------------------------------------------- /examples/9_functions/g.scrap: -------------------------------------------------------------------------------- 1 | f 4 2 | . f = a -> a + 1 -------------------------------------------------------------------------------- /examples/9_functions/h.scrap: -------------------------------------------------------------------------------- 1 | f 1 2 | . f = x -> x + a 3 | . a = 2 -------------------------------------------------------------------------------- /examples/9_functions/i.scrap: -------------------------------------------------------------------------------- 1 | f 1 2 | . a = 2 3 | . f = x -> x + a -------------------------------------------------------------------------------- /fly.toml: -------------------------------------------------------------------------------- 1 | # fly.toml app configuration file generated for scrapscript on 2023-11-29T02:06:16-05:00 2 | # 3 | # See https://fly.io/docs/reference/configuration/ for information about how to use this file. 
4 | # 5 | 6 | app = "scrapscript" 7 | primary_region = "ewr" 8 | 9 | [build] 10 | build-target = "web" 11 | dockerfile = "util/Dockerfile" 12 | 13 | [http_service] 14 | internal_port = 8000 15 | force_https = true 16 | auto_stop_machines = true 17 | auto_start_machines = true 18 | min_machines_running = 0 19 | processes = ["app"] 20 | 21 | [[vm]] 22 | cpu_kind = "shared" 23 | cpus = 1 24 | memory_mb = 256 25 | -------------------------------------------------------------------------------- /hooks/pre-commit: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Set up with `pushd .git/hooks; ln -s ../../hooks/pre-commit pre-commit; popd` 3 | 4 | set -eux 5 | 6 | if git rev-parse --verify HEAD >/dev/null 2>&1 7 | then 8 | against=HEAD 9 | else 10 | # Initial commit: diff against an empty tree object 11 | against=$(git hash-object -t tree /dev/null) 12 | fi 13 | # cat always has error code 0 14 | # ignore deleted files (can't be formatted) 15 | filenames=$(git diff --cached --name-only --diff-filter=d $against | grep '\.py$' | cat) 16 | # If changed files include scrapscript.py, run tests 17 | if echo $filenames | grep scrapscript 18 | then 19 | python3 ./scrapscript_tests.py 20 | fi 21 | 22 | # If there are any other changed Python files, make sure they lint 23 | if [ -n "$filenames" ] 24 | then 25 | ruff format --check $filenames 26 | ruff check $filenames 27 | mypy --strict $filenames 28 | fi 29 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | authors = [ 3 | "Max Bernstein ", 4 | "Chris Gregory ", 5 | ] 6 | classifiers = [ 7 | "Intended Audience :: Developers", 8 | "Topic :: Software Development", 9 | "Programming Language :: Python", 10 | "Programming Language :: Python :: 3.8", 11 | "Programming Language :: Python :: 3.9", 12 | "Programming Language :: Python :: 
3.10", 13 | "Programming Language :: Python :: 3.11", 14 | ] 15 | description = "Scrapscript interpreter" 16 | keywords = ["scrapscript", "interpreter"] 17 | license = "MIT" 18 | name = "scrapscript" 19 | readme = "README.md" 20 | repository = "https://github.com/tekknolagi/scrapscript" 21 | version = "0.1.1" 22 | requires-python = ">=3.8" 23 | 24 | [tool.uv] 25 | dev-dependencies = ["mypy~=1.10.0", "pylint~=3.2.0", "ruff~=0.5.0"] 26 | 27 | [tool.mypy] 28 | disallow_incomplete_defs = true 29 | disallow_untyped_calls = true 30 | disallow_untyped_defs = true 31 | ignore_missing_imports = true 32 | 33 | [tool.pylint.basic] 34 | good-names = ["i", "x"] 35 | notes = ["FIXME"] 36 | 37 | [tool.pylint.format] 38 | max-line-length = 120 39 | 40 | [tool.pylint.messages_control] 41 | disable = [ 42 | "missing-function-docstring", 43 | "missing-module-docstring", 44 | "missing-class-docstring", 45 | ] 46 | 47 | [tool.pylint.reports] 48 | output-format = "colorized" 49 | 50 | [tool.ruff] 51 | line-length = 120 52 | 53 | [tool.ruff.lint] 54 | ignore = ["E741"] 55 | -------------------------------------------------------------------------------- /runtime.c: -------------------------------------------------------------------------------- 1 | #include <assert.h> // NOTE(review): header names were stripped from this dump; list rebuilt from the identifiers used below - confirm against upstream 2 | #include <stdbool.h> 3 | #include <stddef.h> 4 | #include <stdint.h> 5 | #include <stdio.h> 6 | #include <stdlib.h> 7 | #include <string.h> 8 | 9 | #ifndef STATIC_HEAP 10 | #include <sys/mman.h> // mmap/munmap, used by make_space/destroy_space 11 | #endif 12 | 13 | #define ALWAYS_INLINE inline __attribute__((always_inline)) 14 | #define NEVER_INLINE __attribute__((noinline)) 15 | 16 | const int kPointerSize = sizeof(void*); 17 | typedef intptr_t word; 18 | typedef uintptr_t uword; 19 | typedef unsigned char byte; 20 | 21 | // Garbage collector core by Andy Wingo . 
22 | 23 | struct gc_obj { 24 | uintptr_t tag; // low bit is 0 if forwarding ptr 25 | }; 26 | 27 | // The low bit of the pointer is 1 if it's a heap object and 0 if it's an 28 | // immediate integer 29 | struct object; 30 | 31 | bool is_small_int(struct object* obj) { 32 | return (((uword)obj) & kSmallIntTagMask) == kSmallIntTag; 33 | } 34 | bool is_immediate_not_small_int(struct object* obj) { 35 | return (((uword)obj) & (kPrimaryTagMask & ~kSmallIntTagMask)) != 0; 36 | } 37 | bool is_heap_object(struct object* obj) { 38 | return (((uword)obj) & kPrimaryTagMask) == kHeapObjectTag; 39 | } 40 | #define empty_list() ((struct object*)kEmptyListTag) 41 | bool is_empty_list(struct object* obj) { return obj == empty_list(); } 42 | #define hole() ((struct object*)kHoleTag) 43 | bool is_hole(struct object* obj) { return (uword)obj == kHoleTag; } 44 | static ALWAYS_INLINE bool is_small_string(struct object* obj) { 45 | return (((uword)obj) & kImmediateTagMask) == kSmallStringTag; 46 | } 47 | #define mk_immediate_variant(tag) \ 48 | (struct object*)(((uword)(tag) << kImmediateTagBits) | kVariantTag) 49 | static ALWAYS_INLINE bool is_immediate_variant(struct object* obj) { 50 | return ((uword)obj & kImmediateTagMask) == kVariantTag; 51 | } 52 | static uword immediate_variant_tag(struct object* obj) { 53 | assert(is_immediate_variant(obj)); 54 | return ((uword)obj) >> kImmediateTagBits; 55 | } 56 | static ALWAYS_INLINE uword small_string_length(struct object* obj) { 57 | assert(is_small_string(obj)); 58 | return (((uword)obj) >> kImmediateTagBits) & kMaxSmallStringLength; 59 | } 60 | static ALWAYS_INLINE struct object* mksmallstring(const char* data, 61 | uword length) { 62 | assert(length <= kMaxSmallStringLength); 63 | uword result = 0; 64 | for (word i = length - 1; i >= 0; i--) { 65 | result = (result << kBitsPerByte) | (byte)data[i]; // cast: plain char may be signed, and a byte >= 0x80 would sign-extend over the other packed bytes 66 | } 67 | struct object* result_obj = 68 | (struct object*)((result << kBitsPerByte) | 69 | (length << kImmediateTagBits) | kSmallStringTag); 70 
| assert(!is_heap_object(result_obj)); 71 | assert(is_small_string(result_obj)); 72 | assert(small_string_length(result_obj) == length); 73 | return result_obj; 74 | } 75 | struct object* empty_string() { return (struct object*)kSmallStringTag; } 76 | bool is_empty_string(struct object* obj) { return obj == empty_string(); } 77 | static ALWAYS_INLINE char small_string_at(struct object* obj, uword index) { 78 | assert(is_small_string(obj)); 79 | assert(index < small_string_length(obj)); 80 | // +1 for (length | tag) byte 81 | return ((uword)obj >> ((index + 1) * kBitsPerByte)) & 0xFF; 82 | } 83 | static ALWAYS_INLINE struct gc_obj* as_heap_object(struct object* obj) { 84 | assert(is_heap_object(obj)); 85 | assert(kHeapObjectTag == 1); 86 | return (struct gc_obj*)((uword)obj - 1); 87 | } 88 | 89 | static const uintptr_t kNotForwardedBit = 1ULL; 90 | int is_forwarded(struct gc_obj* obj) { 91 | return (obj->tag & kNotForwardedBit) == 0; 92 | } 93 | struct gc_obj* forwarded(struct gc_obj* obj) { 94 | assert(is_forwarded(obj)); 95 | return (struct gc_obj*)obj->tag; 96 | } 97 | void forward(struct gc_obj* from, struct gc_obj* to) { 98 | assert(!is_forwarded(from)); 99 | assert((((uintptr_t)to) & kNotForwardedBit) == 0); 100 | from->tag = (uintptr_t)to; 101 | } 102 | 103 | struct gc_heap; 104 | 105 | typedef void (*VisitFn)(struct object**, struct gc_heap*); 106 | 107 | // To implement by the user: 108 | size_t heap_object_size(struct gc_obj* obj); 109 | void trace_heap_object(struct gc_obj* obj, struct gc_heap* heap, VisitFn visit); 110 | void trace_roots(struct gc_heap* heap, VisitFn visit); 111 | 112 | struct space { 113 | uintptr_t start; 114 | uintptr_t size; 115 | }; 116 | 117 | struct gc_heap { 118 | uintptr_t hp; 119 | uintptr_t limit; 120 | uintptr_t from_space; 121 | uintptr_t to_space; 122 | uintptr_t base; 123 | struct space space; 124 | }; 125 | 126 | static ALWAYS_INLINE uintptr_t align(uintptr_t val, uintptr_t alignment) { 127 | return (val + alignment - 1) 
& ~(alignment - 1); 128 | } 129 | static ALWAYS_INLINE uintptr_t align_size(uintptr_t size) { 130 | return align(size, kObjectAlignment); 131 | } 132 | static ALWAYS_INLINE bool is_size_aligned(uword size) { 133 | return size == align_size(size); 134 | } 135 | 136 | #ifdef STATIC_HEAP 137 | struct space make_space(void* mem, uintptr_t size) { 138 | return (struct space){(uintptr_t)mem, size}; 139 | } 140 | void destroy_space(struct space space) {} 141 | #else 142 | struct space make_space(uintptr_t size) { 143 | size = align(size, kPageSize); 144 | void* mem = mmap(NULL, size, PROT_READ | PROT_WRITE, 145 | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 146 | if (mem == MAP_FAILED) { 147 | fprintf(stderr, "mmap failed\n"); 148 | abort(); 149 | } 150 | return (struct space){(uintptr_t)mem, size}; 151 | } 152 | void destroy_space(struct space space) { 153 | munmap((void*)space.start, space.size); 154 | } 155 | #endif 156 | 157 | void init_heap(struct gc_heap* heap, struct space space) { 158 | if (align(space.size, kPageSize) != space.size) { 159 | fprintf(stderr, "heap size (%lu) must be a multiple of %lu\n", space.size, 160 | kPageSize); 161 | abort(); 162 | } 163 | heap->space = space; 164 | heap->base = heap->to_space = heap->hp = space.start; 165 | heap->from_space = heap->limit = heap->hp + space.size / 2; 166 | } 167 | 168 | static ALWAYS_INLINE uintptr_t heap_ptr(struct gc_heap* heap) { 169 | #if defined(NDEBUG) && defined(__GNUC__) 170 | // Clang and GCC support this; TCC does not 171 | return (uintptr_t)__builtin_assume_aligned((void*)heap->hp, kObjectAlignment); 172 | #else 173 | assert(is_size_aligned(heap->hp) && "need 3 bits for tagging"); 174 | return heap->hp; 175 | #endif 176 | } 177 | 178 | struct gc_obj* copy(struct gc_heap* heap, struct gc_obj* obj) { 179 | size_t size = heap_object_size(obj); 180 | struct gc_obj* new_obj = (struct gc_obj*)heap_ptr(heap); 181 | memcpy(new_obj, obj, size); 182 | forward(obj, new_obj); 183 | heap->hp += size; 184 | 
assert(is_size_aligned(heap->hp) && "need 3 bits for tagging"); 185 | return new_obj; 186 | } 187 | 188 | void flip(struct gc_heap* heap) { 189 | heap->base = heap->hp = heap->from_space; 190 | heap->from_space = heap->to_space; 191 | heap->to_space = heap->hp; 192 | heap->limit = heap->hp + heap->space.size / 2; 193 | } 194 | 195 | struct object* heap_tag(uintptr_t addr) { 196 | return (struct object*)(addr | (uword)1ULL); 197 | } 198 | 199 | #ifdef __TINYC__ 200 | // libc defines __attribute__ as an empty macro if the compiler is not GCC or 201 | // GCC < 2. We know tcc has supported __attribute__(section(...)) for 20+ years 202 | // so we can undefine it. 203 | // See tinycc-devel: 204 | // https://lists.nongnu.org/archive/html/tinycc-devel/2018-04/msg00008.html and 205 | // my StackOverflow question: https://stackoverflow.com/q/78638571/569183 206 | #undef __attribute__ 207 | #endif 208 | 209 | extern char __start_const_heap[] 210 | #ifdef __APPLE__ 211 | __asm("section$start$__DATA$const_heap") 212 | #endif 213 | ; 214 | extern char __stop_const_heap[] 215 | #ifdef __APPLE__ 216 | __asm("section$end$__DATA$const_heap") 217 | #endif 218 | ; 219 | 220 | bool in_const_heap(struct gc_obj* obj) { 221 | return (uword)obj >= (uword)__start_const_heap && 222 | (uword)obj < (uword)__stop_const_heap; 223 | } 224 | 225 | void visit_field(struct object** pointer, struct gc_heap* heap) { 226 | if (!is_heap_object(*pointer)) { 227 | return; 228 | } 229 | struct gc_obj* from = as_heap_object(*pointer); 230 | if (in_const_heap(from)) { 231 | return; 232 | } 233 | struct gc_obj* to = is_forwarded(from) ? 
forwarded(from) : copy(heap, from); 234 | *pointer = heap_tag((uintptr_t)to); 235 | } 236 | 237 | static bool in_heap(struct gc_heap* heap, struct gc_obj* obj) { 238 | return (uword)obj >= heap->base && (uword)obj < heap->hp; 239 | } 240 | 241 | void assert_in_heap(struct object** pointer, struct gc_heap* heap) { 242 | if (!is_heap_object(*pointer)) { 243 | return; 244 | } 245 | struct gc_obj* obj = as_heap_object(*pointer); 246 | if (in_const_heap(obj)) { 247 | return; 248 | } 249 | if (!in_heap(heap, obj)) { 250 | fprintf(stderr, "pointer %p not in heap [%p, %p)\n", obj, 251 | (void*)heap->to_space, (void*)heap->hp); 252 | abort(); 253 | } 254 | } 255 | 256 | static NEVER_INLINE void heap_verify(struct gc_heap* heap) { 257 | assert(heap->base <= heap->hp); 258 | trace_roots(heap, assert_in_heap); 259 | uintptr_t scan = heap->base; 260 | while (scan < heap->hp) { 261 | struct gc_obj* obj = (struct gc_obj*)scan; 262 | size_t size = heap_object_size(obj); 263 | uword end = scan + size; 264 | assert(is_size_aligned(end)); 265 | trace_heap_object(obj, heap, assert_in_heap); 266 | scan = end; 267 | } 268 | } 269 | 270 | void collect_no_verify(struct gc_heap* heap) { 271 | flip(heap); 272 | uintptr_t scan = heap->hp; 273 | trace_roots(heap, visit_field); 274 | while (scan < heap->hp) { 275 | struct gc_obj* obj = (struct gc_obj*)scan; 276 | size_t size = heap_object_size(obj); 277 | uword end = scan + size; 278 | assert(is_size_aligned(end)); 279 | trace_heap_object(obj, heap, visit_field); 280 | scan = end; 281 | } 282 | // TODO(max): If we have < 25% heap utilization, shrink the heap 283 | #ifndef NDEBUG 284 | // Zero out the rest of the heap for debugging 285 | memset((void*)scan, 0, heap->limit - scan); 286 | #endif 287 | } 288 | 289 | void collect(struct gc_heap* heap) { 290 | #ifndef NDEBUG 291 | heap_verify(heap); 292 | #endif 293 | collect_no_verify(heap); 294 | #ifndef NDEBUG 295 | heap_verify(heap); 296 | #endif 297 | } 298 | 299 | #if defined(__GNUC__) // GCC and Clang; __builtin_expect is a builtin, not a macro, so defined() on it is always false
300 | #define LIKELY(x) __builtin_expect(!!(x), 1) 301 | #define UNLIKELY(x) __builtin_expect(!!(x), 0) 302 | #else 303 | #define LIKELY(x) (x) 304 | #define UNLIKELY(x) (x) 305 | #endif 306 | #define ALLOCATOR __attribute__((__malloc__)) 307 | 308 | #ifndef STATIC_HEAP 309 | static NEVER_INLINE void heap_grow(struct gc_heap* heap) { 310 | struct space old_space = heap->space; 311 | struct space new_space = make_space(old_space.size * 2); 312 | #ifndef NDEBUG 313 | heap_verify(heap); 314 | #endif 315 | init_heap(heap, new_space); 316 | collect_no_verify(heap); 317 | #ifndef NDEBUG 318 | heap_verify(heap); 319 | #endif 320 | destroy_space(old_space); 321 | } 322 | #endif 323 | 324 | uword make_tag(uword tag, uword size_bytes) { 325 | assert(size_bytes <= 0xffffffff); 326 | return (size_bytes << kBitsPerByte) | tag; 327 | } 328 | 329 | byte obj_tag(struct gc_obj* obj) { return (obj->tag & 0xff); } 330 | 331 | bool obj_has_tag(struct gc_obj* obj, byte tag) { return obj_tag(obj) == tag; } 332 | 333 | static NEVER_INLINE ALLOCATOR struct object* allocate_slow_path( 334 | struct gc_heap* heap, uword tag, uword size) { 335 | // Outlining allocate_slow_path like this helps the compiler generate better 336 | // code in callers of allocate such as mklist. 
For some reason we have to 337 | // tail-duplicate allocate, too :( 338 | #ifndef STATIC_HEAP 339 | heap_grow(heap); 340 | #endif 341 | assert(is_size_aligned(size) && "need 3 bits for tagging"); 342 | if (UNLIKELY(heap->limit - heap->hp < size)) { 343 | fprintf(stderr, "out of memory\n"); 344 | abort(); 345 | } 346 | // NOTE: Keep in sync with allocate 347 | uintptr_t addr = heap_ptr(heap); 348 | uintptr_t new_hp = addr + size; 349 | assert(is_size_aligned(new_hp) && "need 3 bits for tagging"); 350 | heap->hp = new_hp; 351 | ((struct gc_obj*)addr)->tag = make_tag(tag, size); 352 | return heap_tag(addr); 353 | } 354 | 355 | static ALWAYS_INLINE ALLOCATOR struct object* allocate(struct gc_heap* heap, 356 | uword tag, uword size) { 357 | assert(is_size_aligned(size) && "need 3 bits for tagging"); 358 | // NOTE: Keep in sync with allocate_slow_path 359 | uintptr_t addr = heap_ptr(heap); 360 | uintptr_t new_hp = addr + size; 361 | assert(is_size_aligned(new_hp) && "need 3 bits for tagging"); 362 | if (UNLIKELY(heap->limit < new_hp)) { 363 | return allocate_slow_path(heap, tag, size); 364 | } 365 | // NOTE: Keep in sync with allocate_slow_path 366 | heap->hp = new_hp; 367 | ((struct gc_obj*)addr)->tag = make_tag(tag, size); 368 | return heap_tag(addr); 369 | } 370 | 371 | // Application 372 | 373 | #define FOREACH_TAG(TAG) \ 374 | TAG(TAG_LIST) \ 375 | TAG(TAG_CLOSURE) \ 376 | TAG(TAG_RECORD) \ 377 | TAG(TAG_STRING) \ 378 | TAG(TAG_VARIANT) 379 | 380 | enum { 381 | // All odd because of the kNotForwardedBit 382 | #define ENUM_TAG(TAG) TAG = __COUNTER__ * 2 + 1, 383 | FOREACH_TAG(ENUM_TAG) 384 | #undef ENUM_TAG 385 | }; 386 | 387 | #define HEAP_ALIGNED __attribute__((__aligned__(kObjectAlignment))) 388 | 389 | struct list { 390 | struct gc_obj HEAD; 391 | struct object* first; 392 | struct object* rest; 393 | } HEAP_ALIGNED; 394 | 395 | typedef struct object* (*ClosureFn)(struct object*, struct object*); 396 | 397 | // TODO(max): Figure out if there is a way to do a 
PyObject_HEAD version of 398 | // this where each closure actually has its own struct with named members 399 | struct closure { 400 | struct gc_obj HEAD; 401 | ClosureFn fn; 402 | size_t size; 403 | struct object* env[]; 404 | }; // Not HEAP_ALIGNED; env is variable size 405 | 406 | struct record_field { 407 | size_t key; 408 | struct object* value; 409 | }; 410 | 411 | struct record { 412 | struct gc_obj HEAD; 413 | size_t size; 414 | struct record_field fields[]; 415 | }; // Not HEAP_ALIGNED; fields is variable size 416 | 417 | struct heap_string { 418 | struct gc_obj HEAD; 419 | size_t size; 420 | char data[]; 421 | }; // Not HEAP_ALIGNED; data is variable size 422 | 423 | struct variant { 424 | struct gc_obj HEAD; 425 | size_t tag; 426 | struct object* value; 427 | } HEAP_ALIGNED; 428 | 429 | size_t heap_object_size(struct gc_obj* obj) { 430 | size_t result = obj->tag >> kBitsPerByte; 431 | assert(is_size_aligned(result)); 432 | return result; 433 | } 434 | 435 | void trace_heap_object(struct gc_obj* obj, struct gc_heap* heap, 436 | VisitFn visit) { 437 | switch (obj_tag(obj)) { 438 | case TAG_LIST: 439 | visit(&((struct list*)obj)->first, heap); 440 | visit(&((struct list*)obj)->rest, heap); 441 | break; 442 | case TAG_CLOSURE: 443 | for (size_t i = 0; i < ((struct closure*)obj)->size; i++) { 444 | visit(&((struct closure*)obj)->env[i], heap); 445 | } 446 | break; 447 | case TAG_RECORD: 448 | for (size_t i = 0; i < ((struct record*)obj)->size; i++) { 449 | visit(&((struct record*)obj)->fields[i].value, heap); 450 | } 451 | break; 452 | case TAG_STRING: 453 | break; 454 | case TAG_VARIANT: 455 | visit(&((struct variant*)obj)->value, heap); 456 | break; 457 | default: 458 | fprintf(stderr, "unknown tag: %u\n", obj_tag(obj)); 459 | abort(); 460 | } 461 | } 462 | 463 | bool smallint_is_valid(word value) { 464 | return (value >= kSmallIntMinValue) && (value <= kSmallIntMaxValue); 465 | } 466 | 467 | #define _mksmallint(value) \ 468 | (struct 
object*)(((uword)(value) << kSmallIntTagBits) | kSmallIntTag) 469 | 470 | struct object* mksmallint(word value) { 471 | assert(smallint_is_valid(value)); 472 | return _mksmallint(value); 473 | } 474 | 475 | struct object* mknum(struct gc_heap* heap, word value) { 476 | (void)heap; 477 | return mksmallint(value); 478 | } 479 | 480 | bool is_num(struct object* obj) { return is_small_int(obj); } 481 | 482 | bool is_num_equal_word(struct object* obj, word value) { 483 | assert(smallint_is_valid(value)); 484 | return obj == mksmallint(value); 485 | } 486 | 487 | word num_value(struct object* obj) { 488 | assert(is_num(obj)); 489 | return ((word)obj) >> 1; // sign extend 490 | } 491 | 492 | bool is_list(struct object* obj) { 493 | if (is_empty_list(obj)) { 494 | return true; 495 | } 496 | return is_heap_object(obj) && obj_has_tag(as_heap_object(obj), TAG_LIST); 497 | } 498 | 499 | struct list* as_list(struct object* obj) { 500 | assert(is_list(obj)); 501 | return (struct list*)as_heap_object(obj); 502 | } 503 | 504 | struct object* list_first(struct object* obj) { 505 | assert(!is_empty_list(obj)); 506 | return as_list(obj)->first; 507 | } 508 | 509 | struct object* list_rest(struct object* list) { 510 | assert(!is_empty_list(list)); 511 | return as_list(list)->rest; 512 | } 513 | 514 | struct object* mklist(struct gc_heap* heap) { 515 | struct object* result = allocate(heap, TAG_LIST, sizeof(struct list)); 516 | as_list(result)->first = empty_list(); 517 | as_list(result)->rest = empty_list(); 518 | return result; 519 | } 520 | 521 | bool is_closure(struct object* obj) { 522 | return is_heap_object(obj) && obj_has_tag(as_heap_object(obj), TAG_CLOSURE); 523 | } 524 | 525 | struct closure* as_closure(struct object* obj) { 526 | assert(is_closure(obj)); 527 | return (struct closure*)as_heap_object(obj); 528 | } 529 | 530 | struct object* mkclosure(struct gc_heap* heap, ClosureFn fn, 531 | size_t num_fields) { 532 | uword size = sizeof(struct closure) + num_fields * 
kPointerSize; 533 | assert(is_size_aligned(size)); 534 | struct object* result = allocate(heap, TAG_CLOSURE, size); 535 | as_closure(result)->fn = fn; 536 | as_closure(result)->size = num_fields; 537 | // Assumes the items will be filled in immediately after calling mkclosure so 538 | // they are not initialized 539 | return result; 540 | } 541 | 542 | ClosureFn closure_fn(struct object* obj) { return as_closure(obj)->fn; } 543 | 544 | void closure_set(struct object* closure, size_t i, struct object* item) { 545 | struct closure* c = as_closure(closure); 546 | assert(i < c->size); 547 | c->env[i] = item; 548 | } 549 | 550 | struct object* closure_get(struct object* closure, size_t i) { 551 | struct closure* c = as_closure(closure); 552 | assert(i < c->size); 553 | return c->env[i]; 554 | } 555 | 556 | struct object* closure_call(struct object* closure, struct object* arg) { 557 | ClosureFn fn = closure_fn(closure); 558 | return fn(closure, arg); 559 | } 560 | 561 | bool is_record(struct object* obj) { 562 | return is_heap_object(obj) && obj_has_tag(as_heap_object(obj), TAG_RECORD); 563 | } 564 | 565 | struct record* as_record(struct object* obj) { 566 | assert(is_record(obj)); 567 | return (struct record*)as_heap_object(obj); 568 | } 569 | 570 | struct object* mkrecord(struct gc_heap* heap, size_t num_fields) { 571 | uword size = sizeof(struct record) + num_fields * sizeof(struct record_field); 572 | assert(is_size_aligned(size)); 573 | struct object* result = allocate(heap, TAG_RECORD, size); 574 | as_record(result)->size = num_fields; 575 | // Assumes the items will be filled in immediately after calling mkrecord so 576 | // they are not initialized 577 | return result; 578 | } 579 | 580 | size_t record_num_fields(struct object* record) { 581 | return as_record(record)->size; 582 | } 583 | 584 | void record_set(struct object* record, size_t index, 585 | struct record_field field) { 586 | struct record* r = as_record(record); 587 | assert(index < r->size); 588 | 
r->fields[index] = field; 589 | } 590 | 591 | struct object* record_get(struct object* record, size_t key) { 592 | struct record* r = as_record(record); 593 | struct record_field* fields = r->fields; 594 | for (size_t i = 0; i < r->size; i++) { 595 | struct record_field field = fields[i]; 596 | if (field.key == key) { 597 | return field.value; 598 | } 599 | } 600 | return NULL; 601 | } 602 | 603 | bool is_string(struct object* obj) { 604 | if (is_small_string(obj)) { 605 | return true; 606 | } 607 | return is_heap_object(obj) && obj_has_tag(as_heap_object(obj), TAG_STRING); 608 | } 609 | 610 | struct heap_string* as_heap_string(struct object* obj) { 611 | assert(is_string(obj)); 612 | return (struct heap_string*)as_heap_object(obj); 613 | } 614 | 615 | struct object* mkstring_uninit_private(struct gc_heap* heap, size_t count) { 616 | assert(count > kMaxSmallStringLength); // can't fill in small string later 617 | uword size = align_size(sizeof(struct heap_string) + count); 618 | struct object* result = allocate(heap, TAG_STRING, size); 619 | as_heap_string(result)->size = count; 620 | return result; 621 | } 622 | 623 | struct object* mkstring(struct gc_heap* heap, const char* data, uword length) { 624 | if (length <= kMaxSmallStringLength) { 625 | return mksmallstring(data, length); 626 | } 627 | struct object* result = mkstring_uninit_private(heap, length); 628 | memcpy(as_heap_string(result)->data, data, length); 629 | return result; 630 | } 631 | 632 | static ALWAYS_INLINE uword string_length(struct object* obj) { 633 | if (is_small_string(obj)) { 634 | return small_string_length(obj); 635 | } 636 | return as_heap_string(obj)->size; 637 | } 638 | 639 | char string_at(struct object* obj, uword index) { 640 | if (is_small_string(obj)) { 641 | return small_string_at(obj, index); 642 | } 643 | return as_heap_string(obj)->data[index]; 644 | } 645 | 646 | bool is_variant(struct object* obj) { 647 | if (is_immediate_variant(obj)) { 648 | return true; 649 | } 650 | 
return is_heap_object(obj) && obj_has_tag(as_heap_object(obj), TAG_VARIANT); 651 | } 652 | 653 | struct variant* as_variant(struct object* obj) { 654 | assert(is_variant(obj)); 655 | assert(is_heap_object(obj)); // This only makes sense for heap variants. 656 | return (struct variant*)as_heap_object(obj); 657 | } 658 | 659 | struct object* mkvariant(struct gc_heap* heap, size_t tag) { 660 | struct object* result = allocate(heap, TAG_VARIANT, sizeof(struct variant)); 661 | as_variant(result)->tag = tag; 662 | return result; 663 | } 664 | 665 | size_t variant_tag(struct object* obj) { 666 | if (is_immediate_variant(obj)) { 667 | return immediate_variant_tag(obj); 668 | } 669 | return as_variant(obj)->tag; 670 | } 671 | 672 | struct object* variant_value(struct object* obj) { 673 | if (is_immediate_variant(obj)) { 674 | return hole(); 675 | } 676 | return as_variant(obj)->value; 677 | } 678 | 679 | void variant_set(struct object* variant, struct object* value) { 680 | as_variant(variant)->value = value; 681 | } 682 | 683 | #define MAX_HANDLES 4096 684 | 685 | struct handle_scope { 686 | struct object*** base; 687 | }; 688 | 689 | static struct object** handle_stack[MAX_HANDLES]; 690 | static struct object*** handles = handle_stack; 691 | #ifndef NDEBUG 692 | // Only used to check for handle stack overflow. 
693 | static struct object*** handles_end = &handle_stack[MAX_HANDLES]; 694 | #endif 695 | 696 | void pop_handles(void* local_handles) { 697 | handles = ((struct handle_scope*)local_handles)->base; 698 | } 699 | 700 | #define HANDLES() \ 701 | struct handle_scope local_handles __attribute__((__cleanup__(pop_handles))); \ 702 | local_handles.base = handles; 703 | #define GC_PROTECT(x) \ 704 | assert(handles != handles_end); \ 705 | (*handles++) = (struct object**)(&x) 706 | #define GC_HANDLE(type, name, val) \ 707 | type name = val; \ 708 | GC_PROTECT(name) 709 | 710 | void trace_roots(struct gc_heap* heap, VisitFn visit) { 711 | for (struct object*** h = handle_stack; h != handles; h++) { 712 | visit(*h, heap); 713 | } 714 | } 715 | 716 | struct gc_heap heap_object; 717 | struct gc_heap* heap = &heap_object; 718 | 719 | struct object* num_add(struct object* a, struct object* b) { 720 | // NB: doesn't use pointers after allocating 721 | return mknum(heap, num_value(a) + num_value(b)); 722 | } 723 | 724 | struct object* num_sub(struct object* a, struct object* b) { 725 | // NB: doesn't use pointers after allocating 726 | return mknum(heap, num_value(a) - num_value(b)); 727 | } 728 | 729 | struct object* num_mul(struct object* a, struct object* b) { 730 | // NB: doesn't use pointers after allocating 731 | return mknum(heap, num_value(a) * num_value(b)); 732 | } 733 | 734 | struct object* list_cons(struct object* item, struct object* list) { 735 | HANDLES(); 736 | GC_PROTECT(item); 737 | GC_PROTECT(list); 738 | struct object* result = mklist(heap); 739 | as_list(result)->first = item; 740 | as_list(result)->rest = list; 741 | return result; 742 | } 743 | 744 | struct object* heap_string_concat(struct object* a, struct object* b) { 745 | uword a_size = string_length(a); 746 | uword b_size = string_length(b); 747 | assert(a_size + b_size > kMaxSmallStringLength); 748 | HANDLES(); 749 | GC_PROTECT(a); 750 | GC_PROTECT(b); 751 | struct object* result = 
mkstring_uninit_private(heap, a_size + b_size); 752 | for (uword i = 0; i < a_size; i++) { 753 | as_heap_string(result)->data[i] = string_at(a, i); 754 | } 755 | for (uword i = 0; i < b_size; i++) { 756 | as_heap_string(result)->data[a_size + i] = string_at(b, i); 757 | } 758 | return result; 759 | } 760 | 761 | static ALWAYS_INLINE struct object* small_string_concat(struct object* a_obj, 762 | struct object* b_obj) { 763 | // a: CBAT 764 | // b: FEDT 765 | // result: FEDCBAT 766 | assert(is_small_string(a_obj)); 767 | assert(is_small_string(b_obj)); 768 | uword length = small_string_length(a_obj) + small_string_length(b_obj); 769 | assert(length <= kMaxSmallStringLength); 770 | uword result = ((uword)b_obj) & ~(uword)0xFFULL; 771 | result <<= small_string_length(a_obj) * kBitsPerByte; 772 | result |= ((uword)a_obj) & ~(uword)0xFFULL; 773 | result |= length << kImmediateTagBits; 774 | result |= kSmallStringTag; 775 | struct object* result_obj = (struct object*)result; 776 | assert(!is_heap_object(result_obj)); 777 | assert(is_small_string(result_obj)); 778 | return result_obj; 779 | } 780 | 781 | ALWAYS_INLINE static struct object* string_concat(struct object* a, 782 | struct object* b) { 783 | if (is_empty_string(a)) { 784 | return b; 785 | } 786 | if (is_empty_string(b)) { 787 | return a; 788 | } 789 | uword a_size = string_length(a); 790 | uword b_size = string_length(b); 791 | if (a_size + b_size <= kMaxSmallStringLength) { 792 | return small_string_concat(a, b); 793 | } 794 | return heap_string_concat(a, b); 795 | } 796 | 797 | bool string_equal_cstr_len(struct object* string, const char* cstr, uword len) { 798 | assert(is_string(string)); 799 | if (string_length(string) != len) { 800 | return false; 801 | } 802 | for (uword i = 0; i < len; i++) { 803 | if (string_at(string, i) != cstr[i]) { 804 | return false; 805 | } 806 | } 807 | return true; 808 | } 809 | 810 | extern const char* record_keys[]; 811 | extern const char* variant_names[]; 812 | 813 | struct 
object* print(struct object* obj) { 814 | if (is_num(obj)) { 815 | printf("%ld", num_value(obj)); 816 | } else if (is_list(obj)) { 817 | putchar('['); 818 | while (!is_empty_list(obj)) { 819 | print(list_first(obj)); 820 | obj = list_rest(obj); 821 | if (!is_empty_list(obj)) { 822 | putchar(','); 823 | putchar(' '); 824 | } 825 | } 826 | putchar(']'); 827 | } else if (is_record(obj)) { 828 | struct record* record = as_record(obj); 829 | putchar('{'); 830 | for (size_t i = 0; i < record->size; i++) { 831 | printf("%s = ", record_keys[record->fields[i].key]); 832 | print(record->fields[i].value); 833 | if (i + 1 < record->size) { 834 | fputs(", ", stdout); 835 | } 836 | } 837 | putchar('}'); 838 | } else if (is_closure(obj)) { 839 | fputs("", stdout); 840 | } else if (is_string(obj)) { 841 | putchar('"'); 842 | for (uword i = 0; i < string_length(obj); i++) { 843 | putchar(string_at(obj, i)); 844 | } 845 | putchar('"'); 846 | } else if (is_variant(obj)) { 847 | putchar('#'); 848 | printf("%s ", variant_names[variant_tag(obj)]); 849 | print(variant_value(obj)); 850 | } else if (is_hole(obj)) { 851 | fputs("()", stdout); 852 | } else { 853 | assert(is_heap_object(obj)); 854 | fprintf(stderr, "unknown tag: %u\n", obj_tag(as_heap_object(obj))); 855 | abort(); 856 | } 857 | return obj; 858 | } 859 | 860 | struct object* println(struct object* obj) { 861 | print(obj); 862 | putchar('\n'); 863 | return obj; 864 | } 865 | 866 | #ifndef MEMORY_SIZE 867 | #define MEMORY_SIZE 4096 868 | #endif 869 | 870 | // Put something in the const heap so that __start_const_heap and 871 | // __stop_const_heap are defined by the linker. 
872 | #ifdef __APPLE__ 873 | #define CONST_HEAP const __attribute__((section("__DATA,const_heap"))) 874 | #else 875 | #define CONST_HEAP const __attribute__((section("const_heap"))) 876 | #endif 877 | CONST_HEAP 878 | __attribute__((used)) struct heap_string private_unused_const_heap = { 879 | .HEAD.tag = TAG_STRING, .size = 11, .data = "hello world"}; 880 | -------------------------------------------------------------------------------- /util/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM caddy as web 2 | COPY . . 3 | RUN echo ":8000" > /etc/caddy/Caddyfile 4 | RUN echo "rewrite /repl /util/repl.html" >> /etc/caddy/Caddyfile 5 | RUN echo "rewrite /compilerepl /util/compilerepl.html" >> /etc/caddy/Caddyfile 6 | RUN echo "rewrite /style.css /util/style.css" >> /etc/caddy/Caddyfile 7 | RUN echo "log" >> /etc/caddy/Caddyfile 8 | RUN echo "file_server" >> /etc/caddy/Caddyfile 9 | 10 | FROM alpine:latest as build 11 | RUN printf -- '-m\nscrapscript\n...' > .args 12 | RUN wget https://cosmo.zip/pub/cosmos/bin/assimilate 13 | RUN wget https://cosmo.zip/pub/cosmos/bin/ape-x86_64.elf 14 | RUN wget https://cosmo.zip/pub/cosmos/bin/python 15 | RUN wget https://cosmo.zip/pub/cosmos/bin/zip 16 | RUN chmod +x assimilate 17 | RUN chmod +x ape-x86_64.elf 18 | RUN chmod +x python 19 | RUN chmod +x zip 20 | RUN mkdir Lib 21 | COPY scrapscript.py Lib/ 22 | COPY compiler.py Lib/ 23 | COPY runtime.c Lib/ 24 | COPY cli.c Lib/ 25 | RUN ./ape-x86_64.elf ./python -m compileall -b Lib/scrapscript.py Lib/compiler.py 26 | RUN mv python scrapscript.com 27 | RUN ./ape-x86_64.elf ./zip -r scrapscript.com Lib .args 28 | RUN ./ape-x86_64.elf ./assimilate ./scrapscript.com 29 | RUN echo "Testing..." 30 | RUN ./scrapscript.com apply "1+2" 31 | 32 | # Set up the container 33 | FROM scratch as main 34 | COPY --from=build scrapscript.com . 
35 | ENTRYPOINT ["./scrapscript.com"] 36 | -------------------------------------------------------------------------------- /util/build-com: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | PREV="$(pwd)" 4 | DIR="$(mktemp -d)" 5 | cp scrapscript.py "$DIR" 6 | cp compiler.py "$DIR" 7 | cp runtime.c "$DIR" 8 | cp cli.c "$DIR" 9 | cd "$DIR" 10 | printf -- '-m\nscrapscript\n...' > .args 11 | wget https://cosmo.zip/pub/cosmos/bin/python 12 | wget https://cosmo.zip/pub/cosmos/bin/zip 13 | chmod +x python 14 | chmod +x zip 15 | ./python -m compileall -b scrapscript.py compiler.py 16 | mkdir Lib 17 | cp scrapscript.pyc Lib/scrapscript.pyc 18 | cp compiler.pyc Lib/compiler.pyc 19 | cp runtime.c Lib/runtime.c 20 | cp cli.c Lib/cli.c 21 | cp python scrapscript.com 22 | ./zip -r scrapscript.com Lib .args 23 | echo "Testing..." 24 | ./scrapscript.com apply "1+2" 25 | cd "$PREV" 26 | cp "$DIR"/scrapscript.com . 27 | -------------------------------------------------------------------------------- /util/compilerepl.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Scrapscript Web REPL 7 | 8 | 9 | 10 | 14 | 18 | 22 | 23 | 24 | 25 |
26 |
27 |

See scrapscript.org for a slightly 28 | out-of-date language reference.

29 |

This REPL is completely client-side and works by running 30 | scrapscript.py in the 31 | browser using Pyodide.

32 |
33 |
34 | 35 | 36 |
37 |
38 | Output: 39 |
40 |
41 | 42 |
43 | 172 | 219 |
220 | 222 | 223 | 224 | -------------------------------------------------------------------------------- /util/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | try hitting /repl or /compilerepl 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /util/repl.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Scrapscript Web REPL 7 | 8 | 9 | 10 | 14 | 18 | 22 | 23 | 24 | 25 |
26 |
27 |

See scrapscript.org for a slightly 28 | out-of-date language reference.

29 |

This REPL is completely client-side and works by running 30 | scrapscript.py in the 31 | browser using Pyodide.

32 |
33 |
34 | 35 | 36 |
37 |
38 | Output: 39 |
40 |
41 | 42 |
43 | 168 | 209 |
210 | 212 | 213 | 214 | -------------------------------------------------------------------------------- /util/scrapscript.js: -------------------------------------------------------------------------------- 1 | let input = ` 2 | t1 = 123 |> await ! 3 | t2 = 456 |> await ! 4 | 1 + f a * b 5 | ? 1 + f 2 == 3 6 | ? 1.0 + 2.0 == 3.0 7 | ? a == 2 8 | ? [ ] == f [ ] 9 | ? ( ) == ( ) 10 | ? 1 >+ [ f 2 , 3 , 4 ] == [ 1 , 2 , f 3 , 4 ] 11 | ? ( 1 + 2 ) == ( 1 + ( 1 + 1 ) ) 12 | ? ~~aGVsbG8gd29ybGQ= == ~~aGVsbG8gd29ybGQ= 13 | ? ~~64'aGVsbG8gd29ybGQ= == ~~64'aGVsbG8gd29ybGQ= 14 | . a : $$int = 2 15 | . a : $$int 16 | . a = 2 17 | . b = 4 - f c 18 | . c = 1 19 | . _ = f "hello" ++ "!" 20 | . f = x -> x 21 | . _ = h 1 2 22 | . h = a -> b -> a + b 23 | . _ = { a = 1 , b = "x" } 24 | . _ = { r = _ } -> ( ) 25 | . _ = | { r = _ } -> ( ) 26 | . _ = { r = _ , ... } -> ( ) 27 | . { r = r , ... } = { r = 123 , .. k } 28 | . _ : { q : int , ... } 29 | . _ : { q : int } 30 | . _ = { a = 2 , b = "y" , .. k } 31 | . k = { a = 1 , b = "x" , c = 3 } 32 | . _ = | "a" -> 1 | "b" -> 2 | "c" -> 3 | x -> 0 33 | . _ = g 6 34 | . g = | 1 -> 1 | n -> n * g ( n - 1 ) 35 | . greet = x -> "hello\`x\`!" 36 | . _ = scoop :: chocolate ( ) 37 | . scoop : # vanilla ( ) # chocolate ( ) # strawberry ( ) 38 | . _ = p :: point { x = 3 , y = 4 } |> # point _ -> 999 39 | . _ : p -> ( ) = # point _ -> ( ) 40 | . _ : p -> ( ) = | # point _ -> ( ) 41 | . _ : p = p :: point { x = 3 , y = 4 } 42 | . p : # point { x : int , y : int } 43 | . _ = tuple :: triplet { x = 1.0 , y = "A" , z = ~2B } |> | # pair _ -> "B" | # triplet { y = y , ... } -> y 44 | . _ = { z = 888 } |> { z = z , ... } -> z 45 | . tuple : x => y => z => # pair { x : x , y : y } # triplet { x : x , y : y , z : z } 46 | . _ = $123456 1 2 47 | . _ = $sha1'123456 1 2 48 | . _ = $$add 1 2 49 | . _ : $$int 50 | `; 51 | 52 | // input = `scoop :: chocolate |> | # vanilla _ -> 111 | # chocolate _ -> 222 | # strawberry _ -> 333 . 
scoop : # vanilla ( ) # chocolate ( ) # strawberry ( )`; 53 | 54 | // input = `( | # vanilla _ -> 111 | # chocolate _ -> 222 | # strawberry _ -> 333 ) scoop :: chocolate . scoop : # vanilla ( ) # chocolate ( ) # strawberry ( )`; 55 | 56 | // input = `scoop :: chocolate ( ) . scoop : # vanilla ( ) # chocolate ( ) # strawberry ( )`; 57 | 58 | // input = `r . { r = r , ... } = { r = 123 , .. k } . k = { }`; 59 | 60 | // input = `f { r = 234 } . f = { r = _ } -> ( )`; 61 | 62 | // input = `f x . f = | 123 -> 456 | y -> y . x = 0`; 63 | 64 | // input = `f x . f = | 1 -> 1 | n -> n * f ( n - 1 ) . x = 6`; 65 | 66 | // input = `f 1 2 . f = a -> b -> a + b`; 67 | 68 | // TODO: Make this a proper tokenizer that handles strings with blankspace. 69 | const tokenize = x => x.replace(/ *--[^\n]*/g, '').trim().split(/[\s\n]+/g) 70 | 71 | const tokens = tokenize(input); 72 | 73 | const lp = n => ({pl: n, pr: n - 0.1}); 74 | const rp = n => ({pl: n, pr: n + 0.1}); 75 | const np = n => ({pl: n, pr: n + 0}); 76 | const xp = n => ({pl: n, pr: 0}); 77 | const ps = { 78 | "::": lp(2000), 79 | "": rp(1000), 80 | ">>": lp(14), 81 | "^": rp(13), 82 | "*": lp(12), "/": lp(12), "//": lp(12), "%": lp(12), 83 | "+": lp(11), "-": lp(11), 84 | "**": rp(10), ">*": rp(10), "++": rp(10), ">+": rp(10), 85 | "==": np(9), "/=": np(9), "<": np(9), ">": np(9), "<=": np(9), ">=": np(9), 86 | "&&": rp(8), 87 | "||": rp(7), 88 | "#": lp(5.5), 89 | "=>": lp(5.11), 90 | "->": lp(5), 91 | "|": rp(4.5), ":": lp(4.5), 92 | "|>": lp(4.11), 93 | "=": rp(4), 94 | "!": lp(3), ".": rp(3), "?": rp(3), 95 | ",": xp(1), "]": xp(1), "}": xp(1), 96 | } 97 | function parse(ts, p = 0) { 98 | const x = ts.shift(); 99 | if (x === undefined) throw new Error("unexpected end of input"); 100 | let l; if (false) {} 101 | else if (x === "|") { 102 | const expr = parse(ts, 5); // TODO: make this work for larger arities 103 | if (expr.op !== "->") throw new Error("must be function"); 104 | l = new Fun([[expr.l, expr.r]]); 105 | 
while(ts[0] === "|") { 106 | ts.shift() 107 | const expr_ = parse(ts, 5); // TODO: make this work for larger arities 108 | if (expr_.op !== "->") throw new Error("must be function"); 109 | l.branches.push([expr_.l, expr_.r]); 110 | } 111 | } 112 | else if (x === "#") { 113 | l = new Uni({}); 114 | do { 115 | const {l:l_,op,r} = parse(ts, 6); 116 | if (op !== '') throw new Error(`TODO: parsing error`); 117 | l.types[l_.label] = r; 118 | } while (ts[0] === "#" && ts.shift()) 119 | } 120 | else if (ps[x]) {} 121 | else if (x === "(") { 122 | l = ts[0] === ")" ? new Hole() : parse(ts, 0); 123 | ts.shift(); 124 | } 125 | else if (x === "[") { 126 | l = []; 127 | if (ts[0] === "]") ts.shift(); 128 | else do { 129 | l.push(parse(ts, 2)); 130 | } while(ts.shift() !== "]"); 131 | } 132 | else if (x === "{") { 133 | l = new Rec(); 134 | if (ts[0] === "}") ts.shift(); 135 | else do { 136 | const {l:l_,op,r} = parse(ts, 2); 137 | if (l_?.label === "...") {} // TODO 138 | else if (op === "=") l.data[l_.label ?? 
".."] = r; 139 | else if (op === "..") l.fills.push(r); 140 | } while(ts.shift() !== "}"); 141 | } 142 | else if (x === "...") { l = new Var("..."); } 143 | else if (x === "..") { l = new Expr({}, "..", parse(ts, 2)); } 144 | else if (x.match(/^[0-9]+$/)) l = parseInt(x); 145 | else if (x.match(/^[0-9]+[0-9.]*$/)) l = parseFloat(x); 146 | else if (x.match(/^".+"$/)) l = JSON.parse(x); 147 | else if (x.match(/^[_a-z][a-z0-9]*$/)) l = new Var(x); 148 | else if (x.match(/^\$(sha1')?[a-z0-9]+$/)) l = new Var(x); 149 | else if (x.match(/^\$\$[a-z0-9]+$/)) l = new Var(x); 150 | else if (x.match(/^~[^~ ]+$/)) l = new Bytes([x]); 151 | else if (x.match(/^~~[^~ ]+$/)) l = new Bytes(x); 152 | else if (x.match(/^\$::[a-z]+$/)) l = new Rock(x); 153 | else throw new Error(`bad token: ${x}`); 154 | while (true) { 155 | let op = ts[0]; 156 | if (!op || op === ")" || op === "]") break; 157 | if (!ps[op]) op = ""; 158 | const {pl, pr} = ps[op]; 159 | // console.log(l, op, p, pl, pr); 160 | if (pl < p) break; 161 | if (op !== "") ts.shift(); 162 | l = new Expr(l, op, parse(ts, pr)); 163 | } 164 | return l; 165 | }; 166 | 167 | const ast = parse(tokens); 168 | 169 | function Var(label) {this.label = label;} 170 | function Expr(l, op, r) {this.l = l; this.op = op; this.r = r;} 171 | function Hole() {} 172 | function Bytes(x) {this.x = x;} 173 | function Fun(branches, ctx = {}, xs = []) {this.branches = [...branches]; this.ctx = {...ctx}; this.xs = [...xs];} 174 | function Rec(data = {}, fills = []) {this.data = {...data}; this.fills = [...fills];} 175 | function Uni(types, t = null, x = null) {this.types = {...types}; this.t = t; this.x = x;} 176 | function Rock(label) {this.label = label;} 177 | 178 | // TODO: null matches need to bubble up 179 | const match = (arg,x) => { 180 | const type = Object.getPrototypeOf(arg).constructor.name 181 | if (type === "Number") return arg === x ? 
{} : null; 182 | else if (type === "Var") return { [arg.label]: x }; 183 | else if (type === "Rec") { const envs = Object.entries(arg.data).map(([k,v]) => match(v,x.data[k])); if (envs.some(x => x === null)) return null; else return Object.assign({}, ...envs); } 184 | else if (type === "Uni") return arg.types[x.t] ? match(arg.types[x.t],x.x) : null; 185 | else if (type === "Expr") if (arg.op === ":") return match(arg.l,x); else throw new Error("TODO: unexpected expression"); 186 | // TODO: return null if no match 187 | else throw new Error(`TODO: match ${type}`); 188 | }; 189 | const ops = { 190 | "!": (env,l,r) => eval(env, r), // TODO: skipping left eval for now 191 | ".": (env,l,r) => eval({ ...env, ...eval(env,r).env }, l), 192 | "?": (env,l,r) => {if (eval(env,r).y !== true) throw new Error(`bad assertion: ${JSON.stringify(r)}`); return eval(env, l);}, 193 | "=": (env,l,r) => ({ env: { ...env, ...Object.fromEntries(Object.entries(match(l,r)).map(([k,v])=>[k,eval(env,v).y])) } }), 194 | "+": (env,l,r) => ({ y: eval(env,l).y + eval(env,r).y }), 195 | "-": (env,l,r) => ({ y: eval(env,l).y - eval(env,r).y }), 196 | "*": (env,l,r) => ({ y: eval(env,l).y * eval(env,r).y }), 197 | ":": (env,l,r) => ({ env: { ...env, [l.label]: eval(env,r).y } }), 198 | "::": (env,l,r) => ({ y: new Uni(eval(env,l).y.types, r.label) }), 199 | "==": (env,l,r) => ({ y: JSON.stringify(eval(env,l).y) === JSON.stringify(eval(env,r).y) }), 200 | ">+": (env,l,r) => ({ y: [eval(env,l).y].concat(eval(env,r).y) }), 201 | "++": (env,l,r) => ({ y: eval(env,l).y.concat(eval(env,r).y) }), 202 | "->": (env,l,r) => ({ y: new Fun([[l,r]], env) }), 203 | "=>": (env,l,r) => eval(env,r), 204 | "|>": (env,l,r) => ops[""](env,r,l), 205 | "": (env,l,r) => { 206 | const type = Object.getPrototypeOf(l).constructor.name 207 | if (false) {} 208 | else if (["Var","Expr"].includes(type)) 209 | return ops[""](env, eval(env, l).y, r); 210 | else if (type === "Uni") 211 | return { y: new Uni(l.types, l.t, 
eval(env,r).y) }; 212 | else if (type === "Fun") { 213 | l = new Fun(l.branches, l.ctx, l.xs.concat([r])); 214 | if (l.branches[0].length - 1 > l.xs.length) return l; 215 | for (const branch of l.branches) { 216 | const envs = l.xs.map((x,i) => match(branch[i],eval(env,x).y)); 217 | if (envs.some(x => !x)) continue; 218 | return eval(Object.assign({}, env, l.ctx, ...envs), branch[branch.length - 1]); 219 | } 220 | throw new Error(`no match found`); 221 | } else throw new Error(`TODO: APPLY ${type} ${l} ${r} ${env}`); 222 | }, 223 | }; 224 | const eval = (env,x) => { 225 | console.log(x); 226 | const type = Object.getPrototypeOf(x).constructor.name 227 | if (false) {} 228 | else if (["Number","String","Boolean","Bytes","Hole","Rock"].includes(type)) 229 | return {y:x}; 230 | else if (type === "Var") 231 | if (env[x.label]) return {y:env[x.label]}; 232 | else throw new Error(`TODO: ${x.label} not found`); 233 | else if (type === "Fun") 234 | return {y:x}; // TODO: anything else? 235 | else if (type === "Uni") 236 | return {y:x}; // TODO: eval all the sub data? 
237 | else if (type === "Expr") 238 | if (!ops[x.op]) throw new Error(`TODO: op ${x.op}`); 239 | else return ops[x.op](env,x.l,x.r); 240 | else if (type === "Array") 241 | return {y:x.map(x_=>eval(env,x_).y)}; 242 | else if (type === "Rec") 243 | return { y: new Rec(Object.fromEntries(Object.entries(x.data).map(([k,v]) => [k,eval(env,v).y])), x.fills.map(v => eval(env,v).y)) }; 244 | else throw new Error(`TODO: EVAL ${type} ${x} ${env}`); 245 | }; 246 | 247 | const env = { 248 | "$123456": eval({}, parse(tokenize("a -> b -> a + b"))).y, 249 | "$sha1'123456": eval({}, parse(tokenize("a -> b -> a + b"))).y, 250 | "$$add": eval({}, parse(tokenize("a -> b -> a + b"))).y, 251 | "$$int": eval({}, parse(tokenize("# int ( )"))).y, 252 | }; 253 | console.log(eval(env,ast).y); 254 | -------------------------------------------------------------------------------- /util/style.css: -------------------------------------------------------------------------------- 1 | html { 2 | font-size: 100%; 3 | background-color: black; 4 | } 5 | body { 6 | font-family: "Nunito Sans", sans-serif; 7 | font-weight: 400; 8 | line-height: 1.75; 9 | margin: 0; 10 | max-width: 55rem; 11 | } 12 | p { 13 | margin-bottom: 1rem; 14 | } 15 | div { 16 | color: #d4d4d4; 17 | } 18 | 19 | a:link { 20 | text-decoration: none; 21 | text-decoration-thickness: 1px; 22 | text-underline-offset: 0.15rem; 23 | text-decoration-style: dotted; 24 | } 25 | a { 26 | color: #7ec699; 27 | } 28 | 29 | ul, 30 | ol { 31 | padding-left: 1.5rem; 32 | } 33 | 34 | blockquote { 35 | color: #d4d4d4; 36 | border-left: 2px dotted #666; 37 | margin-left: 0.5rem; 38 | padding-left: 1.5rem; 39 | margin-bottom: 2rem; 40 | } 41 | 42 | h1, 43 | h2, 44 | h3, 45 | h4, 46 | h5 { 47 | margin: 3rem 0 1.38rem; 48 | font-family: "Rubik", sans-serif; 49 | font-weight: 500; 50 | line-height: 1.3; 51 | text-transform: capitalize; 52 | } 53 | h1 { 54 | font-weight: 700; 55 | text-transform: uppercase; 56 | margin-top: 0; 57 | letter-spacing: 1px; 58 
| } 59 | h2 { 60 | margin-top: 5rem; 61 | padding-top: 2rem; 62 | border-top: 2px dotted #666; 63 | } 64 | h3 { 65 | margin-top: 3rem; 66 | } 67 | 68 | h1 { 69 | font-size: 4.209rem; 70 | } 71 | h2 { 72 | font-size: 3.157rem; 73 | } 74 | h3 { 75 | font-size: 2.369rem; 76 | } 77 | h4 { 78 | font-size: 1.777rem; 79 | } 80 | h5 { 81 | font-size: 1.333rem; 82 | } 83 | small, 84 | .text_small { 85 | font-size: 0.75rem; 86 | } 87 | 88 | header > div, 89 | footer > div { 90 | padding: 2rem 2rem; 91 | display: flex; 92 | align-items: center; 93 | gap: 1rem; 94 | text-transform: uppercase; 95 | font-weight: 600; 96 | line-height: 2; 97 | font-size: 0.875rem; 98 | max-width: 35rem; 99 | margin: 0 auto; 100 | } 101 | header a, 102 | footer a { 103 | color: #cc99cd; 104 | text-decoration: none; 105 | max-height: 1.75rem; 106 | } 107 | header img, 108 | footer img { 109 | height: 1.75rem; 110 | -webkit-filter: invert(1); 111 | filter: invert(1); 112 | padding-right: 0.5rem; 113 | } 114 | footer { 115 | margin-bottom: 6rem; 116 | } 117 | 118 | main { 119 | max-width: 35rem; 120 | margin: 0 auto; 121 | background-color: black; 122 | } 123 | 124 | a:hover { 125 | opacity: 0.8; 126 | } 127 | 128 | #output { 129 | display: flex; 130 | flex-direction: column; 131 | gap: 0.5em; 132 | margin: 0.8em 0; 133 | box-sizing: border-box; 134 | } 135 | 136 | pre { 137 | overflow-x: auto; 138 | } 139 | code, 140 | pre, 141 | code[class*="language-"], 142 | pre[class*="language-"] { 143 | font-family: "Fira Code", Monaco, Menlo, Consolas, "Courier New", monospace; 144 | position: relative; 145 | font-size: 0.8rem; 146 | } 147 | pre[class*="language-"] { 148 | padding: 0.5em 0.5em; 149 | margin: 0em; 150 | z-index: 2; 151 | } 152 | code, 153 | pre > code { 154 | font-size: 0.8rem; 155 | line-height: 1.42; 156 | -webkit-text-size-adjust: 100%; 157 | } 158 | pre > code { 159 | font-size: 0.8rem; 160 | margin-left: 2.5%; 161 | display: block; 162 | } 163 | 164 | :not(pre) > code[class*="language-"], 
pre[class*="language-"] { 165 | background: #2d2d2d; 166 | border-radius: 5px 5px 0px 0px; 167 | } 168 | pre[class*="language-"] + pre[class*="language-"].result { 169 | background-color: #202020; 170 | position: relative; 171 | opacity: 0.9; 172 | border-radius: 0px 0px 5px 5px; 173 | } 174 | 175 | pre[class*="language-"].result { 176 | z-index: 1; 177 | } 178 | input[type="text"], textarea { 179 | background-color: #2d2d2d; 180 | outline: none; 181 | color: #ccc; 182 | } 183 | 184 | #prompt-string { 185 | font-family: "Nunito Sans", sans-serif; 186 | font-weight: bold; 187 | display: inline-block; 188 | padding-right: 0.4em; 189 | } 190 | 191 | #input { 192 | font-family: "Fira Code", Monaco, Menlo, Consolas, "Courier New", monospace; 193 | background-color: #2d2d2d; 194 | color: #d4d4d4; 195 | border: none; 196 | padding: 0.5em 1em; 197 | border-radius: 5px 0px 0px 5px; 198 | transition: background-color 200ms; 199 | width: 20em; 200 | } 201 | 202 | #submit-input { 203 | font-family: "Nunito Sans", sans-serif; 204 | background-color: #3a3a3a; 205 | color: #c0c0c0; 206 | border: none; 207 | padding: 0.5em 1.2em; 208 | font-weight: bold; 209 | border-radius: 0px 5px 5px 0px; 210 | transition: background-color 200ms; 211 | } 212 | #submit-input:hover { 213 | cursor: pointer; 214 | background-color: #434343; 215 | } 216 | #submit-input:active { 217 | background-color: #3a3a3a; 218 | } 219 | 220 | #clear-local-storage { 221 | font-family: "Nunito Sans", sans-serif; 222 | background-color: #2d2d2d; 223 | color: #c0c0c0; 224 | border: none; 225 | padding: 0.5em 1.2em; 226 | font-weight: bold; 227 | border-radius: 5px; 228 | transition: background-color 200ms; 229 | } 230 | #clear-local-storage:hover { 231 | cursor: pointer; 232 | background-color: #434343; 233 | } 234 | --------------------------------------------------------------------------------