├── .gitmodules ├── c ├── src │ ├── glas_internal.h │ ├── main.c │ └── minunit.h ├── glas └── Makefile ├── glas-hs ├── README.md ├── .gitignore ├── src │ ├── AST.hs │ ├── Lib.hs │ ├── Prog.hs │ └── Val.hs ├── Setup.hs ├── test │ └── Spec.hs ├── app │ └── Main.hs ├── CHANGELOG.md ├── stack.yaml.lock ├── LICENSE ├── package.yaml ├── glas-hs.cabal └── stack.yaml ├── .gitignore ├── docs ├── GlasNotebooks.md ├── GlasCLI.md ├── GlasLang.md ├── GlasGUI.md ├── GlasImpl.md ├── GlasObject.md ├── GlasNamespaces.md ├── GlasDesign.md ├── GlasProg.md └── GlasApps.md └── README.md /.gitmodules: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /c/src/glas_internal.h: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /glas-hs/README.md: -------------------------------------------------------------------------------- 1 | # glas-hs 2 | -------------------------------------------------------------------------------- /glas-hs/.gitignore: -------------------------------------------------------------------------------- 1 | .stack-work/ 2 | *~ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .ionide/ 2 | .vscode/ 3 | bin/ 4 | build/ 5 | .fake 6 | -------------------------------------------------------------------------------- /glas-hs/src/AST.hs: -------------------------------------------------------------------------------- 1 | module AST ( 2 | 3 | ) where 4 | 5 | 6 | -------------------------------------------------------------------------------- /glas-hs/Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | 
-------------------------------------------------------------------------------- /glas-hs/test/Spec.hs: -------------------------------------------------------------------------------- 1 | main :: IO () 2 | main = putStrLn "Test suite not yet implemented" 3 | -------------------------------------------------------------------------------- /glas-hs/app/Main.hs: -------------------------------------------------------------------------------- 1 | module Main (main) where 2 | 3 | import Lib 4 | 5 | main :: IO () 6 | main = someFunc 7 | -------------------------------------------------------------------------------- /glas-hs/src/Lib.hs: -------------------------------------------------------------------------------- 1 | module Lib 2 | ( someFunc 3 | ) where 4 | 5 | someFunc :: IO () 6 | someFunc = putStrLn "someFunc" 7 | -------------------------------------------------------------------------------- /glas-hs/src/Prog.hs: -------------------------------------------------------------------------------- 1 | module Prog ( 2 | P(..) 3 | ) where 4 | 5 | data P 6 | = Pass 7 | | Fail 8 | | Do P P 9 | 10 | -------------------------------------------------------------------------------- /c/glas: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | # script to build and run glas 4 | # execute with GLAS_RELEASE defined for release mode. 5 | # run from the script's own directory; "$0" is quoted so paths with spaces work, and we stop if cd fails 6 | cd -- "$(dirname -- "$0")" || exit 1 7 | 8 | if [ -v GLAS_RELEASE ]; then 9 | make release && ./bin/release/glas "$@" 10 | else 11 | make debug && ./bin/debug/glas "$@" 12 | fi 13 | -------------------------------------------------------------------------------- /glas-hs/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog for `glas-hs` 2 | 3 | All notable changes to this project will be documented in this file. 
4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to the 7 | [Haskell Package Versioning Policy](https://pvp.haskell.org/). 8 | 9 | ## Unreleased 10 | 11 | ## 0.1.0.0 - YYYY-MM-DD 12 | -------------------------------------------------------------------------------- /glas-hs/stack.yaml.lock: -------------------------------------------------------------------------------- 1 | # This file was autogenerated by Stack. 2 | # You should not edit this file by hand. 3 | # For more information, please see the documentation at: 4 | # https://docs.haskellstack.org/en/stable/topics/lock_files 5 | 6 | packages: [] 7 | snapshots: 8 | - completed: 9 | sha256: 95817c7fe8ae8ca6e8f4ecfd3e876b3edc4e5fbba9f8d8f757e87b484f9bfe6b 10 | size: 726101 11 | url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/24/13.yaml 12 | original: 13 | url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/24/13.yaml 14 | -------------------------------------------------------------------------------- /c/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | 3 | CFLAGS_BASE = -Wall -Wextra -Werror -std=c11 -I api/ -fvisibility=hidden 4 | LINK_FLAGS = 5 | DEBUG_CFLAGS = $(CFLAGS_BASE) -g -Og -DDEBUG 6 | RELEASE_CFLAGS = $(CFLAGS_BASE) -O3 -DNDEBUG 7 | 8 | .PHONY: all debug release clean 9 | 10 | all: release 11 | debug: bin/debug/glas 12 | release: bin/release/glas 13 | clean: 14 | rm -rf build 15 | rm -rf bin 16 | 17 | SOURCES = main.c glas.c 18 | OBJECTS = $(SOURCES:.c=.o) 19 | EXTRA_DEPS = api/glas.h src/glas_internal.h Makefile 20 | 21 | bin/debug/glas: $(addprefix build/debug/,$(OBJECTS)) 22 | @mkdir -p bin/debug 23 | $(CC) $(DEBUG_CFLAGS) $^ -o $@ 24 | 25 | build/debug/%.o: src/%.c $(EXTRA_DEPS) 26 | @mkdir -p build/debug 27 | $(CC) $(DEBUG_CFLAGS) -c $< -o $@ 28 | 29 | bin/release/glas: $(addprefix 
build/release/,$(OBJECTS)) 30 | @mkdir -p bin/release 31 | $(CC) $(RELEASE_CFLAGS) $^ -o $@ 32 | 33 | build/release/%.o: src/%.c $(EXTRA_DEPS) 34 | @mkdir -p build/release 35 | $(CC) $(RELEASE_CFLAGS) -c $< -o $@ 36 | 37 | 38 | -------------------------------------------------------------------------------- /glas-hs/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2025 David Barbour 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this 7 | list of conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | 3. Neither the name of the copyright holder nor the names of its contributors 14 | may be used to endorse or promote products derived from this software 15 | without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 18 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 21 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 24 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | -------------------------------------------------------------------------------- /glas-hs/package.yaml: -------------------------------------------------------------------------------- 1 | name: glas-hs 2 | version: 0.1.0.0 3 | github: "dmbarbour/glas-hs" 4 | license: BSD-3-Clause 5 | author: "David Barbour" 6 | maintainer: "dmbarbour@gmail.com" 7 | copyright: "2025 David Barbour" 8 | 9 | extra-source-files: 10 | - README.md 11 | - CHANGELOG.md 12 | 13 | # Metadata used when publishing your package 14 | # synopsis: Short description of your package 15 | # category: Web 16 | 17 | # To avoid duplicated efforts in documentation and dealing with the 18 | # complications of embedding Haddock markup inside cabal files, it is 19 | # common to point users to the README.md file. 20 | description: Please see the README on GitHub at 21 | 22 | dependencies: 23 | - base >= 4.7 && < 5 24 | 25 | ghc-options: 26 | - -Wall 27 | - -Wcompat 28 | - -Widentities 29 | - -Wincomplete-record-updates 30 | - -Wincomplete-uni-patterns 31 | - -Wmissing-export-lists 32 | - -Wmissing-home-modules 33 | - -Wpartial-fields 34 | - -Wredundant-constraints 35 | 36 | library: 37 | source-dirs: src 38 | dependencies: 39 | - vector 40 | - bytestring 41 | 42 | executables: 43 | glas-hs-exe: 44 | main: Main.hs 45 | source-dirs: app 46 | ghc-options: 47 | - -threaded 48 | - -rtsopts 49 | - -with-rtsopts=-N 50 | dependencies: 51 | - glas-hs 52 | 53 | tests: 54 | glas-hs-test: 55 | main: Spec.hs 56 | source-dirs: test 57 | ghc-options: 58 | - -threaded 59 | - -rtsopts 60 | - -with-rtsopts=-N 61 | dependencies: 62 | - glas-hs 63 | -------------------------------------------------------------------------------- /docs/GlasNotebooks.md: -------------------------------------------------------------------------------- 1 | # Interactive Development 2 | 3 | Extending a read-eval-print loop (REPL) to support graphical inputs and outputs, live coding, and fancy comments essentially results in a [notebook 
interface](https://en.wikipedia.org/wiki/Notebook_interface). In my vision for glas systems, this sort of interactive development experience should be the default. Even for applications that present a conventional front-end, we might provide access to the notebook view. 4 | 5 | In the notebook metaphor, a module might represent a page or chapter or widget. Pages could be inlined or hyperlinked into a view. Ideally, we can automatically construct a table of contents, or mark some content for latent access as an appendix. Not every syntax is suitable for presenting source alongside runtime output. However, even a ".txt" file benefits from a good editable projection, and a larger notebook page could let users immediately observe the outcome of changing the text. 6 | 7 | ## Implementation Thoughts 8 | 9 | ### Source Setters 10 | 11 | We can use the abstract Src type at runtime to support setters. 12 | 13 | ### Auxiliary Output 14 | 15 | Tables of contents, table of illustrations or interactions, navigation support (e.g. 'prev' and 'next' buttons), etc. 16 | 17 | Use namespace Aggregation patterns for these. 18 | 19 | ### Cooperative Work 20 | 21 | Consider sophisticated Src setters that support cooperative development? Probably a distant future feature. 22 | 23 | ### Avoiding Bloat 24 | 25 | Move most projectional editor logic into shared libraries. Don't generate this logic per module compiled. 26 | 27 | ### Annotation-based Hooks? 28 | 29 | See *Debug Views* in [glas programs](GlasProg.md). 30 | 31 | This might prove a convenient way to integrate views into a notebook, relying on the 'sys.refl.view.\*' API to bind it into an application GUI. 32 | 33 | -------------------------------------------------------------------------------- /glas-hs/glas-hs.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: 2.2 2 | 3 | -- This file has been generated from package.yaml by hpack version 0.37.0. 
4 | -- 5 | -- see: https://github.com/sol/hpack 6 | 7 | name: glas-hs 8 | version: 0.1.0.0 9 | description: Please see the README on GitHub at 10 | homepage: https://github.com/dmbarbour/glas-hs#readme 11 | bug-reports: https://github.com/dmbarbour/glas-hs/issues 12 | author: David Barbour 13 | maintainer: dmbarbour@gmail.com 14 | copyright: 2025 David Barbour 15 | license: BSD-3-Clause 16 | license-file: LICENSE 17 | build-type: Simple 18 | extra-source-files: 19 | README.md 20 | CHANGELOG.md 21 | 22 | source-repository head 23 | type: git 24 | location: https://github.com/dmbarbour/glas-hs 25 | 26 | library 27 | exposed-modules: 28 | AST 29 | Lib 30 | Prog 31 | Val 32 | other-modules: 33 | Paths_glas_hs 34 | autogen-modules: 35 | Paths_glas_hs 36 | hs-source-dirs: 37 | src 38 | ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints 39 | build-depends: 40 | base >=4.7 && <5 41 | , bytestring 42 | , vector 43 | default-language: Haskell2010 44 | 45 | executable glas-hs-exe 46 | main-is: Main.hs 47 | other-modules: 48 | Paths_glas_hs 49 | autogen-modules: 50 | Paths_glas_hs 51 | hs-source-dirs: 52 | app 53 | ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N 54 | build-depends: 55 | base >=4.7 && <5 56 | , glas-hs 57 | default-language: Haskell2010 58 | 59 | test-suite glas-hs-test 60 | type: exitcode-stdio-1.0 61 | main-is: Spec.hs 62 | other-modules: 63 | Paths_glas_hs 64 | autogen-modules: 65 | Paths_glas_hs 66 | hs-source-dirs: 67 | test 68 | ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints -threaded -rtsopts -with-rtsopts=-N 69 | 
build-depends: 70 | base >=4.7 && <5 71 | , glas-hs 72 | default-language: Haskell2010 73 | -------------------------------------------------------------------------------- /glas-hs/stack.yaml: -------------------------------------------------------------------------------- 1 | # This file was automatically generated by 'stack init' 2 | # 3 | # Some commonly used options have been documented as comments in this file. 4 | # For advanced use and comprehensive documentation of the format, please see: 5 | # https://docs.haskellstack.org/en/stable/configure/yaml/ 6 | 7 | # A 'specific' Stackage snapshot or a compiler version. 8 | # A snapshot resolver dictates the compiler version and the set of packages 9 | # to be used for project dependencies. For example: 10 | # 11 | # snapshot: lts-23.0 12 | # snapshot: nightly-2024-12-13 13 | # snapshot: ghc-9.8.4 14 | # 15 | # The location of a snapshot can be provided as a file or url. Stack assumes 16 | # a snapshot provided as a file might change, whereas a url resource does not. 17 | # 18 | # snapshot: ./custom-snapshot.yaml 19 | # snapshot: https://example.com/snapshots/2024-01-01.yaml 20 | snapshot: 21 | url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/24/13.yaml 22 | 23 | # User packages to be built. 24 | # Various formats can be used as shown in the example below. 25 | # 26 | # packages: 27 | # - some-directory 28 | # - https://example.com/foo/bar/baz-0.0.2.tar.gz 29 | # subdirs: 30 | # - auto-update 31 | # - wai 32 | packages: 33 | - . 34 | # Dependency packages to be pulled from upstream that are not in the snapshot. 35 | # These entries can reference officially published versions as well as 36 | # forks / in-progress versions pinned to a git hash. 
For example: 37 | # 38 | # extra-deps: 39 | # - acme-missiles-0.3 40 | # - git: https://github.com/commercialhaskell/stack.git 41 | # commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a 42 | # 43 | # extra-deps: [] 44 | 45 | # Override default flag values for project packages and extra-deps 46 | # flags: {} 47 | 48 | # Extra package databases containing global packages 49 | # extra-package-dbs: [] 50 | 51 | # Control whether we use the GHC we find on the path 52 | # system-ghc: true 53 | # 54 | # Require a specific version of Stack, using version ranges 55 | # require-stack-version: -any # Default 56 | # require-stack-version: ">=3.3" 57 | # 58 | # Override the architecture used by Stack, especially useful on Windows 59 | # arch: i386 60 | # arch: x86_64 61 | # 62 | # Extra directories used by Stack for building 63 | # extra-include-dirs: [/path/to/dir] 64 | # extra-lib-dirs: [/path/to/dir] 65 | # 66 | # Allow a newer minor version of GHC than the snapshot specifies 67 | # compiler-check: newer-minor 68 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Glas 2 | 3 | Glas is a general language system. Glas is designed for scalable, reproducible, and extensible software systems. Glas also re-envisions the application model to simplify concurrency, cache management, and live coding or continuous deployment. 4 | 5 | ## Glas Overview 6 | 7 | Glas has several non-conventional features: 8 | 9 | Glas supports **user-defined syntax**, guided by file extensions. To compute the module value for a file named `foo.xyz`, the compiler will use a program defined by the module named `language-xyz`. It is possible to develop alternative syntax, DSLs, integrate `.json` files as modules, or support projectional editing via specialized syntax. 10 | 11 | Glas supports **user-defined compilers**. 
When modules compute binary values, those binaries can be extracted. Thus, it is possible to 'compile' meme images or documents. Compiling a program involves processing a homoiconic representation into an executable binary, then extracting. 12 | 13 | Glas supports **large, incremental builds**. Large values support structure sharing across builds by content-addressed storage, i.e. using secure-hashes as value references. Work sharing across similar builds can be supported by explicit memoization. 14 | 15 | Glas will use explicit [**acceleration**](https://en.wikipedia.org/wiki/Hardware_acceleration) for high-performance computing. For example, we could simulate an abstract CPU, then replace by actual CPU to implement compression or cryptography algorithms. Acceleration of Kahn Process Networks could support distributed builds. 16 | 17 | Glas favors a [**transaction machine application model**](docs/GlasApps.md) that is more amenable to live coding and distributed overlay programs than the conventional `int main(string[] args)` app model. This comes with some optimization challenges, but I'm optimistic that it can provide a better basis for applications. 18 | 19 | See the [design doc](docs/GlasDesign.md) for more detail. 20 | 21 | ## Project Goals 22 | 23 | The concrete goal is to bootstrap a command-line utility named `glas` with support for user-defined syntax, compilation of modules, extraction of binaries, content-addressed storage, incremental builds, continuous builds, and usable acceleration (ideally for CPU and KPN). 24 | 25 | A minimal bootstrap implementation will be written in F#, with a motive to leverage JIT compilation for performance. The bootstrap will not support acceleration or stowage unless these prove necessary. 26 | 27 | ## Status of Language 28 | 29 | Glas has been re-envisioned several times, so it's been a slow start. KPNs were dropped from the initial program model because they're too complicated. 
Backtracking was reintroduced to simplify conditional and loop combinators. 30 | 31 | But at this point I'm ready to start programming. 32 | 33 | -------------------------------------------------------------------------------- /glas-hs/src/Val.hs: -------------------------------------------------------------------------------- 1 | module Val ( 2 | Val, zero, 3 | 4 | -- Basic Construction 5 | 6 | 7 | -- TODO: 8 | -- ofInt, 9 | --mkL, mkR, mkP, 10 | 11 | 12 | -- bitstring manipulations 13 | is_bits, has_bits, 14 | bits_len, bits_invert, bit_cons, bits_head, bits_tail, 15 | -- TODO: has_byte, byte_cons, byte_head, bytes_tail, (not defined yet; exporting them breaks the build) 16 | 17 | 18 | ) where 19 | 20 | import Data.Word 21 | import Data.Bits 22 | import qualified Data.Vector.Strict as VS 23 | import qualified Data.ByteString as BS 24 | 25 | -- Stem encodes 0..63 bits 26 | -- 1000..0 0 bits 27 | -- a100..0 1 bit 28 | -- ab10..0 2 bits 29 | -- abcd..1 63 bits 30 | -- 0000..0 unused 31 | type Stem = Word64 32 | 33 | -- | the basic glas data type (no external refs) 34 | -- Note: this data is strict; no laziness in glas data. 35 | data Val = V !Stem !Node 36 | data Node 37 | = Leaf 38 | | Stem64 !Word64 !Node 39 | | Branch !Val !Val 40 | -- optimized lists 41 | | Arr !(VS.Vector Val) 42 | | Bin !BS.ByteString 43 | | Concat !Node !Node 44 | | Take !Int !Node 45 | -- stem holding zero data bits: only the length-marker bit (bit 63) is set 46 | empty_stem :: Stem 47 | empty_stem = (1 `shiftL` 63) 48 | -- a stem is full (63 data bits) exactly when the length marker sits in bit 0 49 | stem_is_full :: Stem -> Bool 50 | stem_is_full s = ((s .&. 1) == 1) 51 | -- true when the stem holds no data bits 52 | stem_is_empty :: Stem -> Bool 53 | stem_is_empty s = (s == empty_stem) 54 | -- the length-marker bit for a stem holding n data bits (n in 0..63) 55 | stem_lenbit :: Int -> Stem 56 | stem_lenbit n = 1 `shiftL` (63 - n) 57 | 58 | -- invert the data bits of a stem; the length-marker bit and the zero padding below it are left untouched, so the stem length is preserved 59 | invert_stem :: Stem -> Stem 60 | invert_stem s = 61 | let lenbit = stem_lenbit (stem_len s) in 62 | s `xor` (complement (lenbit .|. (lenbit - 1))) 63 | 64 | 65 | stem_len2 :: Stem -> Int -> Int 66 | stem_len2 s n = 67 | if (0 == (0x1 .&. 
s)) then 68 | n 69 | else 70 | (n + 1) 71 | 72 | stem_len4 :: Stem -> Int -> Int 73 | stem_len4 s n = 74 | if (0 == (0x3 .&. s)) then 75 | stem_len2 (s `shiftR` 2) n 76 | else 77 | stem_len2 s (n + 2) 78 | 79 | stem_len8 :: Stem -> Int -> Int 80 | stem_len8 s n = 81 | if (0 == (0xF .&. s)) then 82 | stem_len4 (s `shiftR` 4) n 83 | else 84 | stem_len4 s (n + 4) 85 | 86 | stem_len16 :: Stem -> Int -> Int 87 | stem_len16 s n = 88 | if (0 == (0xFF .&. s)) then 89 | stem_len8 (s `shiftR` 8) n 90 | else 91 | stem_len8 s (n + 8) 92 | 93 | stem_len32 :: Stem -> Int -> Int 94 | stem_len32 s n = 95 | if (0 == (0xFFFF .&. s)) then 96 | stem_len16 (s `shiftR` 16) n 97 | else 98 | stem_len16 s (n + 16) 99 | -- number of data bits in a stem (0 for the empty stem, 63 for a full one). Binary search for the lowest set bit (the length marker): each step tests the low half of the remaining window; if it is all zero the marker is in the high half and is shifted down, otherwise the half's width is added to n. 100 | stem_len :: Stem -> Int 101 | stem_len s = 102 | if (0 == (0xFFFFFFFF .&. s)) then 103 | stem_len32 (s `shiftR` 32) 0 104 | else 105 | stem_len32 s 32 106 | 107 | -- | the zero value, also puns as empty list, empty dict 108 | zero :: Val 109 | zero = V empty_stem Leaf 110 | 111 | -- | test whether Val is simply a bitstring 112 | is_bits :: Val -> Bool 113 | is_bits (V _ n) = is_bits_node n 114 | -- a node is bits-only when it is a chain of Stem64 segments ending in Leaf 115 | is_bits_node :: Node -> Bool 116 | is_bits_node n = case n of 117 | Leaf -> True 118 | Stem64 _ n' -> is_bits_node n' 119 | _ -> False 120 | 121 | -- | test whether Val has at least one stem bit (bits_len > 0) 122 | has_bits :: Val -> Bool 123 | has_bits (V s n) = if stem_is_empty s then has_bits_node n else True 124 | -- only a Stem64 node contributes further bits 125 | has_bits_node :: Node -> Bool 126 | has_bits_node n = case n of 127 | Stem64 _ _ -> True 128 | _ -> False 129 | 130 | -- | return number of continuous bits before a leaf or branch 131 | bits_len :: Val -> Int 132 | bits_len (V s n) = bits_len_node (stem_len s) n 133 | -- add 64 to the running count ct per Stem64 segment; stop at the first other node 134 | bits_len_node :: Int -> Node -> Int 135 | bits_len_node ct n = case n of 136 | Stem64 _ n' -> bits_len_node (64 + ct) n' 137 | _ -> ct 138 | 139 | -- | invert all bits indicated in bits_len 140 | bits_invert :: Val -> Val 141 | bits_invert (V s n) = V (invert_stem s) 
(bits_invert_node [] n) 142 | -- walk the Stem64 chain collecting its words (reversed) in l, then rebuild the chain with every word complemented on top of the first non-Stem64 node 143 | bits_invert_node :: [Word64] -> Node -> Node 144 | bits_invert_node l n = case n of 145 | Stem64 s n' -> bits_invert_node (s:l) n' 146 | _ -> foldl' (\t s -> Stem64 (complement s) t) n l 147 | -- | prepend one bit; when the stem is already full the 64 resulting bits move into a new Stem64 node and the stem restarts empty 148 | bit_cons :: Bool -> Val -> Val 149 | bit_cons b (V s n) = 150 | let s' = (s `shiftR` 1) .|. (if b then (1 `shiftL` 63) else 0) in 151 | if stem_is_full s then 152 | V empty_stem (Stem64 s' n) 153 | else 154 | V s' n 155 | -- top bit (bit 63) of a stem word 156 | stem_head :: Stem -> Bool 157 | stem_head s = ((1 `shiftL` 63) .&. s) /= 0 158 | -- | first bit of the value; calls error when the stem is empty and no Stem64 node follows 159 | bits_head :: Val -> Bool 160 | bits_head (V s n) = 161 | if stem_is_empty s then 162 | case n of 163 | Stem64 s' _ -> stem_head s' 164 | _ -> error "bits_head: no bits" 165 | else 166 | stem_head s 167 | -- | drop the first bit; calls error when the stem is empty and no Stem64 node follows 168 | bits_tail :: Val -> Val 169 | bits_tail (V s n) = 170 | if stem_is_empty s then 171 | case n of 172 | Stem64 s' n' -> 173 | V ((s' `shiftL` 1) .|. 1) n' 174 | _ -> error "bits_tail: no bits" 175 | else 176 | V (s `shiftL` 1) n 177 | -------------------------------------------------------------------------------- /docs/GlasCLI.md: -------------------------------------------------------------------------------- 1 | # Glas Command Line Interface 2 | 3 | The glas executable generally takes the first command line argument as a switch to interpret the remainder of the arguments. There are a few options for running applications from different sources, and some ad-hoc operations to help users understand and control the glas system. 4 | 5 | glas --run AppName Args To App 6 | glas --script SourceRef Args To App 7 | glas --script.FileExt FilePath Args To App 8 | glas --cmd.FileExt "Source Text" Args To App 9 | glas --cache CacheOp 10 | glas --conf ConfigOp 11 | ... etc. ... 
12 | 13 | A simple syntactic sugar supports user-defined operations: 14 | 15 | glas opname Args 16 | # implicitly rewrites to 17 | glas --run cli.opname Args 18 | 19 | My vision for early use of glas systems is that end users mostly operate through user-defined operations. To avoid cluttering the command line with runtime switches, we push runtime options into the configuration file, application settings, or (rarely) OS environment variables. 20 | 21 | ## Configuration 22 | 23 | The glas executable will read a user configuration based on a `GLAS_CONF` environment variable, loading the specified file as a [namespace](GlasNamespaces.md). If unspecified, the default location is `"~/.config/glas/conf.glas"` in Linux or `"%AppData%\glas\conf.glas"` on Windows. 24 | 25 | A typical user configuration will inherit from a community or company configuration from DVCS, then override some definitions for the user's projects, preferences, and resources. Each DVCS repository becomes a boundary for curation, security, and version control. A community configuration will define hundreds of shared libraries and applications in 'env.\*', relying on lazy loading and shared caching for performance. 26 | 27 | To mitigate risk of naming conflict, the runtime will recognize configuration options under 'conf.\*'. The glas executable may expect definitions for shared heap storage, RPC registries, rooted trust for public key infrastructure, and so on. An extensible executable may support user-defined accelerators, optimizers, and typecheckers via the user configuration. We could maximize portability by asking a configuration to generate an adapter based on application settings and runtime version info. 28 | 29 | ## Running Applications 30 | 31 | Applications are typically defined within the user configuration. But we'll also support scripting, where we generate applications in context of a configured environment. 
See [glas applications](GlasApps.md) for details on how an application is defined. 32 | 33 | * **--run AppName**: Usually refers to 'env.AppName.app' defined in the configuration namespace. As a special case, '--run .' refers to the toplevel configuration 'app'. 34 | * **--script FilePath**: Process indicated file in context of configured front-end compiler (based on file extension). The expected result is a module that defines 'app'. Link this module in context of the configured environment. 35 | * **--script.FileExt FilePath**: as '--script' except we select a front-end compiler based on a given file extension, ignoring the actual extension. Useful in context of Linux shebang lines. 36 | * **--cmd.FileExt SourceText**: as '--script.FileExt' except we also provide the script text as a command-line argument. We might present this as a read-only virtual file. 37 | 38 | *Note:* There are no command-line arguments for tuning runtime features such as verbose modes or garbage collection. Instead, we favor application 'settings' or `GLAS_*` environment variables in these roles to mitigate command-line clutter. 39 | 40 | ## Installing Applications 41 | 42 | In context of lazy loading and DVCS sources, we must generally be 'online' to run glas applications. Further, even if sources are local, there is latency on first run due to compilation. To mitigate, we can support 'installing' applications ahead of time, maintaining a local cache of compiled code. 43 | 44 | This might be expressed with a few command-line options to maintain a `".config/glas/installs"` file or folder, listing installs and checking for updates, much like apt and similar tools. 45 | 46 | Relatedly, compilation should eventually be separated and shared, e.g. by configuring proxy compilers and trusted PKI signatures. 47 | 48 | ## Trusting Application 49 | 50 | I propose to assign trust to providers of source code, leveraging PKI infrastructure. 
User, company, or community configurations may trust individual developers or certificate authorities with access to security-sensitive features such as FFI. Source folders shall include signed manifests and certificates within `".pki/"` subfolders. 51 | 52 | In theory, trust can be scoped. For example, when signing a manifest or certificate, a developer might indicate they trust code with GUI but not full FFI or network access. Unfortunately, I cannot imagine developers precisely scoping trust to individual sources. It seems more feasible, in practice, to trust providers, e.g. a given developer is trusted with GUI. 53 | 54 | Attenuation of trust is achieved through annotation of trusted libraries or frameworks. For example, a user trusts the provider of library X with FFI access, but annotations for some definitions within library X may relax the requirement that the caller is trusted with FFI, optionally introducing a requirement that the caller is instead trusted with GUI. Leveraging abstract data types, we might distinguish trust requirements for *opening* a file from further operations on the abstract file. 55 | 56 | A compiler can validate trust annotations and attenuations much like type annotations. Trusted providers can develop trusted sandboxes for untrusted applications. Applications can be developed with various trust assumptions, e.g. building upon code from popularly trusted providers versus using 'sys.ffi.\*' directly. Trust is relatively fine-grained compared to trusting full applications. 57 | 58 | ## Built-in Tooling 59 | 60 | The glas executable may be extended with useful built-in tools. 
Some tools that might prove useful: 61 | 62 | * **--conf** - inspect and debug the configuration, perhaps initialize one 63 | * **--apt** - install, uninstall, update defined applications and libraries 64 | * **--cache** - inspect cached resources, manual management options 65 | * **--db** - query, browse, watch, or edit persistent data in the shared heap 66 | 67 | Functions available via built-in CLI tools should be accessible through applications, even if only through 'sys.refl.\*' reflection APIs. Also, I hope to keep the glas executable relatively small, enough to bootstrap but pushing most logic into the module system and cached compiled code. 68 | 69 | ## Glas Shell 70 | 71 | In context of live coding and projectional editing, applications may provide their own IDE. See [glas notebooks](GlasNotebooks.md). The user configuration is no exception, it could provide 'app' for self editing. 72 | 73 | We could feasibly leverage this as a basis for a [shell](https://en.wikipedia.org/wiki/Shell_(computing)) in glas systems. Instead of running individual glas applications from the command line, run the shell and modify it. To 'run' an application would then be to integrate it with the shell. 74 | 75 | In practice, we'll often want instanced shells, such that common edits like running the application are local to an instance. We could feasibly introduce command-line tools to run instances, or rely on conventions for instancing internally within the shell. 76 | 77 | ## Implementation Roadmap 78 | 79 | The initial implementation of the glas executable must be developed outside the glas system. This implementation will lack many features, especially the optimizations that would let transaction loops scale beyond a simple event dispatch loop. Fortunately, simple event dispatch loops are still very useful, and we can fully utilize a system between FFI, accelerators, and sparks. We also have access to conventional threaded applications. 
80 | 81 | Ideally, we'll eventually bootstrap the glas executable within the glas system. Early forms of bootstrap could generate C or LLVM, but I hope to swiftly eliminate external dependencies and integrate relevant optimizations into the glas libraries. 82 | 83 | 84 | -------------------------------------------------------------------------------- /c/src/main.c: -------------------------------------------------------------------------------- 1 | /** 2 | * An implementation of the glas command-line interface. 3 | * Copyright (C) 2025 David Barbour 4 | * 5 | * This program is free software: you can redistribute it and/or modify 6 | * it under the terms of the GNU General Public License as published by 7 | * the Free Software Foundation, either version 3 of the License, or 8 | * (at your option) any later version. 9 | * 10 | * This program is distributed in the hope that it will be useful, 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | * GNU General Public License for more details. 14 | * 15 | * You should have received a copy of the GNU General Public License 16 | * along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | */ 18 | 19 | 20 | #define GLAS_VER "0.1" 21 | #define GLAS_HELP_STR \ 22 | "A pre-bootstrap implementation of the glas CLI\n"\ 23 | "\n"\ 24 | "Environment Vars:\n"\ 25 | " GLAS_CONF - file path to configuration\n"\ 26 | " default is ~/.config/glas/conf.glas\n"\ 27 | "\n"\ 28 | "Commands:\n"\ 29 | " glas --run AppName Arg*\n"\ 30 | " run application 'env.AppName.app' defined in user config\n"\ 31 | " glas --script(.FileExt) FilePath Arg*\n"\ 32 | " run application defined as 'app' after compiling file\n"\ 33 | " if FileExt is specified, actual file extension ignored\n"\ 34 | " glas --cmd.FileExt ScriptText Arg*\n"\ 35 | " equivalent to --script.FileExt with file of given text\n"\ 36 | " glas --extract BinaryName\n"\ 37 | " load definition of BinaryName defined in user config\n"\ 38 | " if this is a binary, print to standard output\n"\ 39 | " glas --bit TestName*\n"\ 40 | " run built-in tests. If no TestName, runs all tests.\n"\ 41 | "" 42 | 43 | #include 44 | #include 45 | #include 46 | #include 47 | 48 | static char* my_strdup(char const* s) { 49 | if(NULL == s) { s = ""; } 50 | return strcpy((char*)malloc(strlen(s) + 1), s); 51 | } 52 | #define strdup my_strdup 53 | 54 | typedef enum glas_act { 55 | GLAS_ACT_HELP = 0, 56 | // getting started 57 | GLAS_ACT_BUILT_IN_TEST, 58 | GLAS_ACT_EXTRACT_BINARY, 59 | // getting ambitious 60 | GLAS_ACT_RUN, 61 | GLAS_ACT_RUN_SCRIPT, 62 | GLAS_ACT_RUN_CMD, 63 | // TBD: 64 | // build app without running 65 | // binary extraction from config 66 | // run apps via script or command 67 | // 68 | GLAS_ACT_UNRECOGNIZED, 69 | } glas_act; 70 | 71 | typedef struct { 72 | glas_act action; 73 | char* app_src; 74 | char const* script_lang; 75 | int argc_rem; 76 | char const* const* argv_rem; 77 | } glas_cli_options; 78 | 79 | void glas_free_cli_options(glas_cli_options* pOpts) { 80 | free(pOpts->app_src); 81 | free(pOpts); 82 | } 83 | 84 | // just brute forcing this for now. Maybe make it elegant later. 
85 | glas_cli_options* glas_cli_parse_args(int argc, char const* const* argv) { 86 | #define CLI_ARG_STEP(N) do { argc -= N; argv += N; } while(0) 87 | CLI_ARG_STEP(1); // skip executable name 88 | glas_cli_options* result = (glas_cli_options*) malloc(sizeof(glas_cli_options)); 89 | memset(result, 0, sizeof(glas_cli_options)); 90 | if ((argc < 1) || (0 == strcmp("--help", argv[0]))) { 91 | result->action = GLAS_ACT_HELP; 92 | if(argc >= 1) { 93 | CLI_ARG_STEP(1); 94 | } 95 | } else if(0 == strcmp("--bit", argv[0])) { 96 | result->action = GLAS_ACT_BUILT_IN_TEST; 97 | CLI_ARG_STEP(1); 98 | } else if((0 == strcmp("--extract", argv[0])) && (argc == 2)) { 99 | result->action = GLAS_ACT_EXTRACT_BINARY; 100 | size_t const buflen = strlen(argv[1]) + 32; 101 | char buf[buflen]; 102 | snprintf(buf, buflen, "env.%s", argv[1]); 103 | result->app_src = strdup(buf); 104 | CLI_ARG_STEP(2); 105 | } else if((0 == strcmp("--run", argv[0])) && (argc > 1)) { 106 | result->action = GLAS_ACT_RUN; 107 | size_t const buflen = strlen(argv[1]) + 32; 108 | char buf[buflen]; 109 | if('.' == argv[1][0]) { 110 | if(0 == argv[1][1]) { 111 | result->app_src = strdup("app"); 112 | } else { 113 | snprintf(buf, buflen, "%s.app", argv[1]+1); 114 | result->app_src = strdup(buf); 115 | } 116 | } else { 117 | snprintf(buf, buflen, "env.%s.app", argv[1]); 118 | result->app_src = strdup(buf); 119 | } 120 | CLI_ARG_STEP(2); 121 | } else if (argv[0][0] != '-') { 122 | // syntactic sugar 'opname' => --run 'cli.opname'. 123 | // but I don't want to allocate here. 
124 | result->action = GLAS_ACT_RUN; 125 | size_t const buflen = 32 + strlen(argv[0]); 126 | char buf[buflen]; 127 | snprintf(buf, buflen, "env.cli.%s.app", argv[0]); 128 | result->app_src = strdup(buf); 129 | CLI_ARG_STEP(1); 130 | } else if((0 == strncmp("--script.", argv[0], 9)) && (argc > 1)) { 131 | result->action = GLAS_ACT_RUN_SCRIPT; 132 | result->script_lang = argv[0] + 9; 133 | result->app_src = strdup(argv[1]); // alt realpath, but not good for debug 134 | CLI_ARG_STEP(2); 135 | } else if((0 == strcmp("--script", argv[0])) && (argc > 1)) { 136 | result->action = GLAS_ACT_RUN_SCRIPT; 137 | result->app_src = strdup(argv[1]); 138 | CLI_ARG_STEP(2); 139 | } else if((0 == strncmp("--cmd.", argv[0], 6)) && (argc > 1)) { 140 | result->action = GLAS_ACT_RUN_CMD; 141 | result->script_lang = argv[0] + 6; 142 | result->app_src = strdup(argv[1]); 143 | CLI_ARG_STEP(2); 144 | } else { 145 | result->action = GLAS_ACT_UNRECOGNIZED; 146 | } 147 | result->argc_rem = argc; 148 | result->argv_rem = argv; 149 | #undef CLI_ARG_STEP 150 | return result; 151 | } 152 | 153 | int glas_cli_bit(int argc, char const* const* argv); 154 | int glas_cli_extract(char const* src); 155 | 156 | int main(int argc, char const* const* argv) 157 | { 158 | int result = 0; 159 | glas_cli_options* pOpt = glas_cli_parse_args(argc, argv); 160 | //glas_cli_print_options(pOpt); 161 | 162 | if((GLAS_ACT_HELP == pOpt->action) || 163 | (GLAS_ACT_UNRECOGNIZED == pOpt->action)) 164 | { 165 | fprintf(stdout, "glas version %s\n", GLAS_VER); 166 | fprintf(stdout, "%s", GLAS_HELP_STR); 167 | } else if(GLAS_ACT_BUILT_IN_TEST == pOpt->action) { 168 | result = glas_cli_bit(pOpt->argc_rem, pOpt->argv_rem); 169 | } else if(GLAS_ACT_EXTRACT_BINARY == pOpt->action) { 170 | result = glas_cli_extract(pOpt->app_src); 171 | } else { 172 | fprintf(stdout, "command not yet supported!\n"); 173 | } 174 | 175 | 176 | #if 0 177 | 178 | 179 | if(GLAS_ACTION_RUN == opt.action) { 180 | size_t const full_src_len = 
strlen(opt.app_src) + 32; 181 | char full_name[full_src_len]; 182 | snprintf(full_name, full_src_len, "env.%s.app", opt.app_src); 183 | glas_run(grt, full_name, opt.argc_app, opt.argv_app); 184 | } else if(GLAS_ACTION_RUN_CLI_APP == opt.action) { 185 | size_t const full_src_len = strlen(opt.app_src) + 32; 186 | char full_name[full_src_len]; 187 | snprintf(full_name, full_src_len, "env.cli.%s.app", opt.app_src); 188 | glas_run(grt, full_name, opt.argc_app, opt.argv_app); 189 | } else { 190 | glas_destroy(grt); 191 | return -1; 192 | } 193 | #endif 194 | 195 | fflush(stdout); 196 | glas_free_cli_options(pOpt); 197 | return result; 198 | } 199 | 200 | 201 | int glas_cli_extract(char const* src) { 202 | (void) src; 203 | glas* g = glas_thread_new(); 204 | 205 | 206 | glas_thread_exit(g); 207 | return -1; 208 | } 209 | 210 | //#include "minunit.h" 211 | 212 | 213 | int glas_cli_bit(int argc, char const* const* argv) { 214 | (void) argc; (void) argv; 215 | int tests_failed = 0; 216 | if(!glas_rt_run_builtin_tests()) { 217 | ++tests_failed; 218 | fprintf(stdout, "glas runtime built-in tests failed\n"); 219 | } 220 | return tests_failed; 221 | } 222 | 223 | -------------------------------------------------------------------------------- /docs/GlasLang.md: -------------------------------------------------------------------------------- 1 | # Glas Language 2 | 3 | This document describes the primary '.glas' syntax. 4 | 5 | The primary goal is to develop a synax *I'm* happy with, personally. I hope my taste in syntax is something many others will appreciate, but user-defined syntax is available to mitigate. Anyhow, this document will be heavily driven by *feels*. 6 | 7 | ## Design Notes 8 | 9 | - Users will mostly define operations of type `Env -> Program`. This allows the each call to receive a caller-controllable view of the caller's environment. The received arguments could either bind to a standard prefix, or be accessed via keyword. 
10 | - It would be convenient to support lightweight binding of data stack inputs and outputs to local registers. 11 | 12 | - Avoid deep horizontal syntax and its causes: 13 | - parameter lists. Parameter lists hinder refactoring within larger contexts, and also add to horizontal depth. 14 | - structured output, e.g. deep hierarchical configs or namespaces. Provide a lightweight means to define things and operate within a deep structure or namespace. Perhaps borrow location annotations from TOML? 15 | 16 | - Avoid explicit stack shuffling. It requires too much attention as programs grow more complicated. 17 | - instead, a lightweight syntax *similar* to parameter lists could move items into local registers. 18 | - unlike conventional parameter lists, these little operations may be composed and refactored. 19 | 20 | - Eliminate need for escape characters. Not in texts, not in names, etc.. 21 | - Users may still develop macros to explicitly postprocess a name or text containing escapes. 22 | - postprocessing must never be implicit, which is what causes escapes to 'explode' when, e.g. embedding a program source as text within another program. 23 | 24 | - Multi-line texts should be supported directly without a lot of syntactic cruft. 25 | - Perhaps each newline is indented one space? 26 | 27 | - Clear boundaries for syntax errors. 28 | - e.g. clear 'sections' that can be processed independently. 29 | - ability to decide per section whether it's a full 'error' or just a warning. 30 | 31 | - user-extensible syntax, ideally users can define keyword-like behaviors 32 | - this may include tags like the location annotations from TOML. 33 | - may need to maintain 'compiler context' in the private '%.\*' space. 
34 | 35 | - macros without special calling conventions (don't like the Rust '!') 36 | - tagged definitions can help with this 37 | - ability to define new syntax section types, 38 | 39 | - Inheritance and override of modules and apps, especially of mutually recursive definitions. 40 | - introduced implicit '%self' to help 41 | - recursive definitions by default 42 | - OO inspirations here, minus state 43 | 44 | - integrate nicely with REPL and notebook-style programming 45 | - ability to 'extend' a program by simply adding more content at the end 46 | - clear boundaries for editing prior 'commands' 47 | - toplevel namespace may need access to 'effects' in this context, but we don't have access to runtime effects in the primitive namespace. 48 | - option one: integrate via compile-time effects, mostly fetching data but stable publish-subscribe is also viable. Very awkward. 49 | - option two: implicit construction of an 'app' per module that represents behavior as a REPL. 50 | - for notebooks, must support GUI output and interaction 51 | - intriguingly, could model REPL and notebook 'outputs' via reflection (similar to logging), and user inputs as debugging. This allows transparently running a REPL or notebook without the overheads of user interaction. 52 | 53 | - type annotations: get the syntax working asap, even if they aren't fully checked. 54 | - support user definitions of type descriptions (distinct definition tag from programs, etc.) 55 | - support higher-order type description, in general, e.g. `Env -> TypeDesc`. 56 | - Similar for user-defined annotations. 57 | - ensure opportunity exists for fixed-width numbers and unboxed arrays and such 58 | - define a reasonable collection of types to get started. 
59 | 60 | - flexible definitions - not limited to programs, can define anything the namespace AST supports 61 | - tag definitions for flexible integration, but this may be convention (not enforced by syntax) 62 | - for abstract data types at compile time, extend seal/unseal to work with Src or plain old data 63 | 64 | - idea: a program is a vertical sequence of 'blocks'. 65 | - block start is marked by a line starting without indentation, 66 | - every following line within the block must be indented by SP. 67 | - empty lines add implicit SP. 68 | - each block is 'compiled' independently 69 | - text input as binary, removing the extra SP, normalizing newlines 70 | - each block returns AST representation, perhaps tagged `Env -> Env` 71 | - simplifies caching, separates linking 72 | - block compiler internally dispatched by first word 73 | - or initial character, for punctuation 74 | - block compiler is constant within each file 75 | - implicitly reprogrammable across files via shadowing '%env.lang.\*' 76 | - but we could separately bind block compiler to shared library, too 77 | - parallel processing of blocks 78 | - each gets its own local copy of the '%\*' namespace 79 | - may provide location within file to the block compiler 80 | - might be worth providing '%\*' namespace independently to each block 81 | - plus some extra line number info 82 | - could process blocks of definitions lazily per file 83 | - compiler will adapt each block, could be based on the tag 84 | - adapter may be sensitive to context, instructions from prior blocks 85 | - can logically insert operations before blocks 86 | - treat eof as a final block per file? 87 | - dubious! 
conflict with open inheritance and overrides 88 | - instead, externalize; apply finalizer before fixpoint 89 | - added tentative '%fin', but must test to see if it works 90 | 91 | - aggregation - ability to build up flexible tables via overrides 92 | - clear access to 'prior' definitions when shadowing/overriding 93 | - access to 'final' definitions via implicit module fixpoint 94 | - Church-encoded lists (or writer monad) of tagged AST elements 95 | 96 | - annotations - attaching them to a definition or similar 97 | - easiest to include annotations within the definition block 98 | - could add header annotations if the 'define' block knows where to look 99 | - could add footer annotations if we're willing to name the target 100 | 101 | - Kahn Process Networks (KPNs) or dataflow languages 102 | - it isn't difficult to compile KPNs into glas coroutines 103 | - local registers for queues within composite processes 104 | - it seems feasible to prove confluence within KPN up to input 105 | 106 | - logic and constraint systems 107 | - it's feasible to express constraints via aggregation 108 | - this could be supported as a design pattern 109 | 110 | - refactoring of pattern matching 111 | - develop a decent syntax for pattern matching-like behavior 112 | - it is feasible to build DSLs around %br, %bt, and %sel ops 113 | - this supports flexible refactoring and composition of pattern-matching 114 | - there aren't many languages that support this effectively! 115 | - closest I can think of is active patterns (F#) or view patterns (Haskell) 116 | - but neither of those comes all that close 117 | 118 | - unit types: I want them, but I'm still not sure how to model them in context 119 | - associative registers don't propagate nicely through data stack ops 120 | - 121 | 122 | ## Imports 123 | 124 | See [namespaces](GlasNamespaces.md) for the mechanics. This section is more about the syntax. Questions: 125 | 126 | How to nicely represent a DVCS resource?
An inline rep seems ugly. It might be more convenient to describe the resource separately from importing it. This would also provide the opportunity to develop libraries that 'index' other repos. 127 | 128 | Do we default to closed fixpoints or open composition? I don't like 'extends' or 'mix' for inheritance. But 'include' seems acceptable and familiar. We may need to clarify that we're not including raw source text, just the final `Env -> Env` op, but we can design for the two to be roughly equivalent in glas syntax (modulo '%src' and '%arg.\*'). 129 | 130 | include Src 131 | include Src at Prefix 132 | import Src 133 | import Src as Prefix 134 | from Src import alias-list 135 | 136 | In addition, we might add some arguments? This could feasibly be expressed as an alias list, too. Perhaps optional keyword 'with' just after Src, that takes some expression of an Env of args. As a special case, bind a prefix to args with an `prefix as *` special syntax. 137 | 138 | include Src with input-alias-list 139 | from Src with alias-list import ... 140 | 141 | We might also want the ability to treat import as a first-class definition within the namespace. OTOH, this is also true for most other features of glas. Perhaps we can support a generic solution here for binding modules into a definition. 142 | 143 | Aside from these options, we might want the option to separate import of a module from immediate integration, i.e. treating the import as an expression. 144 | 145 | 146 | 147 | -------------------------------------------------------------------------------- /docs/GlasGUI.md: -------------------------------------------------------------------------------- 1 | # Graphical User Interface for Glas 2 | 3 | It is feasible to support a conventional GUI interface, but that isn't a good fit for my vision of glas systems. My big idea is *users participate in transactions*. 
4 | 5 | ## Transactional User Interaction 6 | 7 | It isn't easy for human users to participate in transactions! The biggest problems are that humans are slow to respond, slow transactions are disrupted by background events, disrupted transactions are repeated, and humans also don't like repeating themselves. 8 | 9 | To solve this, we introduce a user agent to handle fast response and repetition. But the user must be provided tools to see what the user agent sees, such as data and queries, and control how the user agent responds to queries on the user's behalf. 10 | 11 | This involves *reflection* on the user agent, together with manipulation of user variables. Reflection allows users to observe aborted transactions. This provides a basis for read-only views or to withhold approval until the user has time to understand the information on display. User variables might be rendered as knobs, sliders, toggles, and text boxes. 12 | 13 | Reasonable modes for user participation: 14 | 15 | * *read-only view* - never commits - The user agent continuously or infrequently renders the GUI then aborts. Not *necessarily* read-only in context of reflection APIs, but should be safe and cacheable like HTTP GET. 16 | * *live action* - always commits - The user agent continuously renders the GUI and commits when possible. 17 | * *approved action* - controlled commit - The transaction is aborted unless the user explicitly submits or commits, in some way that is clearly under control of the user. The GUI system tracks user observations and may present a summary of relevant changes for approval in proximity to a submit button. To account for continuous background updates, a user agent may track tolerances for 'irrelevant' changes based on user policy, user action, and app recommendations. 18 | 19 | The *approved action* mode gives users the most stake in each transaction. Approving a summary of relevant changes even simulates a read-write conflict analysis. 
However, it's slow and deliberate, not suitable for every context. The *live action* mode is close to [immediate mode GUI](https://en.wikipedia.org/wiki/Immediate_mode_GUI) and can be used for almost any conventional GUI design, while the *read-only view* is suitable for maintaining user awareness. 20 | 21 | In context of *live action* mode, we may need to buffer or latch user inputs. For example, pushing a button sets a 'button-pushed' variable to 'true' until it is read and reset. The button would continue to render in a depressed state while 'button-pushed' remains true. 22 | 23 | ### Mitigating Glitches 24 | 25 | If users observe all transactions in which a user agent participates, they will certainly observe some transactions that are ultimately aborted due to concurrent read-write conflicts. A subset of these may exhibit 'glitches' where rendered values are inconsistent (e.g. due to reading cached values from multiple remote systems). 26 | 27 | A transactional GUI system can easily skip rendering of transactions that might be inconsistent, but there is a small cost to latency (to wait for consistency checks) and a small to large cost to frame-rate (from skipping bad 'frames' due to inconsistency) depending on level of concurrent interference. This can be mitigated through app design (buffers and queues, history for views) or runtime support (rendering older snapshots for read-only views, precise conflict analysis). 28 | 29 | Alternatively, we can modify applications to reduce severity of known glitches. This would be closer to convention with non-transactional GUI today. 30 | 31 | An adjacent issue is that *asynchronous* interactions - where feedback is not logically 'instantaneous' within a transaction - may appear to be glitchy if presented as synchronous to the user. In this case, I think the problem is more about managing user expectations (e.g. report actual status of pending requests) or meeting them (e.g.
use RPC to complete actions synchronously in GUI transaction). 32 | 33 | ## Integration 34 | 35 | gui : FrameRef? -> [user, system] unit 36 | 37 | An application's 'gui' method is repeatedly called in separate transactions. On each call, it queries the user agent and renders some outputs. 38 | 39 | In general, the queries and rendered outputs may be stable, subject to incremental computing. However, some 'frames' may be less stable than others. To support these cases (TBD) 40 | 41 | In some cases, we may 'fork' the GUI with non-deterministic choice, which a user agent might render in terms of multiple windows. We render without commit; the final 'commit' decision is left to the user through the user agent. 42 | 43 | A user agent can help users systematically explore different outcomes. This involves heuristically maintaining history, checkpoints and bookmarks based on which values are observed. An application can help, perhaps suggesting alternatives to a query or using naming conventions to guide heuristics (e.g. distinguishing navigation and control). 44 | 45 | *Note:* It is feasible to introduce a notion of user-local state or session-local state. However, it is not clear to me how such state would be integrated between sessions, other than as queries. A few exceptions include passing files and such over the GUI, e.g. drag and drop, which may require special attention. 46 | 47 | ## Navigation Variables 48 | 49 | UserAgents might broadly distinguish a few 'roles' for variables. Navigation variables would serve a role similar to HTTP URLs, with the user agent maintaining a history and providing a 'back' button. Writing to navigation variables would essentially represent navigating to a new location upon commit, albeit limited to the same 'gui' interface. 50 | 51 | Other potential roles would be inventory or equipment, influencing how the user interacts with the world. 
In any case, I think we could and should develop a more coherent metaphor than clipboards and cookies. 52 | 53 | ## Rendering Temporal Media and Large Objects 54 | 55 | An application may ask a user agent to 'render' a video for user consumption. As a participant in the transaction, a user should have the tools to comprehend this video before committing to anything. 56 | 57 | One of the best ways to understand a video is to play it. Of course, other very useful tools would include the ability to search it (find people or particular objects), read dialogues, present video frames side by side, apply filters, slow motion, fast forward, reverse, etc.. Ideally, the GUI system provides a whole gamut of tools that can be applied to any video. 58 | 59 | The same idea should apply to any large 'object' presented to the user within a transaction. For example, if the user agent is asked to render an entire 'database' as a value the user should have suitable tools to browse, query, and graph database values to obtain some comprehension of them. Rendering of very large objects is feasible between content-addressed references and content distribution networks. 60 | 61 | Ideally, user agents are extensible such that, if they lack the necessary tools, users can easily download the tools they need. We could develop some conventions for recommending certain tools to understand a large object. Further, an application can also support users in understanding large objects. 62 | 63 | ## Non-deterministic Choice and GUI 64 | 65 | For isolated transactions, repetition is equivalent to replication. Fair non-deterministic choice can be replicated to evaluate both forks in parallel. Assuming the transactions do not have a read-write conflict, they can both commit. This optimization is leveraged for task-based concurrency of transaction loops. 66 | 67 | This will impact GUI. If an application makes a non-deterministic choice, it will potentially affect what is rendered to the user. 
Assuming the user agent is aware of the choice, this could be rendered using multiple frames (tabs, windows, etc.) or more adventurously rendered as an overlay or branching conversation. 68 | 69 | Ideally, the user should have some control over the non-deterministic choice. This allows a *read-only view* to focus on frames that receive user attention, and *approved action* to efficiently approve a specific branch instead of waiting for it to cycle around. 70 | 71 | This can be understood as a form of participation: users can ignore and abort forks that aren't of interest to them, or explore the options in a controlled manner instead of randomly. Control over non-deterministic choice must be integrated with both the runtime and distributed transactions. Fortunately, this is a feature we'll also want for many other reasons: scheduling transactions, debugging, loop fusion, etc.. 72 | 73 | ## Multi-User Transactions 74 | 75 | The API directly supports multi-user systems where each user is performing independent transactions. That should be sufficient for most use cases. However, what if we want a 'multi-user transaction' in the sense of multiple users participating in one transaction? 76 | 77 | To support a multi-user transaction, we could model a 'multi-user agent'. If the users do not share a physical room, the multi-user agent could be placed into a virtual room created for the task. If the application is not multi-user aware, we could use a normal user agent and the virtual room could instead implement handoff protocols. 
78 | -------------------------------------------------------------------------------- /c/src/minunit.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 David Siñuela Pastor, siu.4coders@gmail.com 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining 5 | * a copy of this software and associated documentation files (the 6 | * "Software"), to deal in the Software without restriction, including 7 | * without limitation the rights to use, copy, modify, merge, publish, 8 | * distribute, sublicense, and/or sell copies of the Software, and to 9 | * permit persons to whom the Software is furnished to do so, subject to 10 | * the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be 13 | * included in all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 19 | * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 21 | * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 | */ 23 | #ifndef MINUNIT_MINUNIT_H 24 | #define MINUNIT_MINUNIT_H 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | #if defined(_WIN32) 31 | #include 32 | #if defined(_MSC_VER) && _MSC_VER < 1900 33 | #define snprintf _snprintf 34 | #define __func__ __FUNCTION__ 35 | #endif 36 | 37 | #elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__)) 38 | 39 | /* Change POSIX C SOURCE version for pure c99 compilers */ 40 | #if !defined(_POSIX_C_SOURCE) || _POSIX_C_SOURCE < 200112L 41 | #undef _POSIX_C_SOURCE 42 | #define _POSIX_C_SOURCE 200112L 43 | #endif 44 | 45 | #include /* POSIX flags */ 46 | #include /* clock_gettime(), time() */ 47 | #include /* gethrtime(), gettimeofday() */ 48 | #include 49 | #include 50 | #include 51 | 52 | #if defined(__MACH__) && defined(__APPLE__) 53 | #include 54 | #include 55 | #endif 56 | 57 | #if __GNUC__ >= 5 && !defined(__STDC_VERSION__) 58 | #define __func__ __extension__ __FUNCTION__ 59 | #endif 60 | 61 | #else 62 | #error "Unable to define timers for an unknown OS." 63 | #endif 64 | 65 | #include 66 | #include 67 | 68 | /* Maximum length of last message */ 69 | #define MINUNIT_MESSAGE_LEN 1024 70 | /* Accuracy with which floats are compared */ 71 | #define MINUNIT_EPSILON 1E-12 72 | 73 | /* Misc. 
counters */ 74 | static int minunit_run = 0; 75 | static int minunit_assert = 0; 76 | static int minunit_fail = 0; 77 | static int minunit_status = 0; 78 | 79 | /* Timers */ 80 | static double minunit_real_timer = 0; 81 | static double minunit_proc_timer = 0; 82 | 83 | /* Last message */ 84 | static char minunit_last_message[MINUNIT_MESSAGE_LEN]; 85 | 86 | /* Test setup and teardown function pointers */ 87 | static void (*minunit_setup)(void) = NULL; 88 | static void (*minunit_teardown)(void) = NULL; 89 | 90 | /* Definitions */ 91 | #define MU_TEST(method_name) static void method_name(void) 92 | #define MU_TEST_SUITE(suite_name) static void suite_name(void) 93 | 94 | #define MU__SAFE_BLOCK(block) do {\ 95 | block\ 96 | } while(0) 97 | 98 | /* Run test suite and unset setup and teardown functions */ 99 | #define MU_RUN_SUITE(suite_name) MU__SAFE_BLOCK(\ 100 | suite_name();\ 101 | minunit_setup = NULL;\ 102 | minunit_teardown = NULL;\ 103 | ) 104 | 105 | /* Configure setup and teardown functions */ 106 | #define MU_SUITE_CONFIGURE(setup_fun, teardown_fun) MU__SAFE_BLOCK(\ 107 | minunit_setup = setup_fun;\ 108 | minunit_teardown = teardown_fun;\ 109 | ) 110 | 111 | /* Test runner */ 112 | #define MU_RUN_TEST(test) MU__SAFE_BLOCK(\ 113 | if (minunit_real_timer==0 && minunit_proc_timer==0) {\ 114 | minunit_real_timer = mu_timer_real();\ 115 | minunit_proc_timer = mu_timer_cpu();\ 116 | }\ 117 | if (minunit_setup) (*minunit_setup)();\ 118 | minunit_status = 0;\ 119 | test();\ 120 | minunit_run++;\ 121 | if (minunit_status) {\ 122 | minunit_fail++;\ 123 | printf("F");\ 124 | printf("\n%s\n", minunit_last_message);\ 125 | }\ 126 | (void)fflush(stdout);\ 127 | if (minunit_teardown) (*minunit_teardown)();\ 128 | ) 129 | 130 | /* Report */ 131 | #define MU_REPORT() MU__SAFE_BLOCK(\ 132 | double minunit_end_real_timer;\ 133 | double minunit_end_proc_timer;\ 134 | printf("\n\n%d tests, %d assertions, %d failures\n", minunit_run, minunit_assert, minunit_fail);\ 135 | 
minunit_end_real_timer = mu_timer_real();\ 136 | minunit_end_proc_timer = mu_timer_cpu();\ 137 | printf("\nFinished in %.8f seconds (real) %.8f seconds (proc)\n\n",\ 138 | minunit_end_real_timer - minunit_real_timer,\ 139 | minunit_end_proc_timer - minunit_proc_timer);\ 140 | ) 141 | #define MU_EXIT_CODE minunit_fail 142 | 143 | /* Assertions */ 144 | #define mu_check(test) MU__SAFE_BLOCK(\ 145 | minunit_assert++;\ 146 | if (!(test)) {\ 147 | (void)snprintf(minunit_last_message, MINUNIT_MESSAGE_LEN, "%s failed:\n\t%s:%d: %s", __func__, __FILE__, __LINE__, #test);\ 148 | minunit_status = 1;\ 149 | return;\ 150 | } else {\ 151 | printf(".");\ 152 | }\ 153 | ) 154 | 155 | #define mu_fail(message) MU__SAFE_BLOCK(\ 156 | minunit_assert++;\ 157 | (void)snprintf(minunit_last_message, MINUNIT_MESSAGE_LEN, "%s failed:\n\t%s:%d: %s", __func__, __FILE__, __LINE__, message);\ 158 | minunit_status = 1;\ 159 | return;\ 160 | ) 161 | 162 | #define mu_assert(test, message) MU__SAFE_BLOCK(\ 163 | minunit_assert++;\ 164 | if (!(test)) {\ 165 | (void)snprintf(minunit_last_message, MINUNIT_MESSAGE_LEN, "%s failed:\n\t%s:%d: %s", __func__, __FILE__, __LINE__, message);\ 166 | minunit_status = 1;\ 167 | return;\ 168 | } else {\ 169 | printf(".");\ 170 | }\ 171 | ) 172 | 173 | #define mu_assert_int_eq(expected, result) MU__SAFE_BLOCK(\ 174 | int minunit_tmp_e;\ 175 | int minunit_tmp_r;\ 176 | minunit_assert++;\ 177 | minunit_tmp_e = (expected);\ 178 | minunit_tmp_r = (result);\ 179 | if (minunit_tmp_e != minunit_tmp_r) {\ 180 | (void)snprintf(minunit_last_message, MINUNIT_MESSAGE_LEN, "%s failed:\n\t%s:%d: %d expected but was %d", __func__, __FILE__, __LINE__, minunit_tmp_e, minunit_tmp_r);\ 181 | minunit_status = 1;\ 182 | return;\ 183 | } else {\ 184 | printf(".");\ 185 | }\ 186 | ) 187 | 188 | #define mu_assert_double_eq(expected, result) MU__SAFE_BLOCK(\ 189 | double minunit_tmp_e;\ 190 | double minunit_tmp_r;\ 191 | minunit_assert++;\ 192 | minunit_tmp_e = (expected);\ 193 | 
minunit_tmp_r = (result);\ 194 | if (fabs(minunit_tmp_e-minunit_tmp_r) > MINUNIT_EPSILON) {\ 195 | int minunit_significant_figures = 1 - log10(MINUNIT_EPSILON);\ 196 | (void)snprintf(minunit_last_message, MINUNIT_MESSAGE_LEN, "%s failed:\n\t%s:%d: %.*g expected but was %.*g", __func__, __FILE__, __LINE__, minunit_significant_figures, minunit_tmp_e, minunit_significant_figures, minunit_tmp_r);\ 197 | minunit_status = 1;\ 198 | return;\ 199 | } else {\ 200 | printf(".");\ 201 | }\ 202 | ) 203 | 204 | #define mu_assert_string_eq(expected, result) MU__SAFE_BLOCK(\ 205 | const char* minunit_tmp_e = expected;\ 206 | const char* minunit_tmp_r = result;\ 207 | minunit_assert++;\ 208 | if (!minunit_tmp_e) {\ 209 | minunit_tmp_e = "";\ 210 | }\ 211 | if (!minunit_tmp_r) {\ 212 | minunit_tmp_r = "";\ 213 | }\ 214 | if(strcmp(minunit_tmp_e, minunit_tmp_r) != 0) {\ 215 | (void)snprintf(minunit_last_message, MINUNIT_MESSAGE_LEN, "%s failed:\n\t%s:%d: '%s' expected but was '%s'", __func__, __FILE__, __LINE__, minunit_tmp_e, minunit_tmp_r);\ 216 | minunit_status = 1;\ 217 | return;\ 218 | } else {\ 219 | printf(".");\ 220 | }\ 221 | ) 222 | 223 | /* 224 | * The following two functions were written by David Robert Nadeau 225 | * from http://NadeauSoftware.com/ and distributed under the 226 | * Creative Commons Attribution 3.0 Unported License 227 | */ 228 | 229 | /** 230 | * Returns the real time, in seconds, or -1.0 if an error occurred. 231 | * 232 | * Time is measured since an arbitrary and OS-dependent start time. 233 | * The returned real time is only useful for computing an elapsed time 234 | * between two calls to this function. 235 | */ 236 | static double mu_timer_real(void) 237 | { 238 | #if defined(_WIN32) 239 | /* Windows 2000 and later. 
---------------------------------- */ 240 | LARGE_INTEGER Time; 241 | LARGE_INTEGER Frequency; 242 | 243 | QueryPerformanceFrequency(&Frequency); 244 | QueryPerformanceCounter(&Time); 245 | 246 | Time.QuadPart *= 1000000; 247 | Time.QuadPart /= Frequency.QuadPart; 248 | 249 | return (double)Time.QuadPart / 1000000.0; 250 | 251 | #elif (defined(__hpux) || defined(hpux)) || ((defined(__sun__) || defined(__sun) || defined(sun)) && (defined(__SVR4) || defined(__svr4__))) 252 | /* HP-UX, Solaris. ------------------------------------------ */ 253 | return (double)gethrtime( ) / 1000000000.0; 254 | 255 | #elif defined(__MACH__) && defined(__APPLE__) 256 | /* OSX. ----------------------------------------------------- */ 257 | static double timeConvert = 0.0; 258 | if ( timeConvert == 0.0 ) 259 | { 260 | mach_timebase_info_data_t timeBase; 261 | (void)mach_timebase_info( &timeBase ); 262 | timeConvert = (double)timeBase.numer / 263 | (double)timeBase.denom / 264 | 1000000000.0; 265 | } 266 | return (double)mach_absolute_time( ) * timeConvert; 267 | 268 | #elif defined(_POSIX_VERSION) 269 | /* POSIX. --------------------------------------------------- */ 270 | struct timeval tm; 271 | #if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) 272 | { 273 | struct timespec ts; 274 | #if defined(CLOCK_MONOTONIC_PRECISE) 275 | /* BSD. --------------------------------------------- */ 276 | const clockid_t id = CLOCK_MONOTONIC_PRECISE; 277 | #elif defined(CLOCK_MONOTONIC_RAW) 278 | /* Linux. ------------------------------------------- */ 279 | const clockid_t id = CLOCK_MONOTONIC_RAW; 280 | #elif defined(CLOCK_HIGHRES) 281 | /* Solaris. ----------------------------------------- */ 282 | const clockid_t id = CLOCK_HIGHRES; 283 | #elif defined(CLOCK_MONOTONIC) 284 | /* AIX, BSD, Linux, POSIX, Solaris. ----------------- */ 285 | const clockid_t id = CLOCK_MONOTONIC; 286 | #elif defined(CLOCK_REALTIME) 287 | /* AIX, BSD, HP-UX, Linux, POSIX. 
------------------- */ 288 | const clockid_t id = CLOCK_REALTIME; 289 | #else 290 | const clockid_t id = (clockid_t)-1; /* Unknown. */ 291 | #endif /* CLOCK_* */ 292 | if ( id != (clockid_t)-1 && clock_gettime( id, &ts ) != -1 ) 293 | return (double)ts.tv_sec + 294 | (double)ts.tv_nsec / 1000000000.0; 295 | /* Fall thru. */ 296 | } 297 | #endif /* _POSIX_TIMERS */ 298 | 299 | /* AIX, BSD, Cygwin, HP-UX, Linux, OSX, POSIX, Solaris. ----- */ 300 | gettimeofday( &tm, NULL ); 301 | return (double)tm.tv_sec + (double)tm.tv_usec / 1000000.0; 302 | #else 303 | return -1.0; /* Failed. */ 304 | #endif 305 | } 306 | 307 | /** 308 | * Returns the amount of CPU time used by the current process, 309 | * in seconds, or -1.0 if an error occurred. 310 | */ 311 | static double mu_timer_cpu(void) 312 | { 313 | #if defined(_WIN32) 314 | /* Windows -------------------------------------------------- */ 315 | FILETIME createTime; 316 | FILETIME exitTime; 317 | FILETIME kernelTime; 318 | FILETIME userTime; 319 | 320 | /* This approach has a resolution of 1/64 second. Unfortunately, Windows' API does not offer better */ 321 | if ( GetProcessTimes( GetCurrentProcess( ), 322 | &createTime, &exitTime, &kernelTime, &userTime ) != 0 ) 323 | { 324 | ULARGE_INTEGER userSystemTime; 325 | memcpy(&userSystemTime, &userTime, sizeof(ULARGE_INTEGER)); 326 | return (double)userSystemTime.QuadPart / 10000000.0; 327 | } 328 | 329 | #elif defined(__unix__) || defined(__unix) || defined(unix) || (defined(__APPLE__) && defined(__MACH__)) 330 | /* AIX, BSD, Cygwin, HP-UX, Linux, OSX, and Solaris --------- */ 331 | 332 | #if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) 333 | /* Prefer high-res POSIX timers, when available. */ 334 | { 335 | clockid_t id; 336 | struct timespec ts; 337 | #if _POSIX_CPUTIME > 0 338 | /* Clock ids vary by OS. Query the id, if possible. 
*/ 339 | if ( clock_getcpuclockid( 0, &id ) == -1 ) 340 | #endif 341 | #if defined(CLOCK_PROCESS_CPUTIME_ID) 342 | /* Use known clock id for AIX, Linux, or Solaris. */ 343 | id = CLOCK_PROCESS_CPUTIME_ID; 344 | #elif defined(CLOCK_VIRTUAL) 345 | /* Use known clock id for BSD or HP-UX. */ 346 | id = CLOCK_VIRTUAL; 347 | #else 348 | id = (clockid_t)-1; 349 | #endif 350 | if ( id != (clockid_t)-1 && clock_gettime( id, &ts ) != -1 ) 351 | return (double)ts.tv_sec + 352 | (double)ts.tv_nsec / 1000000000.0; 353 | } 354 | #endif 355 | 356 | #if defined(RUSAGE_SELF) 357 | { 358 | struct rusage rusage; 359 | if ( getrusage( RUSAGE_SELF, &rusage ) != -1 ) 360 | return (double)rusage.ru_utime.tv_sec + 361 | (double)rusage.ru_utime.tv_usec / 1000000.0; 362 | } 363 | #endif 364 | 365 | #if defined(_SC_CLK_TCK) 366 | { 367 | const double ticks = (double)sysconf( _SC_CLK_TCK ); 368 | struct tms tms; 369 | if ( times( &tms ) != (clock_t)-1 ) 370 | return (double)tms.tms_utime / ticks; 371 | } 372 | #endif 373 | 374 | #if defined(CLOCKS_PER_SEC) 375 | { 376 | clock_t cl = clock( ); 377 | if ( cl != (clock_t)-1 ) 378 | return (double)cl / (double)CLOCKS_PER_SEC; 379 | } 380 | #endif 381 | 382 | #endif 383 | 384 | return -1; /* Failed. */ 385 | } 386 | 387 | #ifdef __cplusplus 388 | } 389 | #endif 390 | 391 | #endif /* MINUNIT_MINUNIT_H */ 392 | -------------------------------------------------------------------------------- /docs/GlasImpl.md: -------------------------------------------------------------------------------- 1 | # Glas Runtime 2 | 3 | I hope to bootstrap the runtime swiftly! Unfortunately, I doubt I'll accomplish this. Thus, the pre-bootstrap implementation must perform adequately, and should be usable long term. A viable solution is to leverage JIT compilation. With this in mind, JVM and .NET are tempting targets. Alternatively, a low-level runtime like C is viable, write my own JIT and GC. At present, I lean towards the latter. 
4 | 5 | A basic GC is not difficult to write. I can implement something simple to get started then return to it later. As for JIT, even a simple one will certainly be a pain to write. But I can still start with an interpreter. So, I propose to start with a C implementation and work from there. 6 | 7 | ## Data Representation 8 | 9 | It is feasible to allocate 'cons cells' the size of two pointers, like Scheme or Lisp, supporting branches and such. This would require uncomfortably squeezing a lot of data and GC logic into tagged pointers. Alternatively, we can support a more conventional tagged union, perhaps aligned to three (or four) pointers in size, providing plenty room for metadata per value. I'm inclined to the latter. 10 | 11 | ### Small Values 12 | 13 | We can squeeze many small values into an 8-byte pointer. 14 | 15 | * small bitstrings and leaf nodes 16 | * small binaries (up to 7 bytes) 17 | * small trees or shrubs? 18 | * small rational numbers? 19 | * 0..30 bit numerator 20 | * 1..30 bit denominator with implicit '1' bit prefix 21 | * cannot encode divide-by-zero, but not always normalized 22 | 23 | Assuming 8-byte alignment of pointers. 24 | 25 | lowbyte interpretation 26 | xxxxx000 pointer 27 | xxxxxx01 bitstring (0..61 bits) 28 | xxxxxx10 shrubs of 0,2,4..62 bits (see below) 29 | xxxxx011 small rationals 30 | nnn00111 binary (nnn: encodes 1 to 7 bytes) 31 | 11111111 special constants, e.g. claimed thunks 32 | 33 | 34 | ### Linearity and Ephemerality Header Bits 35 | 36 | To efficiently enforce linear types at runtime, we should maintain a linearity bit per allocation. We may similarly benefit from tracking ephemerality for escape analysis, to ensure data 'sealed' by a short-lived register is never stored to a longer-lived register. In the latter case, it seems sufficient to cover the RPC vs. database vs. runtime-instance vs. 
transaction-local cases, and simply not dynamically detect issues like escape between %local frames within a transaction or runtime. 37 | 38 | These bits can be constructed as a simple composition of the same header bits in component data. 39 | 40 | ### Binaries and Arrays 41 | 42 | Might be worth having a couple options here, based on whether we want to allocate in-arena or outside of it. Will need to consider my options. Might heuristically keep smaller binaries or arrays within the arena. Support for slices would be good, too. 43 | 44 | ### Shrubs 45 | 46 | We can encode small trees as a bitstring: 47 | 48 | 00 - leaf 49 | 01 - branch (left tree) (right tree) 50 | 10 - left stem (tree) 51 | 11 - right stem (tree) 52 | 53 | We can pad with '00'. 54 | 55 | The advantage of shrubs is the ability to encode a complex branching structure without pointers. The disadvantage is the lack of indexing, i.e. 'unpair' takes time linear in the size of the left tree. Despite this limitation, I think it would be very useful for in-pointer encodings. For larger structures, we could use a glob encoding for similar benefits. 56 | 57 | ### Globs 58 | 59 | A viable feature is to support 'globs' where we directly represent a glas data structure as a binary. This doesn't even need a separate header, instead tag small or large binary representations. An advantage is that a glob may be far more compact for complex tree structures. A disadvantage is that we cannot easily 'slice' a glob, we can only index into one, i.e. a complete glob has a 60 | 61 | ### Thunks 62 | 63 | I'll need lazy evaluation for at least the namespace layer. I eventually also want it in the program layer, e.g. as an annotation to run 'pure' functions. 64 | 65 | I'll probably want to model explicit thunks for at least some use cases, such as lazy eval of the namespace. I'll likely need two types of thunks: one for program model, another for namespace layer, due to the distinct program types. 
66 | 67 | Ideally, GC may recognize and collapse completed thunks. GC could also recognize and complete 'selector' thunks, e.g. accessing a particular definition from an environment, or a particular field from a dict, when it is available. 68 | 69 | Waiting on thunks needs attention. In context of transactions, especially, a wait may be interrupted because we decide to backtrack. This requires some robust way to represent waits that can be canceled. Tying back directly to the host is probably a bad idea. An intermediate heap object that we can GC might work, at the cost of adding an allocation for every thunk wait. 70 | 71 | ## Note on callbacks 72 | 73 | Use `pthread_cleanup_push`/`pthread_cleanup_pop` to properly handle the case where a callback closes an OS thread, especially for callbacks into user code. 74 | 75 | ## TBD 76 | 77 | Try to get raylib and GUI FFIs working ASAP. 78 | 79 | ## Transactional Registers 80 | 81 | It isn't difficult to ensure updates across multiple registers are consistent via mutex for writes. But ensuring isolated reads, and thus ensuring a read-only transaction never aborts, is a bit more difficult. What are our options? 82 | 83 | One idea is horizontal versions. 84 | - A version has associated, sealed data for each register updated 85 | - A register has a strong reference to the most recent version 86 | - A version may have a weak-ref to previous version per register 87 | 88 | That is, so long as a register survives, its most recent version of data also survives. So long as a version survives, we can also access data from that version across all surviving registers. The cost is a fair bit of indirection. 89 | 90 | How does this help? We could test for consistency when first reading a register, i.e. check that there are no missed updates to vars previously read within the same transaction. This makes register reads relatively expensive, i.e. 
upon a transaction bringing a version into scope, it may invalidate prior reads to other registers, or we can opportunistically read an older version where there is no conflict (if it hasn't been garbage-collected yet). 91 | 92 | This makes reads more expensive, but it isn't too bad. 93 | 94 | We could optionally remove the data directly, instead a version just tracks which registers were updated, and perhaps the nature of those updates. This would prevent us from switching to older versions of data to allow a transaction to complete, but it would reduce indirection and simplify GC. 95 | 96 | 97 | 98 | 99 | 100 | A "version" captures updates to multiple registers, and the most recent version for each register is strongly referenced, but the version itself only has weakrefs back to the registers. 101 | 102 | One idea is to 103 | 104 | Atomic reads also need some attention: ideally, we can operate on a snapshot view of all 105 | registers, i.e. such that we never observe inconsistent states and abort read-only transactions. Perhaps we could capture version numbers within each register instead of data. 106 | 107 | 108 | 109 | 110 | 111 | 112 | ## Garbage Collection 113 | 114 | ### Allocators and Thread Local Storage 115 | 116 | I want nursery-style allocations, but I don't want a lot of fine-grained nurseries for `glas*` objects. A viable approach is to use thread-local storage for 'affinity' when allocating, i.e. where the `glas*` thread allocates depends on which OS thread it's using. 117 | 118 | By keeping a linked list of thread-local structures, when GC is performed, it can fetch back all the nurseries and force threads to grab new ones on their next allocation. 119 | 120 | Aside from allocators, TLS may prove convenient for write barriers, e.g. keeping a `glas_gc_scan*` per OS thread to avoid contention on a global scan list. Maybe move the semaphore here, too. 121 | 122 | ### Finalizers 123 | 124 | It is necessary to precisely recognize finalizers in the heap. 
However, I cannot afford a linked list, nor even an additional mark bit per object, so I'm thinking to use cards to track finalizers to a small region (e.g. 128 bytes, a quarter-bit per object), then rely on gcbits to mark unexecuted finalizers more precisely. 125 | 126 | 127 | 128 | ### Snapshot at the Beginning? 129 | 130 | Some thoughts on adapting SATB to glas. 131 | 132 | GHC's GC uses a 'snapshot at the beginning' (SATB) strategy for garbage collection. The idea, IIUC, is that a collection should only sweep garbage that was present (i.e. was already garbage) when the collection started. This ensures the collection is consistent with performing a full GC under stop-the-world. 133 | 134 | The trick is to extend to concurrent marking and mutation. New allocations obviously create new garbage. But we must touch none of that until a future GC. Mutations can transform older allocations into new garbage. But we must GC as if no mutation occurred. 135 | 136 | This will be supported by a write barrier. When the write barrier sees a mutation on a slot, it must take action depending on whether it's the first mutation on that slot in the GC cycle. If so, copy prior value, atomically claim the scan, write new value, then if the claim was successful, add prior value to a scan buffer. This is our basic barrier for SATB. Otherwise, just update. 137 | 138 | But now we have a new problem: How does the write barrier know whether a specific slot has been scanned? Some options: 139 | 140 | * hashtable - unpredictable size, poor locality, easy to understand 141 | * bitmaps - predictable size, good locality, easy to understand, but 1.6% memory overhead 142 | * other ideas? 143 | 144 | I lean towards bitmaps in this role. We'll need 1 bit per 8-byte slot (1.6% overhead, unless we can roll it into currently wasted space) for tracking scans. For a `glas_cell` we have a 32-bit header per 32-byte object, of which only 24 bits are currently assigned. 
I can dedicate 3 bits to support 3 slots per cell. For a `glas_thread` we could add an `_Atomic(uint64_t)* scan_bitmap` to every thread, allocating based on a thread's root offsets. If we allocate arrays, we could allocate extra space for scan bits walking backwards from the array data. 145 | 146 | We flip every GC cycle whether we interpret '1' as scanned or '0' as scanned. At the start of marking, all 'live' slots must begin with the same mark. Thus, slots for new allocations must also be marked as scanned. This is doable. For new cells, we can add bits from runtime-global state to our initial header, flipping this interpretation only while stopped. For new threads, we might need to block the runtime from flipping scan bits while threads are initializing, but a simple counter and semaphore is adequate. 147 | 148 | With this design, we'll need a separate 'write' function for registers, threads, and mutable arrays. These write functions can handle the write barrier, whether it's by GC flipping a `void (*write_reg)(glas_cell* reg, glas_cell* newval)` while GC is stopped, or branching on GC state. (I have no intuition for which option would perform better.) 149 | 150 | Some notes: 151 | 152 | * I still favor bump-pointer allocation, but I'm not married to it. This design doesn't rely on location to distinguish cells as 'new', thus I can also use fragmented allocation, e.g. using the prior marks. Mixed allocation modes are feasible. 153 | * New registers, thunks, etc. are initialized as 'scanned', thus don't need special handling within a GC cycle. 154 | * When writing to a slot that initially contains NULL - or perhaps small constant data embedded into pointer bitfields - pushing the prior value to a scan buffer becomes dropping the prior value. 155 | 156 | ### Compacting old gen? 157 | 158 | I'd like to occasionally compact a subset of old pages, e.g. to free up some heap address space. 
But at the moment, I don't have a good solution for moving data from old pages without breaking old-to-old links. 159 | 160 | One viable option is to track old-to-old references in cards, or perhaps to build up a special card for ref-to-compacting when performing marks. Or perhaps, while marking, we could record the 161 | 162 | 163 | // Problem: I want to compact a subset of old pages, but I cannot 164 | // find old-to-old references. One option is intra-gen cards, but 165 | // that adds overhead to a lot of allocations (whereas old-to-young 166 | // only impacts mutations, ) 167 | 168 | 169 | ### Arrays 170 | 171 | We can model big arrays, mutable or otherwise, as a foreign pointer to some `glas_roots*`. But use of roots in this role has a 25% overhead for the root offsets and may touch unreachable slices. 172 | 173 | We can simplify by focusing on immutable arrays, which don't need old-to-young tracking. Later, if I need mutable arrays, I can develop a dedicated structure. 174 | 175 | ### Thread Roots 176 | 177 | In context of concurrent marking, we ideally can capture thread roots efficiently. With a compacting GC, we might also move them around. 178 | 179 | One option is to simply encode thread roots as an array of sorts. The thread could flexibly use this array as structures and stacks, whatever it needs. The obvious weakness is that big arrays will take a while to scan, and threads are paused during scan. 180 | 181 | To mitigate, we could present roots as mutable data within the heap. But this requires too much structure. 182 | 183 | A viable alternative is to limit threads to, say, 32 roots. This ensures bounded initial scan work per thread. Threads themselves then decide how (and whether) to use these 'registers'. But this is constraining for no great reason: I'm the only one implementing various kinds of threads! 184 | 185 | Let's assume a `glas_thread` is wrapped in a larger structure, which provides a purpose-specific array or structure of registers. 
The thread points to this structure and specifies its size, and we insist its size is constant - never changes. GC is also responsible for destroying these threads, i.e. no risk of client removing roots while processing. This seems feasible. 186 | 187 | ### Thunks 188 | 189 | Thunks need special attention. Initially, a thunk must contain some representation of a computation. Later, some thread will 'claim' the thunk. Other threads may await the thunk, which requires some careful integration: 190 | 191 | - in context of transactions, the operation waiting on a thunk, or even the one performing it, may be aborted before the wait completes. Programmers can mitigate by designing tiny thunks. 192 | - to keep it simple, we can restrict thunks within programs to pure data-stack manipulation, i.e. thunk with arity. 193 | - threads waiting on a thunk should also get busy! thunks beget thunks, and they can start doing some work without waiting for just one thread to finish everything. But we'll need some way to identify which thunks are needed ahead of request. 194 | 195 | -------------------------------------------------------------------------------- /docs/GlasObject.md: -------------------------------------------------------------------------------- 1 | # Glas Object 2 | 3 | Glas Object, or 'glob', is a compact, indexed binary representation for tree-structured data. Primary use cases are data storage, communication, and caching. The focus is representation of dictionaries (radix trees) and lists (arrays, binaries, finger tree ropes), and structure sharing. 4 | 5 | Glas Object is intended to work well with content-addressed storage. A glob can reference other globs by secure hash (SHA3-512), and can work together with proxy caching via content delivery networks. However, it is also feasible to use globs within the glas module system, with access to other modules and procedural generation. This is supported by abstracting the *external reference* type. 
6 | 7 | ## Desiderata 8 | 9 | * *indexed* - Data access to specific elements of lists and dicts does not need to parse or scan unrelated regions of the glob file. Dictionaries can be organized as radix trees. Lists can be organized as finger trees or ropes. 10 | * *compact* - Common data and binaries can be efficiently embedded. Can easily share common structure within a glob. 11 | * *scalable* - Larger-than-memory values can be loaded partially, leaving most data in content-addressed storage. All sizes use variable-sized numbers. Content-addressed storage is persistent, can support databases. 12 | * *simple* - Should not have a complicated parser. Composition and decomposition should be easy. A basic writer (i.e. without optimization passes) should be easy. 13 | * *extensible* - Space for new data types or representations to meet future needs. 14 | 15 | ## Encoding 16 | 17 | A parser for Glas Object will first read a header byte that indicates how to read the remainder of a node. The first byte of a glob binary is the 'root value' for the glob, and all relevant data in the glob must be accessible from the root. 18 | 19 | ### Basic Data 20 | 21 | Glas data is binary trees. Glas Object distinguishes leaves, stems, and branches. 22 | 23 | * *leaf* - a terminal node in the tree. 24 | * *stem* - a tree node with one child. 25 | * *branch* - a tree node with two children. 26 | 27 | Glas Object uses stems heavily to encode bitstrings. Numbers, symbols, etc. are encoded as bitstrings. Thus, compaction of stems is essential. Additionally, we could save some bytes (one byte per symbol, number, or branch within a radix tree) by merging stem-leaf and stem-branch. 28 | 29 | The proposed encoding for Basic Nodes consumes 96 header types, and supports flexible encoding of large bitstring fragments. 
30 | 31 | ttt0 abc1 - 3 stem bits in header 32 | ttt0 ab10 - 2 stem bits in header 33 | ttt0 a100 - 1 stem bits in header 34 | ttt0 1000 - 0 stem bits in header 35 | ttt0 0000 - reserved 36 | 37 | ttt1 fnnn . (bytes) - stems of 4 to 64 bits 38 | f - full or partial first byte (0 - partial) 39 | nnn - 1-8 bytes, msb to lsb 40 | 41 | partial first byte - encodes 1 to 7 bits 42 | abcdefg1 - 7 bits 43 | abcdef10 - 6 bits 44 | ... 45 | a1000000 - 1 bits 46 | 10000000 - 0 bits (unused in practice) 47 | 00000000 - unused 48 | 49 | ttt values: 50 | 001 - Stem-Leaf and Leaf (0x28) Nodes 51 | 010 - Stem Nodes and Nop (0x48) 52 | header . (child value) 53 | 011 - Stem-Branch and Branch (0x68) Nodes 54 | header . (offset to right child) . (left child) 55 | 56 | This compactly encodes symbols, numbers, composite variants, radix trees, etc.. 57 | 58 | *Note:* I've dropped support for shared stems via `ttt0 0000` because it was messy and inefficient. Currently there is no option to share common stems. But templated data could possibly fill this role (see *Potential Future Extensions*). 59 | 60 | ### Lists 61 | 62 | In glas systems, lists are conventionally encoded in a binary tree as a right-spine of branch nodes (pairs), terminating in a leaf node (unit value). This is a Lisp-like encoding of lists. 63 | 64 | /\ type List = (Value * List) | () 65 | a /\ 66 | b /\ a list of 5 elements 67 | c /\ [a, b, c, d, e] 68 | d /\ 69 | e () 70 | 71 | However, direct representation of lists is inefficient for many use-cases. Thus, glas runtimes support specialized representations for lists: binaries, arrays, and [finger-tree](https://en.wikipedia.org/wiki/Finger_tree) [ropes](https://en.wikipedia.org/wiki/Rope_(data_structure)). To protect performance, Glas Object also offers specialized list nodes: 72 | 73 | * *array* - header (0x0A) . (length - 1) . (array of offsets); represents a list of values of given length. 
Offsets are varnats, denormalized to the same width, all relative to the very last offset in the array. Width of offsets in the array is determined by looking at the first varnat. 74 | * *binary* - header (0x0B) . (length - 1) . (bytes); represents a list of bytes. Each byte represents a small, non-negative integer, 0..255. 75 | * *concat* - header (0x0C) . (offset to right value) . (left list); represents logical concatenation, substituting left list terminal with given right value (usually another list). 76 | * *drop* and *take* - see *Accessors*, support sharing slices of a list 77 | 78 | See *Encoding Finger-Tree Ropes* for a pattern to leverage concat effectively. Of course, the system is free to convert ropes into larger arrays or binaries if it doesn't need fine-grained structure sharing of list fragments within or between globs. 79 | 80 | ### External References 81 | 82 | External references are primarily intended for references between globs. 83 | 84 | * *external ref* - header (0x02) followed by a reference value. A reference value must be recognized as representing another value in context. We can logically substitute the external reference with the referenced value. 85 | 86 | Reference values in context of content-addressed storage: 87 | * *glob:SecureHash* - reference to content-addressed glob. SecureHash is usually a 64-byte binary representing the SHA3-512 of an external binary. 88 | * *bin:SecureHash* - reference to content-addressed binary data. Same SecureHash as for globs, but the referent is loaded as a binary instead of parsed as a glob. 89 | 90 | External references generalize as a *contextual extension* mechanism for Glas Object. For example, in context of a module system, we might use *local:ModuleName* and *global:ModuleName* instead of content-addressed *glob* and *bin* references. In context of streaming data or templates, we might introduce *var:Nat* to represent data that will arrive later in the stream or perhaps upon demand. 
91 | 92 | *Note:* Establishing and maintaining the context is rarely free. Effective support for external references may involve access tokens for a [CDN](https://en.wikipedia.org/wiki/Content_delivery_network), protocols for content negotiation (analogous to HTTP Accept header), reference validation overheads, and so on. 93 | 94 | ### Internal References 95 | 96 | We can forward reference within a glob file. 97 | 98 | * *internal ref* - header (0x88) . (offset); i.e. the whole-value *accessor*. 99 | 100 | Internal references are mostly useful to improve structure sharing or compression of data. Also useful for the *Glob Headers* pattern, where a glob binary starts with an internal ref. 101 | 102 | ### Accessors 103 | 104 | Accessors support fine-grained structure sharing that preserves indexability and works in context of content-addressed storage. Essentially, we support slicing lists and indexing into records. 105 | 106 | * *path* - headers (0x80-0x9F) . (offset); uses stem-bit header (ttt=100) to encode a bitstring path. Equivalent to following that path into the target value as a radix tree. 107 | * *drop* - header (0x0D) . (length) . (offset to list); equivalent to path of length '1' bits. 108 | * *take* - header (0x0E) . (length) . (inline list value); equivalent to sublist of first length items from list. Although useful to slice lists, this is heavily used to cache list lengths for ropes. 109 | * *Note:* you can still reference a list 'inline' via internal reference (header 0x88). But the common use of take for caching rope sizes saves us by avoiding the unnecessary offset in most cases. 110 | 111 | Indexing a list is possible via composition of path and drop, but it shouldn't be needed frequently, so it isn't optimized. 112 | 113 | ### Annotations 114 | 115 | Support for ad-hoc comments within Glas Object. 116 | 117 | * *annotation* - header (0x01) . (offset to data) . (metadata); the metadata may be an arbitrary value. 
118 | 119 | In practice, annotations at the glas object layer are written by a runtime when it's storing data, then read by the runtime when it's loading data. Potential use cases include hints for accelerated runtime representations and tracking dataflow while debugging. User programs can potentially access these annotations via runtime reflection APIs. However, it's usually wiser to model annotations in the data layer if possible. 120 | 121 | ### Accelerated Representations 122 | 123 | We can extend *external references* to support logical representations of data. In this case, the reference contains all the information we need, but not in canonical form. For example, an unboxed floating point matrix might be represented as: 124 | 125 | (0x02) . matrix:(dim:[200,300], type:f32, data:Binary) 126 | 127 | When translated to canonical form, this might translate to a list of lists of 32-bit bitstrings. But a runtime could potentially use the unboxed representation directly. 128 | 129 | We can potentially introduce many more variants to support graphs, sets, etc. And even matrices might benefit from logical transposition, lazy multiplication, etc. This complicates content negotiation and the runtime. If parties fail to agree to an accelerated representation, they can still construct the canonical representation and add *annotations* so they know to read the data back as a matrix. Of course, if conversion is very expensive, the transaction might be aborted on quota constraints. 130 | 131 | Eventually, as accelerated representations achieve status as de-facto standards, we can contemplate assigning dedicated headers in Glas Object to save a few bytes. 132 | 133 | ## Varnats, Lengths, and Offsets 134 | 135 | A varnat is encoded with a prefix '1*0' encoding a length in bytes, followed by 7 data bits per prefix bit, msb to lsb order. For example: 136 | 137 | 0nnnnnnn 138 | 10nnnnnn nnnnnnnn 139 | 110nnnnn nnnnnnnn nnnnnnnn 140 | ... 
141 | 142 | In normal form, varnats use the smallest number of bytes to encode a value. It isn't an error to use more bytes, just not very useful within an immutable binary. In some cases where the minimum value is one instead of zero, we'll encode one less such that a single byte can encode 1 to 128. 143 | 144 | ## Summary of Node Headers 145 | 146 | 0x00 (never used) 147 | 0x01 Annotation 148 | 0x02 External Ref 149 | 150 | 0x0A Array 151 | 0x0B Binary 152 | 0x0C Concat 153 | 0x0D Drop 154 | 0x0E Take 155 | 156 | 0x20-0x3F Stem-Leaf and Leaf (0x28) 157 | 0x40-0x5F Stem Nodes (and Nop - 0x48) 158 | 0x60-0x7F Stem-Branch and Branch (0x68) 159 | 0x80-0x9F Index Path and Internal Ref (0x88) 160 | 161 | CURRENTLY UNUSED: 162 | 0x03-0x09 163 | 0x0F-0x1F 164 | 0xA0-0xFF 165 | 166 | PROPOSED: 167 | 0xA0-0xBF Small binaries (1-32 bytes) 168 | 169 | 170 | ## Conventions and Patterns 171 | 172 | Some ideas about how we might leverage Glas Object for more use cases. 173 | 174 | ### Encoding Finger Tree Ropes 175 | 176 | It is feasible to combine list-take (Size) and concatenation nodes in a way that provides hints of finger-tree structure to support tree-balancing manipulations. 177 | 178 | Concat (L1 ++ L2) 179 | Take (Size . List) 180 | 181 | Digit(k) 182 | k = 0 183 | Array 184 | Binary 185 | k > 0 186 | Larger Array or Binary (heuristic) 187 | Size . Node(k-1) 188 | Node(k) - two or three concatenated Digit(k) 189 | Digits(k) - one to four concatenated Digit(k) 190 | LDigits(k) - right assoc, e.g. (A ++ (B ++ C)) 191 | RDigits(k) - left assoc, e.g. ((A ++ B) ++ C) 192 | Rope(k) 193 | Empty Leaf 194 | Single Array | Binary | Node(k-1) 195 | Many Size . (LDigits(k) ++ (Rope(k+1) ++ RDigits(k))) 196 | 197 | This structure represents a 2-3 finger-tree rope, where '2-3' refers to the size of internal nodes. It is possible that wider nodes and more digits would offer superior performance, but most gains will likely be due to favoring larger binary or array fragments.
198 | 199 | ### Glob Headers 200 | 201 | As a simple convention, a glob binary that starts with an internal reference (0x88) is considered to have a header. The header should also be glas data, typically a record of form `(field1:Value1, field2:Value2, ...)`. 202 | 203 | 0x88 (offset to data) (header) (data) 204 | 205 | A header can be considered an annotation for the glob binary as a whole. Potential use cases include adding provenance metadata, glob extension or version information, or entropy for a convergence secret. 206 | 207 | ### Data Validation 208 | 209 | Validation of glob binaries can be expensive in context of very large data or accelerated representations. Nonetheless, it should be performed before we commit potentially invalid data into a database. That's our last good opportunity to abort a transaction without risk of long-term corruption. 210 | 211 | To mitigate validation overheads, a runtime might implicitly trust hashes it learns about from a trusted database or CDN. This trust would be expressed in the runtime configuration. Additionally, we can leverage glob headers or annotations to include proof hints or cryptographic signatures. Proof hints can reduce the cost to re-validate, while signatures might indicate a party you trust already performed the validation. 212 | 213 | ### Deduplication and Convergence Secret 214 | 215 | It is possible for a glas system to 'compress' data by generating the same glob binaries, with the same secure hash. This is mostly a good thing, but there are subtle attacks and side-channels. These attacks can be greatly mitigated via controlled introduction of entropy, e.g. [Tahoe's convergence secret](https://tahoe-lafs.readthedocs.io/en/latest/convergence-secret.html). 216 | 217 | ### Canonicalization 218 | 219 | Glas Object does not enforce strict canonicalization. Different writers may make different chunking and rope balancing decisions, which leads to different binary layouts and therefore different content hashes. 
What matters is *stability*: given the same runtime and heuristic configuration, encodings should be deterministic. Applications that require strict canonical hashes (e.g. cryptographic signatures) should define a higher‑level canonicalization pass on top of Glas Object. 220 | 221 | ## Proposed Extensions 222 | 223 | ### Small Arrays and Binaries 224 | 225 | We could encode length for small arrays or binaries directly in the header, e.g. `0xA(len)` for arrays and `0xB(len)` for binaries of lengths 1 to 16. However, it isn't clear that this is worthwhile, especially for arrays. Perhaps instead use 0xA0-0xBF all for binaries. 226 | 227 | ### Inline Arrays 228 | 229 | We could directly encode data within array slots of fixed sizes instead of offsets. Offsets would still be available via 0x88. The main advantage would be to eliminate the indirection overhead for arrays of smaller values. 230 | 231 | ### Log-Structured Merge Tree 'Updates' 232 | 233 | It is feasible to record 'patches' to lazily apply into a tree or list. This can be more efficient than use of Accessors to logically 'rebuild' a tree or list. 234 | 235 | ### Templated Structs 236 | 237 | Encode structures as an array-like structure where the header describes labels separately from the data. This allows reuse of labels for a common use case. 238 | 239 | -------------------------------------------------------------------------------- /docs/GlasNamespaces.md: -------------------------------------------------------------------------------- 1 | # Glas Namespaces 2 | 3 | In my vision for glas systems, huge namespaces define runtime configuration, shared libraries, and applications. Definitions can be distributed across dozens of DVCS repositories, referencing stable branches or hashes for horizontal version control. We rely on laziness to load and extract only what we need, and incremental compilation to reduce rework. 4 | 5 | ## Design Overview 6 | 7 | Lambda calculus can express namespaces, e.g. 
a `let x = X in Expr` becomes `((λx.Expr) X)`. However, namespaces are second-class in the lambda calculus. I propose to extend lambda calculus with first-class environments. 8 | 9 | First, we can reify the environment. The program can generate an abstract record `{ "x" = x, "y" = y, ... }` for all names in scope. For binding, we can introduce `((ν"Prefix".Body) Env)`, binding "PrefixName" in Body to "Name" in Env. This shadows a prefix like lambdas shadow names. A simple record accessor can be expressed via empty prefix `((ν.name) Env)`. For control, we introduce translations that modify a subprogram's view of its current environment, also influencing the reified environment key names. 10 | 11 | Lambda Calculus: 12 | 13 | * *application* - provide an argument 14 | * *arg binding* - bind argument to name in body 15 | * *name* - substitute definition of name in scope 16 | 17 | Namespace extensions: 18 | 19 | * *reification* - capture current view of environment 20 | * *env binding* - bind environment to prefix in body 21 | * *translation* - modify body's view of environment 22 | 23 | Utility extensions: 24 | 25 | * *annotations* - structured comments for instrumentation, optimization, verification 26 | * *data* - opaque to the calculus, but embedded for convenience 27 | * *ifdef* - flexible expression of defaults, optional defs, merge-union, mixins 28 | * *fixpoint* - a built-in fixpoint for lazy, recursive defs 29 | 30 | The runtime provides an initial environment of names, supporting a [program model](GlasProg.md) and various *Module* conventions (e.g. %src, %env.\*, %self.\*). 31 | 32 | ## Abstract Syntax Tree (AST) 33 | 34 | Namespaces are encoded as structured glas data. This serves as an intermediate representation for namespaces and programs in glas systems.
35 | 36 | type AST = 37 | | Name # substitute definition 38 | | (AST, AST) # application 39 | | f:(Name, AST) # bind name in body, aka lambda 40 | | e:() # reifies current environment 41 | | b:(Prefix, AST) # bind argument to prefix in body 42 | | t:(TL, AST) # modify body's view of environment 43 | | a:(AST, AST) # annotation in lhs, target in rhs 44 | | d:Data # embedded glas data, opaque to AST 45 | | c:(Name,(AST,AST)) # ifdef conditional expression 46 | | y:AST # built-in fixpoint combinator 47 | type Name = binary excluding NULL 48 | type Prefix = any binary prefix of Name 49 | type TL = Map of Prefix to (Optional Prefix) as radix-tree dict 50 | 51 | The AST representation does not include closures, thunks, reified environments, etc. necessary for intermediate steps during evaluation. Those shall have ad hoc, abstract, runtime-specific representations. 52 | 53 | ## Evaluation 54 | 55 | Evaluation of an AST is a lazy, substitutive reduction in context of an environment that maps a subset of names to definitions (i.e. `type Env = Name -> optional AST` with caching). In most contexts, the initial environment is empty, thus insisting that AST terms are 'closed' or combinators, often of type `Env -> Env`. 56 | 57 | * application, lambdas, and names: as lazy lambda calculus evaluation 58 | 59 | * translation `t:(TL, Body)` - translates Body's view of names in the current environment through TL. Without reification, translation can serve a role for import aliases and access control. Translation becomes semantically significant insofar as it influences reification. 60 | * reification `e:()` - returns an abstract `Env` representing all names in scope, i.e. `{ "x" = x, "y" = y, ...}`, albeit lazily constructed. 61 | * Empty environment can be expressed as `t:({ "" => NULL }, e:())`. 62 | * Specified prefix can be selected by `t:({ "" => Prefix }, e:())`. 63 | * env binding - when applied `(b:(Prefix,Body), Env)`, binds Env to Prefix in context of evaluating Body.
That is, PrefixName now refers to Name in Env if defined. 64 | * Patch-based semantics: if Name is not defined in Env, fall back to prior definition. 65 | * Consider `t:({ Prefix => NULL }, b:(Prefix, Body))` to clear prefix before binding. 66 | * Record-selector pattern is `t:({""=>NULL}, b:("", Name))`, Env as first-class dict. 67 | 68 | * annotations `a:(Anno, Target)` - Semantically inert: logically evaluates as Target. In practice, we evaluate Anno to an abstract Annotation using compiler-provided Annotation constructors - by convention `%an.name` or `(%an.ctor Args)`. We then use this Annotation to guide instrumentation, optimization, or verification of Target. 69 | * data `d:Data` - evaluates to itself. Abstract to AST evaluation, but may be observed when applying primitive Names. 70 | * ifdef `c:(Name, (L, R))` - evaluates to L if Name is defined in current environment, otherwise R. 71 | * fixpoint - built-in fixpoint for convenient expression and efficient evaluation 72 | 73 | 74 | ### Reference Implementation 75 | 76 | TBD. Will write something up in Haskell or F# and get back on this. 77 | 78 | Some requirements or desiderata: 79 | 80 | * Lazy evaluation in general, necessary for lazy loading etc. 81 | * Memoized lazy partial evaluation within function bodies. 82 | 83 | Performance at this layer isn't essential for my use case, though no reason to avoid performance if it's a low-hanging fruit. But I'll be mostly focusing on the Program layer for performance of glas systems. 84 | 85 | ## Translation 86 | 87 | TL is a finite map of form `{ Prefix => Optional Prefix }`. To translate a name via TL, we find the longest matching prefix, then rewrite that to the output prefix. Alternatively, if the rhs has no prefix, we'll treat the name as undefined. 88 | 89 | The TL type works best with prefix-unique names, where no name is a prefix of another name. 
Consider that TL `{ "bar" => "foo" }` will convert `"bard"` to `"food"`, and it's awkward to target 'bar' alone. To mitigate, we logically add suffix `".."` to all names, and front-end syntax will discourage `".."` within user-defined names. The combination of logical suffix and front-end support allows translation of 'bar' together with 'bar.\*' via `{ "bar." => "foo." }` or 'bar' alone via `{ "bar.." => "foo.." }`. There is a possibility of translation *removing* the suffix that should be handled correctly by an evaluator's internal representation of names or environments. 90 | 91 | Sequential translations can be composed into a single map. Rough sketch: to compute A followed-by B, first extend A with redundant rules such that output prefixes in A match as many input prefixes in B as possible, then translate A's output prefixes as names via B (longest matching prefix). To normalize, optionally erase new redundancy. 92 | 93 | ## Loading Files 94 | 95 | Files are accessible through a few provided names. Loading files is staged, separate from runtime execution of the application program unless dynamic %eval is also enabled. 96 | 97 | * `(%load Src) : d:Data` - loads external resources at compile-time, returning opaque data. This operation may diverge if Src is malformed or unreachable. 98 | * `%src : Src` - by convention, this Src represents the file currently being compiled. It is expected that relative Src constructors take this Src as the root. 99 | * `%src.*` - constructors for abstract Src data. All constructors are relative to another Src. 100 | * `(%src.file FilePath Src) : Src` - evaluates to an abstract Src representing a FilePath relative to another Src. When loaded, returns an *optional binary*, treating 'does not exist' as a valid state. For other errors (e.g. unreachable, permissions issues) the loader diverges and logs a compile-time error. 101 | * Note: glas systems forbid `"../"` relative paths and absolute paths relative to relative paths.
See *Controlling Filesystem Entanglement* for motivations. 102 | * Note: absolute paths are still contextually relative, e.g. absolute paths within DVCS repository are relative to repository root. However, initial 103 | * `(%src.dvcs.git URI Ver Src) : Src` - returns a Src representing a DVCS git repository. If loaded, returns unit. Use '%src.file' to access files starting at repo root. 104 | * `(%src.an Annotation Src) : Src` - here Annotation is represented by embedded data. It is not observed by the loader, but is available later in context of runtime reflection on sources, e.g. `sys.refl.src.*`. 105 | * `(%macro Program) : AST` - evaluate a pure, 0--1 arity program that returns an AST representation. The latter is validated, evaluated in an empty namespace (i.e. implicit `t:({ "" => NULL }, AST)` wrapper), then substituted for the macro node. Linking is stage-separated from macro eval, e.g. the returned AST typically has a type such as `Env -> Env` and expects a parameter for linking. 106 | 107 | It is feasible to extend Src, e.g. to include HTTP resources or content-addressed data. I might add Mercurial and other DVCS sources. But the above should be adequate for my initial use cases. 108 | 109 | It is feasible to relax determinism, e.g. %load and %macro can have non-deterministic outcomes in the general case. But it is difficult to efficiently evaluate a large, non-deterministic namespace. In practice, macros and sources should be deterministic, and we'll warn in most contexts if any non-determinism is detected. 110 | 111 | ### Controlling Filesystem Entanglement 112 | 113 | Many programming languages allow messy, ad hoc relationships between module system and filesystem, e.g. with "../" paths, absolute file paths, etc.. Glas systems restrict these relationships in order to simplify refactoring, extension, and sharing of code. 
114 | 115 | First, we forbid parent-relative (`"../"`) paths in Src constructors, and absolute file paths may only be constructed relative to other absolute file paths (or DVCS repo root). This ensures folders can generally be treated as independent packages, easily shared by copying, excepting very few 'toplevel' folders (e.g. for the user configuration) which may reference other absolute paths within the local filesystem. 116 | 117 | Second, we hide files and subfolders whose names start with `"."`. For example, if a front-end compiler requests file `".git/config"` from a DVCS repo, this file is treated as non-existent regardless of whether it exists. This matches conventions for hidden or associated structure in the filesystem. Although front-end compilers cannot see these files and folders, a runtime may recognize and utilize `".glas/"` and `".pki/"` and so on for incremental compilation, signed manifests and certificates, etc.. 118 | 119 | Third, a front-end compiler cannot browse folders, cannot query folder contents. Although ability to browse is convenient for several use cases, alignment of code to folder structure eventually becomes a source of entanglement and embrittlement that hinders refactoring and non-invasive extension. Instead, users may construct indices within files. 120 | 121 | ### Affine File Dependencies 122 | 123 | In context of lazy loading, a dependency cycle isn't *necessarily* an error. But accidental cycles very easily become a source of errors. In context of live coding or projectional editing, merely sharing a file, loading it into multiple contexts, also has non-intuitive interactions. To mitigate these concerns, glas systems shall raise warnings or errors when a file is shared. These warnings may then be suppressed via explicit annotations. 124 | 125 | Because we discourage shared files, shared libraries, macros, templates, etc.. are fully modeled within the namespace. 
By convention, '%env.\*' serves the role of implicit parameters or pseudo-global namespace, propagated across modules. By default, this ultimately links to 'env.\*' in the user configuration, but user programs or projects may translate '%env.\*' to other targets within a scope. 126 | 127 | ## User-Defined Syntax 128 | 129 | As a supported convention, users may define front-end compilers per file extension in '%env.lang.FileExt'. To bootstrap this process, the glas executable initially injects a built-in definition for '%env.lang.glas' for long enough to build and extract the configured 'env.lang.glas'. 130 | 131 | The front-end compiler is a 1--1 arity Program implementing a pure function of type `Binary -> Module`, where *Module* is described below. This compilation is performed via %macro nodes, while fetching sources is handled separately via %load. 132 | 133 | Importantly, output of a front-end compiler is plain old data. This prevents the compiler from linking its own environment into the compiled module, forcing a stage separation between compilation and linking. Any shared definitions must either be integrated into the compiler output (per module) or provided through shared libraries in '%env.\*'. 134 | 135 | ## Modules 136 | 137 | The basic module type is `Env -> Env`, albeit tagged for future extensibility (see *Adapters and Tags*). The input environment should include primitive and conventional definitions under prefix '%', and may include a few user-defined names depending on how the module is integrated. A few names deserve special attention: 138 | 139 | * '%env.\*' - implicit parameters or context. Should propagate implicitly across most imports, but definitions may be shadowed contextually. This serves as the foundation for shared libraries, e.g. '%env.libname.op'. Binds to 'env.\*' in the user configuration via fixpoint. 140 | * '%arg.\*' - explicit parameters. This allows a client to direct a module's behavior or specialize a module.
It is feasible to import a module many times with different parameters. 141 | * '%self.\*' - open recursion. By externalizing fixpoint to the client, we can express modules in terms of inheritance, override, and mixin composition. 142 | * '%src.\*' - abstract location '%src' and constructors. When linking a module, the front-end compiler will shadow '%src' in scope. 143 | * '%.\*' - implicit 'private' space for the front-end compiler; starts empty 144 | 145 | The glas system provides the initial environment, including an initial '%env.\*', '%self.\*', '%src', and optionally providing runtime version info (or a method to query it) via '%arg.\*'. Front-end compilers must continue this pattern, though ideally the common functions (like wrapping an `Env -> Env` function to set '%src' and '%arg.\*', or common load steps) are shared between them. 146 | 147 | Usefully, modules are first-class within the namespace. We can define names to the result of loading a module, for example. 148 | 149 | ## Adapters and Tags 150 | 151 | It is useful to tag definitions, modules, etc. to support more flexible interpretation and integration. I propose to model tags as a Church-encoded variant, albeit leveraging first-class environments: 152 | 153 | template tag<"Tag"> = 154 | f:("Body", (f:("Adapters", 155 | ((b:("", "Tag"), "Adapters"), "Body")))) 156 | 157 | This receives an environment of Adapters, selects Tag, then applies to Body. This generalizes to inspecting adapters and picking one, or selecting multiple adapters non-deterministically. All definitions, modules, and other components should be tagged. Tags should roughly indicate integration, e.g. types and assumptions. A useful set of tags: 158 | 159 | * "data" - embedded data 160 | * "prog" - abstract program 161 | * "case" - conditional AST, e.g. 
body of '%cond' or '%loop' 162 | * "call" - `Env -> Def` - receive caller's environment, return another tagged definition 163 | * "module" - `Env -> Env` basic modules 164 | * "app" - `Env -> Env` basic applications 165 | 166 | I'll eventually want tags to support *Aggregation* patterns via Church-encoded lists, *Multiple Inheritance* via linearization and deduplication of inheritance graphs, and other useful features. Tags make it easy to introduce and integrate new types as needed, subject to de facto standardization. 167 | 168 | ## Multiple Inheritance 169 | 170 | The `Env->Env` type together with fixpoint (%self.\* or app.\*) easily models single inheritance, and can somewhat awkwardly model mixins. However, for more complicated cases, it requires manual linearization, i.e. deciding a consistent order in which `Env->Env` functions are composed while eliminating accidental redundancy. 171 | 172 | Support for multiple inheritance is feasible but requires an intermediate representation of the inheritance graph. This inheritance graph is processed to a linear sequence of `Env->Env` operations, then composed, ensuring shared ancestors appear only once and ensuring consistent or compatible order. The C3 linearization algorithm is relevant in this role. 173 | 174 | After glas systems mature a little, we can introduce tags to indicate when a module or app is represented by an inheritance graph. 175 | 176 | ## Controlling Shadows 177 | 178 | Shadowing of names can be useful, especially in context of *Aggregation* (see below). However, accidental shadowing can be a source of subtle errors. To mitigate this, I propose to report warnings or errors upon shadowing by default, then allow annotations to suppress warnings locally. 179 | 180 | ## Incremental Compilation 181 | 182 | Lazy evaluation can simplify incremental computing. Each thunk serves as a memo cell and tracks which thunks must be recomputed if its input ever changes. 
We can especially build a few thunks around %load and %macro nodes. 183 | 184 | For persistence, we must assign stable names to these thunks. In general, this could be a secure hash of everything potentially contributing to a given computation, e.g. code, arguments, perhaps compiler version (e.g. for built-ins). Unfortunately, it's easy to accidentally depend on irrelevant things, or to miss some implicit dependencies. To mitigate this, we must enable programmers to annotate code with a proposed stable-name generator. 185 | 186 | Whether we persist the *value* of a thunk may be heuristic, e.g. based on the relative size of that value and the estimated cost to recompute it. It's best to store small values with big compute costs, naturally. Like 42. For large values that are cheaply regenerated, we might omit the data and track some proxy for change - hash of data, ETAG, mtime for files, etc. Aside from this, we would track the set of dependent thunks that must be invalidated. 187 | 188 | ## Aggregation 189 | 190 | Language features such as multimethods, typeclasses, declaring HTTP routes, etc. require constructing and sharing tables. In context of functional programming, we can express iterative construction in terms of repeatedly shadowing a definition with an updated version. We can express sharing via fixpoint if we're careful to avoid datalock. 191 | 192 | An interesting opportunity is to use Church-encoded lists to aggregate tagged ASTs for later processing. This can be very flexible, and is directly analogous to the Writer monad. 193 | 194 | Aggregation across modules is hostile to lazy loading. But we could allow aggressive, automatic aggregation at the *application* layer. 195 | 196 | ## Hierarchy 197 | 198 | The proposed convention in glas is to represent hierarchical structure in a 'flat' namespace. There may be dotted paths in names, such as "foo.bar", but it's just one big name. 
The main alternative is to define "foo" as an Env, then treat syntax "foo.bar" as extracting "bar" from that Env. However, the flat namespace greatly simplifies access control and aliasing via translations, or updating 'deep' definitions. 199 | 200 | ## Indexed Modularity 201 | 202 | An interesting opportunity is to model modules as indexing other modules. This could be supported at multiple layers, e.g. a module that knows where all the good DVCS sources are, or one that provides access to a searchable collection of partially-evaluated shared libraries in '%env.\*'. This is possible due to the first-class nature of glas modules and sources. 203 | -------------------------------------------------------------------------------- /docs/GlasDesign.md: -------------------------------------------------------------------------------- 1 | # Glas Design 2 | 3 | Glas is named in allusion to transparency of glass, human mastery over glass as a material, and the phased liquid-to-solid creation analogous to staged metaprogramming. It can also be read as a backronym for 'general language system', which is something glas aspires to be. Design goals orient around compositionality, extensibility, scalability, live coding, staged metaprogramming, and distributed systems programming. 4 | 5 | Interaction with the glas system is initially through a [command line interface](GlasCLI.md). 6 | 7 | ## Data 8 | 9 | The 'plain old data' type for glas is the finite, immutable binary tree. Trees can directly represent structured and indexed data and align well with needs for parsing and processing languages. They are convenient for persistent data structures via structure sharing, and content addressing for very large values. A relatively naive encoding: 10 | 11 | type Tree = ((1 + Tree) * (1 + Tree)) 12 | a binary tree is pair of optional binary trees 13 | 14 | This can generally encode a pair `(a, b)`, a choice `(a + b)`, or a leaf `()`.
Alternatively, we could encode these options more directly as a sum type: 15 | 16 | type Tree = 17 | | Branch of Tree * Tree 18 | | Stem of (bool * Tree) # bool is left/right label 19 | | Leaf 20 | 21 | However, glas systems will often encode data into stems. Dictionaries such as `(height:180, weight:100)` can be encoded as [radix trees](https://en.wikipedia.org/wiki/Radix_tree), encoding the symbol into stem bits with a NULL separator from the data. An open variant type can be represented as a singleton dictionary. To support these encodings, we must compact stem bits. In practice, a runtime may represent arbitrary trees using something closer to: 22 | 23 | type Tree = (Stem * Node) # as struct 24 | type Stem = uint64 # encodes 0..63 bits 25 | type Node = 26 | | Leaf 27 | | Branch of Tree * Tree # branch point 28 | | Stem64 of uint64 * Node # all 64 bits! 29 | 30 | Stem Encoding (0 .. 63 bits) 31 | 10000..0 0 bits 32 | a1000..0 1 bit 33 | ab100..0 2 bits 34 | abc10..0 3 bits 35 | abcde..1 63 bits 36 | 00000..0 unused 37 | 38 | This allows for reasonable representation of labeled data. We may similarly encode integers into stems. However, we can further extend the Node to efficiently encode text or binary data, struct-like data, and other useful types. 39 | 40 | ### Integers 41 | 42 | Integers in glas systems are typically encoded as variable length bitstrings, msb to lsb, with negatives in one's complement: 43 | 44 | Integer Bitstring 45 | 4 100 46 | 3 11 47 | 2 10 48 | 1 1 49 | 0 // empty 50 | -1 0 51 | -2 01 52 | -3 00 53 | -4 011 54 | 55 | ### Lists, Arrays, Queues, Binaries 56 | 57 | Sequential structure in glas is usually encoded as a list. A list is either a `(head, tail)` pair or a leaf node, a non-algebraic encoding similar in style to Lisp or Scheme lists. 
58 | 59 | type List a = (a * List a) | () 60 | 61 | /\ 62 | 1 /\ the list [1,2,3] 63 | 2 /\ 64 | 3 () 65 | 66 | Direct representation of lists is inefficient for many use-cases, such as random access, double-ended queues, or binaries. To enable lists to serve many roles, lists are often represented under-the-hood using [finger tree](https://en.wikipedia.org/wiki/Finger_tree) [ropes](https://en.wikipedia.org/wiki/Rope_%28data_structure%29). This involves extending the 'Node' type described earlier with logical concatenation and array or binary fragments. 67 | 68 | Binaries receive special handling because they're a very popular type at system boundaries (reading files, network communication, etc.). Logically, a binary is a list of small integers (0..255). For byte 14, we'd use `0b1110` not `0b00001110`. But under the hood, binaries will be encoded as compact byte arrays. 69 | 70 | ### Optional Data and Booleans 71 | 72 | The convention for encoding 'optional' data in glas is to use an empty list for no data, and a singleton list for some data. The convention for encoding Boolean is optional unit, i.e. empty list for 'false' and a singleton list containing the empty list for 'true'. 73 | 74 | For 'Either' types, we'll usually switch to symbolic data like `ok:Result | error:(text:Message, ...)`. 75 | 76 | ### Rationals, Vectors, and Matrices 77 | 78 | Rational numbers can be represented by a dict `(n,d)` of integers. A vector might be represented as a list of integers or rationals, and a matrix as a list of vectors of identical dimensions. We could add complex or hypercomplex numbers, too, e.g. as `(r,i)` or `(r,i,j,k)` dicts of rationals or integers. 79 | 80 | Arithmetic operators in glas systems should be overloaded to handle these different number types where it makes sense to do so. And to prevent rationals from growing out of control, we could include rounding steps where needed. 
81 | 82 | We might be able to use floating point, but I'm uncertain we can make it deterministic across processors, which would conflict with some of my design goals. That said, a runtime can represent a useful subset of rational numbers as binary or decimal floating point internally. 83 | 84 | ## Namespaces and Programs 85 | 86 | A glas system is expressed as a modular [namespace](GlasNamespaces.md) defining *languages, libraries, applications, and adapters*. Typically, a user imports a community or company configuration from DVCS, then overrides definitions to integrate user-specific projects, preferences, and resources. A community configuration may be enormous, defining hundreds of applications; this is mitigated by lazy loading and caching. 87 | 88 | The [programs](GlasProg.md) are procedural in nature, but take inspiration from functional and object-oriented programming. However, to simplify optimizations and live coding, we rely on algebraic effects and metaprogramming instead of first-class functions or objects. 89 | 90 | ### Languages 91 | 92 | The namespace supports user-defined syntax: to load a ".xyz" file, we'll search for '%env.lang.xyz' in the current scope. This serves as a front-end compiler, writing an intermediate representation for programs into the namespace. To get started, the glas executable provides at least one built-in compiler, usually for [".glas" files](GlasLang.md). The built-in compiler is used to bootstrap the user definition if possible. 93 | 94 | ### Libraries 95 | 96 | Shared libraries are a design pattern within the namespace. An application can assume '%env.lib.math.whatever' is already defined. If wrong, the error message is clear and the fix is easy: install the library. By convention, names with prefix '%' are implicitly propagated across imports, and we'll apply a default translation `"%env." => "env."` to the configuration namespace.
Thus, we might install a library via import into 'env.lib.math' to share utility code with most applications. 97 | 98 | The main advantage of shared libraries is performance, avoiding redundant work across applications. The main disadvantage is customization: the application cannot override library definitions or change its links to other libraries. The disadvantage can be mitigated by translating links to alternative versions of specific libraries within some scope. 99 | 100 | ### Applications and Adapters 101 | 102 | This is detailed in [glas apps](GlasApps.md). But the general idea is that every application defines a purely functional, deterministic 'app.settings' function to guide integration. Depending on settings, the application may include 'app.\*' words such as 'app.main' for a conventional app, 'app.http' to receive HTTP requests (sharing debugger and RPC port), and 'app.step' for live-coding friendly transaction-loop applications. 103 | 104 | The runtime should not observe 'app.settings' directly. Instead, the runtime provides 'app.\*' words and runtime version information to the user configuration. The user configuration then generates a final adapter between runtime and application. This adapter is useful for portability, extensibility, and security. 105 | 106 | ## Distributed Systems Programming 107 | 108 | The transaction loop application model greatly simplifies programming of distributed systems. The application can be mirrored on every node, i.e. repeating the same 'step' function, handling 'http' and 'rpc' requests locally, caching or migrating state. The runtime can apply a heuristic optimization: abort a 'step' that is better initiated on another node. Similarly, repeating 'rpc' calls can be redirected to the relevant node. 109 | 110 | To fully leverage the distributed system, applications must be architected such that *most* transactions involve only one or two runtime nodes. 
During a network disruption, the application continues running locally, but some distributed transactions are blocked. This supports graceful degradation as the network fails, and resilient recovery as communication is restored. If necessary, an application may observe disruption indirectly via timeouts or reflection. 111 | 112 | Although transaction loops don't eliminate the need for design, they are flexible and forgiving of mistakes. We can always force a few distributed transactions rather than re-architecting. 113 | 114 | ## Live Coding 115 | 116 | To support live coding, a runtime might be configured or triggered to scan for source updates and switch to new code. There may be some behavior on switch, e.g. calling 'app.switch' in the updated code. If this fails, switching may be delayed until it succeeds to support a relatively smooth transition. 117 | 118 | The transaction-loop application model is designed to work nicely with live coding: we update 'app.step' between transactions. In contrast, a running 'app.main' application is a set of half-executed coroutines that cannot robustly be updated. At best, the runtime can swap namespaces atomically between '%yield' steps and typecheck the updated continuations. This may be sufficient if developers design their applications with live coding in mind. 119 | 120 | In a distributed runtime, we can usefully view 'code' as a set of read-mostly registers, allowing for read-only cache on many nodes without violating isolation. When sending data, the updated code that influenced that data must also be propagated to avoid a read-write conflict, but we can maintain transactional isolation even if code updates aren't instantaneous. 121 | 122 | ## Debugging 123 | 124 | Based on configuration, a glas runtime may open a TCP/UDP port for RPC, HTTP, and debugger access. While 'app.http' might handle most HTTP requests, a runtime can be configured to intercept a path to support debugging via browser or REST API. 
A runtime can provide generic debugging features. Application-specific debugger integration may be guided through 'app.settings' and annotations. 125 | 126 | ## Annotations 127 | 128 | a:(Annotation, Op) - annotated AST structure 129 | (%an.ctor Args) - Annotation nodes in AST 130 | 131 | As a general rule, annotations must not influence the formal behavior or 'meaning' of a program, but they may guide tooling and influence non-functional properties. Annotations are very useful for instrumentation, optimization, and validation of programs. 132 | 133 | Annotations may also be supported at other layers through simple conventions, e.g. 'foo.\#doc' in the namespace, a ".glas/" folder in a source package, or annotation nodes in [glas object](GlasObject.md). I don't have a strong use case for namespace annotations, though it may prove convenient when browsing a namespace. The ".glas/" folder could contain signed manifests to scope trusted code. 134 | 135 | ## Instrumentation 136 | 137 | Annotations should support users in logging, profiling, and tracing (for replay) of computations. A viable encoding: 138 | 139 | log (Chan, Message) { Operation } 140 | profile (Chan, Index) { Operation } 141 | trace (Chan, Cond) { Operation } 142 | 143 | This structure expresses logging 'over' an operation, in contrast to a one-off message event. This allows a runtime to maintain a log message periodically as state changes, or capture the most recent version of a message into a stack trace. The continuous nature allows us to contemplate opportunities such as 'animation' of a log. 144 | 145 | The Chan argument is intended to support configuration. This includes both providing configuration options (like verbosity) to the Message and to support disabling, routing, or translating entire volumes of logs. 
146 | 147 | ## Optimization 148 | 149 | Annotations guide performance features - acceleration, caching, laziness, parallelism, JIT compilation, tail-call optimization, use of content-addressed storage, etc.. 150 | 151 | ### Acceleration 152 | 153 | There are many functions that are difficult to implement efficiently within the glas program model due to lack of static types or suitable 'primitive' operations. In these cases, we can provide a slower reference implementation, then use an annotation to ask a runtime to replace the reference implementation with a high-performance built-in. Example: 154 | 155 | a:((%an.accel (%accel.matrix.mul "double")), ReferenceImpl) 156 | 157 | A runtime may have limited built-in verification of ReferenceImpl behavior, but it isn't guaranteed. In practice, it is convenient to perform unit tests on the ReferenceImpl alongside the accelerated version, e.g. by having ReferenceImpl name a separate definition. 158 | 159 | The runtime is expected to use specialized data representations to support accelerated functions. For example, finger-tree ropes for accelerated list slice and append, or representing a matrix of floats as a binary with some dimension info. 160 | 161 | It is best to accelerate widely useful types - matrices, graphs, sets, relational databases, etc.. An intriguing possibility is to accelerate 'eval' of a reasonably 'safe' virtual-machine code that is accelerated via JIT compilation to run directly on CPU or GPGPU. This is much more flexible than accelerating specific functions. 162 | 163 | A relevant concern with acceleration is that not all hardware-supported operations are portable. This is especially the case for floating point computations, e.g. with variations in internal precision. Either our ReferenceImpl must account for the target processor, trade performance for portability, or be non-deterministic to cover all possible valid hardware. 
164 | 165 | ### Thunks and Sparks 166 | 167 | A 'thunk' is an abstract representation of a deferred computation. A 'spark' is a thunk enqueued for evaluation by a pool of worker threads. Thunks and sparks can both simplify expression and supplement performance, i.e. express expensive computations where it's natural but perhaps drop the thunk before computing it, or use sparks to trigger expensive computations whose results will be necessary later without delaying the current transaction. 168 | 169 | A significant language design decision is whether thunks are implicit or explicit. Explicit thunks are more difficult to use, but easier to control, reason about, and efficiently implement - especially in context of divergence, error, orthogonal persistence, and remote procedure calls. For glas systems, I propose to model thunks explicitly. 170 | 171 | * `lazy { Expr }` - capture a computation into a thunk 172 | * `lazy.force(Thunk) : Result` - force a thunk to evaluate, or diverge 173 | * `lazy.spark(Thunk) : Thunk` - schedules thunk to evaluate in separate thread (returns Thunk) 174 | 175 | To keep implementation simple, I propose to initially restrict thunks to pure, atomic computations. In theory, we can eventually extend thunks to take a snapshot of registers read, or even generate output thunks for registers written. But users get 80% benefits for 20% implementation effort by restricting scope. 176 | 177 | ### Content-Addressed Storage 178 | 179 | To support larger-than-memory data, glas systems may leverage content-addressed storage to offload subtrees to higher-latency storage (e.g. disk or network). Like thunks, we benefit from modeling content-addressed data as explicit, abstract data. A viable API: 180 | 181 | * `cas.stow(Data) : StowedData` - wraps data for eventual transfer to high-latency storage. May be deferred, e.g. based on memory pressure 182 | * `cas.load(StowedData) : Data` - loads and unwraps stowed data. 
Load may be lazy after the runtime is confident lazy load will succeed (e.g. no concerns with network disruption or data validation) 183 | * `cas.need(StowedData) : StowedData` - advises runtime to have stowed data ready for a near-future 'load'. 184 | 185 | Content-addressed data interacts very nicely with memoization, persistent or read-mostly data structures, and persistent data storage. It also integrates easily with content delivery networks. 186 | 187 | ### Caching 188 | 189 | When applying a pure function to immutable data, we can use a secure hash as a lookup key. A persistent memoization table allows sharing work between applications. This isn't optimal - we could be including features of the data that aren't observed by the function - but it's a very simple basis for work sharing, and users can apply some extra processing to isolate relevant input prior to memoization. 190 | 191 | In glas systems, we'll rely on persistent memoization as a primary basis for incremental compilation. To work with large data, we should use content-addressed data to reduce the effective size of the argument. 192 | 193 | ### Program Rewrites? Defer. 194 | 195 | Mapping two *pure* functions over a list, e.g. `map f . map g`, is equivalent to mapping the composite function, `map (f . g)`. For map-reduce over a list, we can parallelize and distribute computation if the sum operation is associative. There are many similar observations on programs. However, it's difficult to *prove* such optimizations are safe. 196 | 197 | What can be done? One viable option is metaprogramming, letting users build a DSL that performs the optimizations when compiled further. Another is proof-carrying code, extending the program with proof hints. We could also use annotations as a sort of "trust me, bro" to the compiler, insisting a function is associative or commutative or monotonic or whatever without a proof. In the latter case, trust may be contingent on PKI, similar to application access to FFI. 
198 | 199 | At the moment, I won't pursue these optimizations too far at the runtime layer, leaving it to DSLs and metaprogramming. But it's certainly an area where we could obtain some significant returns on investment as the glas system matures. 200 | 201 | ### Warmed Applications 202 | 203 | We can evaluate application-layer 'start' and 'step' operations at compile time, insofar as they don't immediately await response from FFI or other external sources. The compiler would simulate state, non-deterministic choice, and other runtime features, pausing computation for anything it cannot handle. The compiler can decide based on heuristic space-time tradeoffs whether to include initialized state and partially evaluated transactions in a compiled image. With guidance from application settings, a compiler could also perform a series of 'http' requests and cache the results. 204 | 205 | ## Validation 206 | 207 | Annotations can express assertions, type annotations, even proofs. I'll explore some of our opportunities here. 208 | 209 | ### Assertions and Automatic Testing 210 | 211 | Assertions are by far the simplest form of validation. 212 | 213 | assert(Chan, Cond, Message) { Operation } 214 | 215 | We might interpret an assertion over an operation as expressing an invariant. Based on configuration for Chan, Cond can be randomly sampled, automatically tested upon stack trace, or tested continuously for every relevant change in state. To avoid influencing observable behavior, Cond can be a read-only function or evaluated within a hierarchical transaction. When an assertion fails, we log the message and halt the transaction. 216 | 217 | If Cond is non-deterministic, we'll interpret that as conjunction: every possible condition *should* hold true. However, for performance reasons, we might not evaluate them all every time: we could randomly or heuristically sample several conditions each time we encounter the assertion. This may depend on configuration of Chan. 
218 | 219 | In context of staged computing or partial evaluation, we can express static assertions. These may be evaluated before the application runs and effectively serve as unit tests. With non-deterministic conditions, we also get fuzz testing. In the glas program model, some staging and partial evaluation are aligned with the call graph via static parameters. This allows for some custom testing specific to the integration. 220 | 221 | ### Abstract Data Types 222 | 223 | Annotations can express that data should be abstract within a computation. However, it isn't always convenient to enforce types via static analysis. To support dynamic enforcement of abstract data types, we can extend the Node type: 224 | 225 | type Node = 226 | | ... # other Node types 227 | | Abstract of Key * Tree 228 | 229 | Based on annotations, a runtime can wrap and unwrap data with this 'Abstract' node, diverging on error. For robust security, we can use unforgeable things as keys, e.g. registers or abstract '%src' values. An intriguing opportunity: support keys with identity via weakrefs, then garbage-collect the sealed data when Key becomes unreachable. 230 | 231 | Based on static analysis, an optimizer can eliminate many wrap/unwrap actions, providing a robust basis for gradual typing. Reflection APIs may provide limited means to bypass abstractions. 232 | 233 | ### Scope Control Types 234 | 235 | In context of remote procedure calls and shared databases, it is often useful to control the scope of data. For example, we don't want open file handles escaping the runtime boundary. Scopes are most easily expressed as an extension to abstract types, such as files. For dynamic enforcement, we might represent runtime scope as an extra flag on the Abstract node's Key. 236 | 237 | However, efficient dynamic enforcement of scopes benefits from an O(1) lookup. Every node should cache metadata for whether it transitively includes runtime-scoped data. 
To support this efficiently, we could use [tagged pointers](https://en.wikipedia.org/wiki/Tagged_pointer), albeit only for a very small number of scopes. 238 | 239 | Fortunately, we don't need many scopes to cover most use-cases in glas systems. A useful hierarchy of scopes: 240 | 241 | * global scope - can send or receive over RPC 242 | * shared scope - can read or store to shared database 243 | * runtime scope - open files, network sockets 244 | * transaction scope - transaction-local data 245 | 246 | Whether we need all these scopes depends on the effects API and program model. For example, the shared (database) scope is necessary only if we want abstract database references as first-class values within the database, or we could restrict namespace refs to compile-time. 247 | 248 | ### Linear Types 249 | 250 | Linear data is abstract data that cannot be arbitrarily copied or dropped. This is useful when modeling resources, protocols, or promises. Linear types are potentially useful to ensure a transaction is 'complete' upon commit, i.e. to check there are no unfulfilled promises. Like scope, linear types can be expressed as a flag on abstract data then enforced efficiently using tagged pointers. 251 | 252 | Linear types are extremely awkward in open systems: they cannot be enforced, and they shouldn't be enforced - it's unclear how to clean up after an application dies mid-protocol. At runtime scope, linear types interact awkwardly with transaction-loop optimizations, such as incremental computing: we're forced to repeatedly read the linear data from state, observe or manipulate it, write it back to state. The best opportunity for linear types is at transaction scope, to enforce that transaction-local protocols are completed before the transaction commits. 253 | 254 | *Note:* It is feasible to separate linearity into affine (no copy) and relevant (no drop) types. The use case for this isn't especially strong, but the option remains open. 255 | 256 | ### Units on Numbers? 
257 | 258 | I want to express physical units on numbers - kilograms, newtons, meters, joules, etc. - and enforce safe use of units. However, I'm not certain of the best approach. Some options: 259 | 260 | * *staged computing* - model units as a static parameter and result. Likely to be awkward syntactically, but perhaps front-end language support can mitigate this. A big advantage compared to annotations is that this makes units accessible for 'print' statements and such. 261 | * *enum in accelerated number rep* - we're likely to accelerate our number types. It isn't too expensive to add an enum to this representation for units, covering most units encountered in practice, and verify across the basic arithmetic operations. This is probably the simplest short-term solution, though units would only be visible through a reflection API. 262 | * *static analysis* - add units to our type annotations, analyze at compile time. I'm reluctant on this option, mostly because I want to put off static analysis, but I want support for units relatively early. 263 | 264 | I'll need to think on this further. 265 | 266 | ### Proof-Carrying Code? 267 | 268 | I'm curious how well proofs can be supported via systematic annotations within programs. 269 | 270 | A reasonable question is what a 'proof' should look like. We could support some sort of user-defined reflective judgement over an AST and call graph, ideally while abstracting names and the namespace. No need to prove the prover works or terminates in general. We can let users define ever more provers to their own satisfaction. 271 | 272 | These judgements might be extended with an opportunity to annotate the AST or call graph for future passes or future proofs. 273 | 274 | ## Misc. Thoughts 275 | 276 | ### Program Search 277 | 278 | I'm interested in a style of metaprogramming where programmers express constraints on the program, both hard and soft, then we discover a program that meets these constraints. 
However, I don't have a good solution for this that ensures a deterministic outcome and supports incremental compilation. At the moment, probably best to leave this to a separate stage and isolate it within the namespace and call-graph? 279 | -------------------------------------------------------------------------------- /docs/GlasProg.md: -------------------------------------------------------------------------------- 1 | # Program Model for Glas 2 | 3 | The [namespace](GlasNamespaces.md) supports modules and user-defined front-end syntax. Programs are compiled to an AST structure built upon '%\*' primitives. This document describes a viable set of primitives for my vision of glas systems and some motivations for them. 4 | 5 | ## Proposed Program Primitives 6 | 7 | These primitives are constructors for an abstract data type, i.e. constructing a program does not execute it. The %macro and %load primitives are special exceptions, lazily evaluating at the namespace layer to support metaprogramming and modularity. 8 | 9 | *Notation:* `(F X Y Z)` desugars to `(((F,X),Y),Z)`, i.e. curried application. 10 | 11 | ### Control Flow 12 | 13 | * `(%do P1 P2)` - execute P1 then P2 in order. Associative. 14 | * `%pass` - the no-op. Does nothing. 15 | * `%fail` - voluntary failure. Used for backtracking a branch condition, choice, or coroutine step. (In contrast, errors are treated as divergence, i.e. infinite loops observable only via reflection APIs.) 16 | * `(%cond Sel)` - supports if/then/else and pattern matching. The case selector, Sel, has a distinct AST structure to support sharing a common prefix. Sel constructors: 17 | * `(%br Cond Left Right)` - runs branch condition, Cond. If Cond fails, backtracks to run Right selector, otherwise processes Left selector. The full chain of branch conditions runs atomically. 18 | * `(%sel P)` - selected action. Commits prior chain of passing branch conditions, then runs P. 19 | * `%bt` - backtrack. 
Forces most recent passing Cond to fail. If no such Cond, i.e. if %bt is rightmost branch, behavior is context-dependent (e.g. error for %cond, exit for %loop). As a special rule, we optimize `(%br C %bt R) => R` regardless of whether C is divergent. 20 | * `(%loop Sel)` - Repeatedly runs Sel until it fails to select an action, then exits loop. Same Sel structure as %cond. Essentially a hybrid of while-do and pattern matching. 21 | * `(%co P1 P2)` - execute P1 and P2 as coroutines with a non-deterministic schedule. Associative. Each coroutine operates on its own stack but shares access to registers and methods. 22 | * Preemption: scheduler may freely abort a coroutine to select another. 23 | * Parallelism: run many, abort a subset to eliminate conflicts, aka [optimistic concurrency control](https://en.wikipedia.org/wiki/Optimistic_concurrency_control). 24 | * Fork-join behavior: %co yields repeatedly until coroutines terminate. We can optimize the case where %co is the final operation of a coroutine, thus no join required. 25 | * `%yield` - within a coroutine, commit operations since prior yield. Each yield-to-yield step is an atomic, isolated transaction that may abort via fail. 26 | * `(%atomic P)` - runs P within a hierarchical transaction, thus yielding within P does not yield from atomic and must be resumed within P. 27 | * *Note:* a chain of %br branch conditions, up to %sel, is implicitly atomic. 28 | * `(%ch P1 P2)` - non-deterministic choice of P1 or P2. Associative. Can be implemented by forking the transaction and evaluating all choices, but only one can commit. 29 | * Special case: in context of transaction loops, e.g. `while (Cond) { atomic Action; yield }`, repeated choice can optimize into a reactive form of concurrency. 30 | * `%error` - explicit divergence. Logically equivalent to an infinite no-yield loop, but much easier to optimize. Please compose with `%an.error.log` to attach a message! 
31 | 32 | *Note:* For %do, %co, and %ch, it is *very tempting* to support a variable number of arguments, but directly doing so complicates semantics. A viable approach to variable arguments involves a front-end language Church-encoding lists of ASTs into an argument. 33 | 34 | ### Data Stack 35 | 36 | * `d:Data` - push data to top of data stack 37 | * `(%dip P)` - run P while hiding top element of data stack 38 | * `%swap` - exchange top two stack elements, i.e. "ab-ba". 39 | * `%copy` - copy top stack element, i.e. "a-aa". 40 | * `%drop` - drop top stack element, i.e. "a-". 41 | * `%mkp` - "ba-(a,b)" pair elements, right element starts on top 42 | * `%mkl` - rewrite top stack element to be left branch of tree 43 | * `%mkr` - rewrite top stack element to be right branch of tree 44 | * `%unp` - undoes mkp, fails if not a pair. 45 | * `%unl` - undoes mkl, fails if not a left branch 46 | * `%unr` - undoes mkr, fails if not a right branch 47 | 48 | ### Registers 49 | 50 | * `(%xch Register)` - exchange value of register and top item of data stack. 51 | * *Static analysis*: you can model this as also swapping *types* (or logical locations) between the register slot and the stack. That lets checkers propagate stack‑effect typing and enforce invariants. 52 | * *Optimization hint*: unconditional, atomic patterns such as `(%xch x; ... ; %xch x)` can be heavily optimized because logical locations are restored. 53 | * *Concurrency semantics*: For fine-grained conflict analysis, compiler built-in accelerators can define common patterns such as get and set, queue or bag reads and writes, indexed operations on arrays or dicts, or even support a few CRDTs. However, these interactions are not modeled as primitives. 54 | * `(%local RegOps)` - allocates a fresh register environment, passes it to `RegOps : Env -> Program`, runs Program, then clears the environment. 
The Env logically defines every Name to a unique register, but Program must use only a static, finite subset of these names. 55 | * `(%assoc R1 R2 RegOps)` - this binds an implicit environment of registers named by an ordered pair of registers `(R1, R2)`. The primary use case is abstract data environments: an API can use per-client space between client-provided registers and hidden API registers. 56 | 57 | ### Metaprogramming 58 | 59 | * `(%macro Builder)` - Builder represents a program of 0--1 arity, and is expected to return a closed-term AST representation on the data stack. This returned AST is validated, lazily evaluated in an empty environment, then substituted in place of the macro node. Because AST is closed term, external linking must be provided in context. 60 | * `(%eval Adapter)` - pop arbitrary Data from the stack, pass to Adapter - a namespace-layer function of type `d:Data -> Program`. Adapter typically includes %macro nodes for staged compilation of Data. The Program is subject to validation in context (e.g. verify type). Although dynamic eval is feasible, glas systems frequently forbid dynamic eval, requiring static Data argument (`%an.eval.static` by default). 61 | 62 | Non-deterministic metaprogramming is not *necessarily* an error, but it complicates reasoning and caching, requires expensive backtracking and heuristic search. Glas systems shall reject non-determinism in metaprogramming until they're mature enough to properly tackle these challenges. 63 | 64 | Both %macro and %eval serve at the boundary between namespace and program layers. There is also some metaprogramming possible purely in the namespace layer, e.g. we could build and process Church-encoded lists of ASTs. 65 | 66 | ### Modularity Extensions 67 | 68 | * `(%load Src)` - Load external resources at compile time. The result is embedded data that may be processed further via %macro. Errors are possible, e.g. 
if Src is malformed or unreachable, in which case this operation logically diverges. 69 | * `%src.*` - abstract Src constructors, e.g. to read local files, load from DVCS, search folders, possibly even look into a database. 70 | 71 | See [namespaces](GlasNamespaces.md) for details. 72 | 73 | ## Calling Conventions 74 | 75 | Definitions in the namespace should be tagged to indicate integration. A carefully designed set of tags can significantly simplify extension and metaprogramming. Proposed: 76 | 77 | * "data" - `Data` - embedded data, can integrate as program 78 | * "prog" - `Program` - abstract program, can integrate as program 79 | * "call" - `Env -> Def` - receives caller context (algebraic effects, pass-by-ref registers, etc.), returns another tagged definition. 80 | * We can develop further conventions around Env, e.g. supporting keyword or variable arguments. 81 | * "list" - Church-encoded list of tagged ASTs, useful for aggregations or variable arguments. Not necessarily homogeneous. 82 | 83 | In my vision, most definitions are tagged "call" and return "prog", except near the edges where we might have a lot of "prog" and "data" definitions. Use of "list" would be rare outside of aggregators and var-args, and requires specialized processing by an adapter. Eventually, we'll also have many non-callable tags, such as "type". We might also support multiple inheritance graphs in a generic way, and develop specialized tags for grammars, process networks, etc. 84 | 85 | ## Annotations 86 | 87 | a:(Annotation, Op) # dedicated AST node 88 | 89 | Acceleration: 90 | * `(%an.accel Accelerator)` - performance primitives. Indicates that a compiler or interpreter should substitute a built-in Accelerator for Op. By convention, Accelerators have form `(%accel.OpName Args ...)` (or `%accel.OpName` if no arguments). Accelerators are not Programs, and are only useful in context of `%an.accel`. 91 | 92 | Instrumentation: 93 | * `(%an.log Chan MsgSel)` - printf debugging! 
Logging will *overlay* an Operation, automatically maintaining the message. The MsgSel type is sophisticated; see *Logging*. 94 | * `(%an.error.log Chan MsgSel)` - log a message only when Operation halts due to an obvious divergence error (such as '%error', assertion failure, or a runtime type error). 95 | * `(%an.assert Chan ErrorMsgGen)` - assertions are structured as logging an error message. If no error message is generated, the assertion passes. May reduce to warning. 96 | * `(%an.assert.static Chan ErrorMsgGen)` - assertion that must be computed at compile-time, otherwise it's a compile-time error. May reduce to compile-time warning with or without a runtime error. 97 | * `(%an.profile Chan BucketSel)` - record performance metadata such as entries and exits, time spent, yields, fails, and rework. Profiles may be aggregated into buckets based on BucketSel. 98 | * `(%an.trace Chan BucketSel)` - record information to support slow-motion replay of Operation. BucketSel helps control and organize traces. See *Tracing*. 99 | * `(%an.view Chan Viewer)` - support interactive debug views of a running application. See *Debug Views* 100 | * `(%an.chan.scope TL)` - apply a prefix-to-prefix translation to Chan names in Operation. 101 | 102 | Validation: 103 | * `(%an.arity In Out)` - express expected data stack arity for Op. In and Out must be non-negative integers. Serves as an extremely simplistic type description. 104 | * `%an.atomic.reject` - error if running Operation from within an atomic scope, including %atomic and %br conditions. Useful to detect errors early for code that diverges when run within a hierarchical transaction, e.g. waiting forever on a network response. 105 | * `%an.atomic.accept` - to support simulation of code containing %an.atomic.reject, e.g. with a simulated network, we can pretend that Operation is running outside a hierarchical transaction, albeit only up to external method calls. 
106 | * `(%an.data.seal Key)` - operational support for abstract data types. For robust data sealing, Key should name a Register, Src (like '%src'), or other unforgeable identity. Sealed data cannot be observed until unsealed with a matching Key, usually symmetric. If the Key becomes unreachable (e.g. Register out of scope), the sealed data may be garbage collected, and this may be detectable via reflection APIs. Actual implementation is flexible, e.g. compile-time static analysis at one extreme, encryption at another, but simple wrappers are common. 107 | * `(%an.data.unseal Key)` - removes matching seal, or diverges 108 | * `(%an.data.seal.linear Key)` - a variant of seal that also marks sealed data as linear, i.e. no copy or drop until unsealed. Note: This does not fully guard against implicit drops, e.g. storing data into a register that falls out of scope. But a best effort and warnings are expected. 109 | * `(%an.data.unseal.linear Key)` - counterpart to a linear seal. If data is sealed linear, it must be unsealed linear. 110 | * `%an.data.static` - Indicates that top stack element should be statically computable. This may propagate requirements for static inputs back through a call graph. In context of conditionals, choice, coroutines, etc. the compiler can feasibly attempt to verify that all possible paths (up to a quota) share this result. 111 | * `%an.eval.static` - Indicates that all '%eval' steps in Operation must receive their AST argument at compile-time. This is the default for glas systems, but it can make intentions clearer to reiterate the constraint locally. 112 | * `(%an.type TypeDesc)` - Describes a partial type of Operation. Not limited to programs, so namespace-layer and higher-kinded types are also relevant. Can also support type inference in the context surrounding Operation. TypeDesc will have its own abstract data constructors in '%type.\*'. 113 | * `%an.det` - Annotates an `Env -> Program` structure. 
This expresses the intention that Program should be deterministic *up to Env*. A compiler should prove this or raise an error. 114 | * The simplest proof is that Program doesn't use '%co' or '%choice' or interact with mutable state (even indirectly) except through Env. 115 | * Ideally, we can also recognize simple confluence patterns, e.g. Kahn Process Networks, where coroutines communicate through queues with clear ownership (no races between two readers or between two writers). 116 | * Eventually, proof-of-confluence annotations may be viable. Not sure how feasible this is. 117 | 118 | Laziness: 119 | * `%an.lazy.thunk` - The simplest integration of lazy evaluation. Op must be pure, atomic, 1--1 arity, terminating - anything else is a type error, though perhaps only detected upon 'force'. Instead of computing immediately, we return a thunk representing the future result. 120 | * Non-deterministic Op is accepted, i.e. commit to a non-deterministic choice without observing the result. An intriguing opportunity is to only choose the value for a non-deterministic thunk after an observing transaction commits. This is formally valid with non-determinism. 121 | * `%an.lazy.force` - Op (usually %pass) must return a thunk at top of data stack. We force evaluation of the thunk before returning, placing the data result of evaluating that thunk on the stack. 122 | * Force diverges if computation represented by a thunk fails or diverges. This is considered a type error. 123 | * `%an.lazy.spark` - Op (usually %pass) must return a thunk at top of data stack. If the thunk has not already been computed or scheduled, we'll schedule that thunk for background computation by runtime worker threads. 124 | 125 | Content-addressed storage: 126 | * `%an.cas.stow` - Op (usually %pass) must return data of persistent or global ephemerality at top of stack. We wrap that data then lazily move it to external storage based on size, usage, and memory pressure. 
127 | * `%an.cas.load` - Op (usually %pass) must return stowed data at top of stack. Loads and substitutes the actual data. Loading may be lazy, but only when the runtime is confident it can fully load the data (accounting for risks of network disruption and invalid representation). Diverges if the data cannot be loaded. Reflection APIs may offer a detailed view of errors. 128 | * `%an.cas.need` - Op (usually %pass) must return stowed data at top of stack. Tells runtime that this data will be needed in the near future. This enables the runtime to heuristically download, validate, etc. the data ahead of time so it's more available when needed. 129 | 130 | Incremental computing: 131 | * `(%an.memo MemoHint)` - memoize a computation. Useful memoization hints may include persistent vs. ephemeral, cache-invalidation heuristics, or refinement of a 'stable name' for persistence. TBD. 132 | * As a minimum viable product, we'll likely start by only supporting 'pure' functions, because that's a low-hanging, very tasty fruit. 133 | * `(%an.checkpoint Hints)` - when retrying a transaction, instead of recomputing from the start it can be useful to rollback partially and retry from there. In this context, a checkpoint suggests a rollback boundary. A compiler may heuristically eliminate unnecessary checkpoints, and Hints may guide heuristics. 134 | 135 | Guiding non-deterministic choice: 136 | * `(%an.cost Chan CostFn)` - (tentative) emits a heuristic 'cost'. CostFn has type `Env -> Program`, with Env providing access to Chan configuration options, ultimately returning a non-negative rational number on the data stack. Like logging, the Program also has implicit access to the host environment for dynamic costs. The only role of costs is to guide non-deterministic choice, disfavoring "high cost" options - or choices that will obviously lead to high costs later on. 
137 | * Beyond tweaks by CostFn based on Chan configuration, a user configuration could amplify or suppress costs per channel, enabling an encoding of purpose and preference into channel names. 138 | * *Aside:* In theory, we could support non-monotonic costs to represent gains, too. But all the efficient search algorithms assume monotonicity. 139 | 140 | Future development: 141 | * type declarations. I'd like to get bidirectional type checking working in many cases relatively early on. 142 | * tail-call declarations. Perhaps not per call but rather an indicator that a subroutine can be optimized for static stack usage, optionally up to method calls. 143 | * stowage. Work with larger-than-memory values via content-addressed storage. 144 | * debug trace. Probably should wait until we have a clear idea of what a trace should look like. 145 | * debug views. Specialized projectional editors within debuggers. 146 | 147 | ### Logging 148 | 149 | a:((%an.log Chan MsgSel), Operation) 150 | 151 | type Chan is AST of form d:Name 152 | # naming conventions apply 153 | 154 | type MsgSel : Env -> Sel # where 155 | (%cond (%br %pass Sel %fail)) : Program 156 | 157 | type Msg is plain old glas data, often a Text 158 | 159 | Logging overlays Operation. When Operation is a no-op (`%pass`), this reduces to conventional one-off logging. However, more generally, we may recompute messages when Operation yields or halts on error, heuristically at checkpoints or stable failure. Periodic or random sampling is also viable, perhaps heuristically tuning frequency based on performance. This behavior may be configurable per Chan, a precision versus performance decision. 160 | 161 | Due to this overlay structure, it is useful to render logs as a time-varying tree structure. Instead of a simple stream of text, a log should be serialized as a stream of events on a tree, e.g. add and remove nodes. 
Non-deterministic choice adds another dimension to this tree, at least insofar as options are evaluated in parallel. 162 | 163 | Messages are computed within hierarchical transactions that are aborted after extracting the message. MsgSel may destructively inspect registers and data stack in scope, or invoke atomic operations as a 'what if'. However, data stack is a special case: it is in scope only when Operation is `%pass`, in which case MsgSel has the same access as the surrounding Program. 164 | 165 | The runtime provides the Env argument. Exact contents depend on runtime version and configuration-provided adapters, but should de facto stabilize. The role of Env is to provide reflection APIs, e.g. to read %src, inspect the call stack or data stack, or query configured Chan settings. Configured Chan settings are fully arbitrary, e.g. preferred language, accepted format, level of detail, and degree of sarcasm. Many queries can be completed at compile-time, allowing for partial evaluation and specialization of logging code. 166 | 167 | If MsgSel uses non-deterministic choice, the runtime may generate all possible messages (subject to configuration). With runtime support, users may configure a non-deterministic choice of Chan settings, such that we log multiple versions of messages. 168 | 169 | ### Tracing (TBD) 170 | 171 | a:((%an.trace Chan BucketSel), Operation) 172 | 173 | We can ask the runtime to record sufficient information to replay a computation. This is expensive, so we might configure tracing (per Chan) to perform random samples or something, switching between traced and untraced versions of the code. 174 | 175 | What information do we need? 
176 | 177 | * input registers - initial, updates after yield 178 | * input register updates and return values from calling untraced methods 179 | * stream of non-deterministic choices and scheduling for replay 180 | * distinguish backtracked choices to allow skipping them 181 | * for long-running traces, heuristic checkpoints for timeline scrubbing 182 | * for convenience, complete representation of subprogram being traced 183 | * content-addressed for structure sharing 184 | * consider adding contextual stack of log messages etc. 185 | 186 | I think this won't be easy to implement, but it may be worthwhile. 187 | 188 | BucketSel is just a means to conditionally disable tracing. Similar to MsgSel except only evaluated once, and the returned bucket(s) are simply dynamic indices for lookup. 189 | 190 | ### Debug Views 191 | 192 | An intriguing opportunity: *interactive views* of running code. 193 | 194 | (%an.view Chan Viewer) 195 | 196 | Viewer may have type `Env -> Program` where the Env includes both channel configuration options and a view context of callbacks and registers. View callbacks support ad-hoc queries (level of detail, user preferences, content-negotiation) and a stream of writes (graphics and texts, GUI update commands, etc.). View registers are opaque to the user but held across requests, supporting persistence of navigation, progressive disclosure, or even retained-mode GUI (by tracking what has already been written). A client may fork, checkpoint, or freeze the view by controlling context. 197 | 198 | Like logging, the viewer program runs in a hierarchical transaction. By default, updates to the application are undone after the program returns, while updates to the view context are retained. However, we can introduce a 'commit' callback in Env to change behavior on a per-call basis. This essentially enables editing of local registers in a running application through integrated debug views. Such edits may be rejected, e.g. 
because the user doesn't agree, or due to read-write conflict with concurrent operations. The [Glas GUI](GlasGUI.md) design document describes some relevant patterns. 199 | 200 | The Chan can also serve a role of naming a view for discovery and integration. The compiler can warn if there is more than one view per Chan within an application. An application may serve as its own client through a reflection API (perhaps sys.refl.view.\*), thus serving debug views through non-debugger interfaces. 201 | 202 | ### Accelerators 203 | 204 | (%an.accel (%accel.OpName Args)) 205 | 206 | *Convention:* For pure representation transforms, such as asking a runtime to represent a list as an array under the hood, I express this as "acceleration" of a no-op. Representation transforms are naturally slower than a no-op, but exist only to support other accelerators. 207 | 208 | List Ops: 209 | * len 210 | * append 211 | * split 212 | * index get/set/swap 213 | 214 | Dict Ops: 215 | * insert 216 | * remove 217 | * count 218 | * keys 219 | 220 | Bitstring Ops: 221 | * len 222 | * invert 223 | * reverse 224 | * split 225 | * append 226 | 227 | Arithmetic 228 | * Sum 229 | * Product 230 | * Negation 231 | * Reciprocal 232 | 233 | Register Ops: 234 | 235 | * Cell 236 | * Get 237 | * Set 238 | * Queue 239 | * Read 240 | * Peek 241 | * Unread 242 | * Write 243 | * Bag 244 | * Put 245 | * Grab 246 | * Peek 247 | * KVDB, implicit 'register' per key but dynamic 248 | 249 | ## TBD 250 | 251 | 252 | ### In-Place Update? Defer. 253 | 254 | It is possible to support in-place update of 'immutable' data if we hold the only reference to its representation. This can be understood as an opportunistic optimization of garbage-collection: allocate, transfer, and collect in one step. In glas programs, this would be feasible with accelerators: for example, a list update operator could swap a list element without reallocating the list. This is especially useful if the list is represented by an array. 
255 | 256 | However, pervasive use of transactions and backtracking complicates this optimization. It is convenient to capture a snapshot of registers so we can revert if necessary. Although this snapshot isn't a logical copy and thus doesn't conflict with linear types, it is a shared representation and thus does hinder in-place update. 257 | 258 | A viable alternative is to maintain a 'log' of updates to apply later. For example, a runtime could feasibly represent the updated list as a special `(update log, original list ref)` pair within runtime. This might generalize to [log-structured merge-tree (LSM trees)](https://en.wikipedia.org/wiki/Log-structured_merge-tree) [ropes](https://en.wikipedia.org/wiki/Rope_(data_structure)). 259 | 260 | This doesn't quite support the ideal of in-place update. We must allocate that log, and perhaps some metadata to track elements to process further upon commit. But perhaps we can still perform in-place update upon commit, and benefit from editing nearer to the tree root. This seems a viable approach. 261 | 262 | Meanwhile, we'll still support decent persistent data structures by default, e.g. finger-tree ropes still support O(log(N)) updates in the center, O(1) at the edges, and we can easily use a pair as a gap buffer. 263 | 264 | ### Tail Call Optimization 265 | 266 | I'd suggest unrolling a recursive loop a few frames then determining whether we can 'recycle' the stack locations. Ideally, TCO can be enforced via annotations, e.g. by specifying that a subprogram has a finite stack, or that an `Env -> Prog` is finite-up-to Env. 267 | 268 | ### Unit Types 269 | 270 | Attaching unit-types to number representations is inefficient. Recording them into type annotations makes units difficult to use, e.g. for printing values. An interesting possibility, however, is to track units for *registers*, aiming for static computation of unit registers. We could use '%assoc' to associate unit registers with a number register. 
271 | 272 | ### Memoization 273 | 274 | In context of procedural programming, memoization involves recording a trace. This trace describes the computation performed (perhaps via hash), the data observed, and outputs written. To execute a memoized computation, we search for a matching trace then write the outputs directly. If no matching trace is found, we run the computation while recording the trace, then add it to the cache. 275 | 276 | We can improve memoization by making the traces more widely applicable, abstracting irrelevant details. For example, we might observe that a register contains 42, but a trace might match so long as a register value is greater than zero. 277 | 278 | However, even the simplest of traces can be useful if users are careful about where they apply memoization. We can memoize a subset of procedures that represent "pure" expressions or functions to support incremental compilation, monoidal indexing of structure, and similar use cases. 279 | 280 | ### Lazy Computation 281 | 282 | To get started with a simple implementation, I propose explicit thunks of 1--1 arity, pure (but optionally non-deterministic), atomic computations. Computation may fail or diverge, in which case forcing the thunk will diverge. 283 | 284 | In case of non-deterministic lazy computations, the outcome remains non-deterministic until a thread commits AFTER force. This allows for expression of lazy choice, lazy entanglement, and searching outcomes. 285 | 286 | Eventually, we might extend laziness to multiple stack inputs or read-only snapshots of registers. But doing so is difficult with explicit thunks. And I don't feel comfortable with a move to implicit thunks without proofs of timely (e.g. polynomial) termination, static analysis of linear type dataflow, and similar features. 287 | 288 | *Note:* Because lazy annotations influence observation of divergence, I'm tempted to move from annotations to primitives. 
289 | 290 | ### Futures, Promises, Channels 291 | 292 | It isn't difficult to extend laziness with explicit 'holes', i.e. such that a program can allocate a `(future, promise)` pair. We'll need some integration with linear and non-linear data, i.e. allowing for linear and non-linear futures. This extends very naturally to channels, e.g. via including another future in the result, or by assigning a sequence of values to a promise. 293 | 294 | It isn't difficult to present holes as a program primitive. But holes are fundamentally impure: they introduce identity. I think it's probably better to model them as part of a runtime-provided effects API. 295 | 296 | ### Accelerators 297 | 298 | Essentially, primitives with a reference implementation. 299 | 300 | (%an (%an.accel (%accel.OpName Args)) Op) 301 | 302 | Accelerators ask a compiler or interpreter to replace Op with an equivalent built-in implementation. The built-in should offer a significant performance advantage, e.g. the opportunity to leverage data representations, CPU bit-banging, SIMD, GPGPU, etc.. Arguments to an accelerator may support specialization or integration. 303 | 304 | Ideally, the compiler or interpreter should verify equivalence between Op and Accelerator through analysis or testing. However, especially in early development and experimentation phases, it can be awkward to maintain Op and Accelerator together. During this period, we may accept `()` or an undefined name as a placeholder, emitting a TODO warning. 305 | 306 | Accelerators support 'performance primitives' without introducing semantic primitives. If we build upon a minimalist set of semantic primitives, we'll be relying on accelerators for arithmetic, large lists, and many other use cases. 307 | 308 | ### Breakpoints 309 | 310 | (%an.bp Chan BucketSel) 311 | 312 | We could feasibly annotate conditional breakpoints into a program. Ideally, we'll integrate the notion of overlay breakpoints, e.g. breaking when conditions are met. 
313 | 314 | Not sure exactly what I want here, however. 315 | 316 | ### Content-Addressed Storage 317 | 318 | Annotations can transparently guide use of content-addressed storage for large data. The actual transition to content-addressed storage may be transparently handled by a garbage collector. Access to representation details may be available through reflection APIs but should not be primitive or pervasive. 319 | 320 | ### Environment Abstraction 321 | 322 | Instead of only abstracting data, it can be useful to abstract volumes of the environment. This allows us to develop APIs where the client provides a location, but cannot access the associated data. 323 | 324 | Modeling this in names is too awkward. However, it is feasible to introduce a corollary to '%local' for binding associated names. In this case, our goal is to draw an 'arc' between two registers and treat it as a new prefix of registers. 325 | 326 | ### Type Descriptions 327 | 328 | (%an (%an.type TypeDesc) Op) 329 | 330 | We can just invent some primitive type descriptions like '%type.int' or whatever, things a typechecker is expected to understand without saying, and build up from there. It isn't a big deal if we want to experiment with alternatives later. 331 | 332 | Some thoughts: 333 | - Instead of hard-coding a few types like 'i64' or 'u8', consider an `(%type.int.range 0 255)` or similar. This would allow for more flexible packed representations and precise tracking of precision across arithmetic steps, e.g. adding u8 and u8 has range 0 to 510 (not quite a u9), and we can require explicit modulus or conditionals to store result back into a u8 334 | - we could feasibly use 'int range' types as parameters to list length types, to support vectors of exactly one size (range 32 to 32) or more sizes. 335 | - obviously need a const type that supports only a single value, too. 
336 | 337 | ### Reflection, Transpilation, Alternative Models 338 | 339 | The program model provides a foundation for glas systems, but I'm interested in exploring alternative foundations and support for compilation between them. As I see it, there are a few opportunities here: 340 | 341 | - Reflection APIs: a runtime or compile-time could provide some APIs to inspect definitions. 342 | - However, this solution seems semantically troublesome because we'd either be observing recursive definitions *after* substitution of names for definitions is applied, or be observing some runtime-specific internal representation. 343 | - Quotation APIs: a front-end language can support quoting of definitions or expressions into an embedded data representation of an AST. Further, it could support quoted imports, i.e. load binary source code or compile to the AST representation of `Env->Env` without evaluating it. 344 | - This solution is logistically troublesome. We'll need some way to efficiently cache definitions for macro evaluation when building a larger namespace. 345 | 346 | Of these options, I think a foundation of quotation APIs offers the more robust solution. We can tackle logistical challenges by essentially integrating a compiler as a shared library and some clever use of caching and acceleration. 347 | 348 | -------------------------------------------------------------------------------- /docs/GlasApps.md: -------------------------------------------------------------------------------- 1 | # Glas Applications 2 | 3 | The [glas executable](GlasCLI.md) lets users run an application defined in the configuration namespace or a separate script file. To simplify extension and composition, each application is packaged into a single definition. To simplify sharing and integration, applications are named 'app' or 'env.appname.app'. The latter supports many applications to be named and defined within a user configuration. 
4 | 5 | ## Application Models 6 | 7 | Basic applications are modeled as [namespace-layer](GlasNamespaces.md) `Env -> Env` functions, tagged "app". The input Env represents a runtime-provided effects API ('sys.\*' and global state) and an open fixpoint ('app.\*'), while the returned Env represents a collection of application methods. Most basic applications should implement at least 'main', 'settings', and 'http'. Sample methods: 8 | 9 | * 'main' - a program, the standard entry point. Upon return, the application halts. 10 | * 'settings' - queried to guide integration and application-specific configuration. 11 | * 'http' - a flexible interface with many use cases (services, browser-based gui, etc.). The runtime opens a configurable port multiplexed with remote procedure calls. 12 | * *Note:* runtimes may reserve `"/sys/*"` (configurable) for debugger and administrative use. 13 | * 'rpc' - (tentative) receive remote procedure calls, more advanced protocol than HTTP (e.g. to integrate with distributed transactions, algebraic effects, and content-distribution networks). 14 | * 'gui' - see [Glas GUI](GlasGUI.md), i.e. GUIs integrated with transaction model 15 | * 'signal' - special administrative signals, e.g. to gracefully halt or hibernate 16 | * 'switch' - first operation on new code in context of live coding 17 | 18 | Use of the 'main' procedure is a familiar convention for defining applications. Separation of 'http', 'rpc', and 'gui' methods simplifies multiplexing, composition, and persistence (compared to manually opening listeners). The open fixpoint 'app.\*' contributes to extensibility, e.g. single-inheritance overrides and mixins. The 'settings', 'signal', and 'switch' operations simplify integration. 19 | 20 | This set of methods is extensible. For portability reasons, 'settings' should indicate which methods the runtime is expected to recognize and integrate. But we can also develop entirely distinct application models, recognizing tags other than "app". 
For example, it is feasible to model applications as constraint-logic systems, generative grammars, process networks, interaction nets, or hardware description language. 21 | 22 | ### Application Adapter 23 | 24 | The user configuration may define an application adapter. The adapter applies to an application's definition before compiling and running it. This is convenient for portability, e.g. we could adapt application 'settings' to the runtime version, or adapt a runtime effects API to the application. 25 | 26 | But an important use case is support for user-defined application models, e.g. users introduce tag "kpn" for [Kahn process networks](https://en.wikipedia.org/wiki/Kahn_process_networks), the adapter can compile to a basic "app" if the runtime does not have built-in support for "kpn". This enables users to 'run' ad hoc application models without manually wrapping them. 27 | 28 | ### Staged Applications? TBD. 29 | 30 | A runtime can feasibly support 'staged' application models, e.g. where an application is further compiled based on command-line arguments or environment variables. This conflicts with ahead-of-time compilation, requiring the runtime include an interpreter or just-in-time compiler. The main benefit is flexibility. 31 | 32 | ## Runtime-Provided Effects 33 | 34 | The runtime provides an initial namespace of registers and methods to a basic application: 35 | 36 | * 'sys.\*' - system APIs, e.g. network, filesystem, clock, FFI, reflection 37 | * 'db.\*' - shared, persistent registers, bound to a configured database 38 | * 'g.\*' - ephemeral, 'global' registers bound to runtime instance 39 | 40 | These are provided in the input Env (alongside 'app.\*' for the open fixpoint). The 'sys.\*' APIs are described below, while 'db.\*' and 'g.\*' provide some toplevel state visible to 'http' and other methods. 
Although these model global state, it is possible to scope access and partition application state between subcomponents (assuming support from the front-end compiler). 41 | 42 | ## State 43 | 44 | The program model has built-in support for registers, and the application receives a few volumes of registers as 'db.\*' and 'g.\*'. But it isn't difficult to support first-class references to mutable state (like Haskell's IORef). A viable API: 45 | 46 | * `sys.state.ref.*` - (tentative) first-class state 47 | * `new(Data) : Ref` - new reference initially containing Data; runtime-ephemeral 48 | * `db.new(Data) : Ref` - as 'new' but backed by the database; database-ephemeral 49 | * `with(Ref) : [op]` - pop Ref from stack, run 'op' with access as register 'ref' 50 | 51 | Relative to second-class registers, references complicate static dataflow and conflict analysis, linear type safety, and garbage collection. They also introduce a source of hysteresis or path dependence for data schema change, e.g. in context of live coding. For these reasons, my vision for glas systems favors registers over references, and I do not provide references as a program primitive. 52 | 53 | ## Concurrency 54 | 55 | Concurrency is built into the program model (non-deterministic coroutines, optimistic concurrency control), thus no separate effects API is required. But we can discuss some interesting patterns. 56 | 57 | ### Transaction Loops 58 | 59 | It is feasible to express an application, or a significant part of it, as a transaction loop where the same transaction is performed repeatedly with some non-deterministic choice. 60 | 61 | while (Cond) do { atomic (choice ...); yield } 62 | 63 | Isolated transactions are equivalent to sequential transactions. Thus, we can implement this loop by running many cycles simultaneously, each 'thread' handling a different non-deterministic choice. 
We can continue running the loop concurrently until Cond fails on some or all non-deterministic choices, then proceed to whatever operation follows the loop. 64 | 65 | Instead of fully recomputing a transaction on every cycle, we can introduce checkpoints for partial rollback. With careful design, each choice has a stable prefix that is cached, so we're actually looping only the unstable suffix. In case of unproductive loops (i.e. failed, diverged, or idempotent), the runtime may wait for relevant state changes before recomputing, modeling reactive systems. 66 | 67 | Unfortunately, implementation of these optimizations is a daunting task. This opportunity is not easy to grasp. My vision for glas systems benefits enormously from transaction-loop optimizations, but short term we will rely on the more conventional coroutines. 68 | 69 | ### Distribution 70 | 71 | I envision a 'runtime' distributed across networked nodes, and an application running upon it. This requires compatible design of effects APIs, e.g. supporting multiple filesystems, network cards, and clocks. 72 | 73 | In the worst case, we can run every application step in a distributed transaction. However, this is terribly slow and fragile to network faults. To effectively leverage a distributed runtime, we must architect applications such that most steps run on one node, and most remaining steps on two, with very few transactions touching three or more. 74 | 75 | Behavior can be distributed. Coroutines can migrate based on which physical resources a current step is accessing. A non-deterministic transaction loop can mirror choices where locality is irrelevant, and partition choices where locality is relevant. 76 | 77 | State can be distributed. Read-mostly registers can be mirrored, with updates propagated in a wavefront. Other registers may migrate to their users. Of notable interest are queues, bags, and CRDTs: 78 | 79 | * *queues* - modeled by a register containing a list. Reader takes from one end. 
Writer pushes to the other. (For convenience, a reader may also 'unread' data.) A runtime can split the register between reader and writer nodes, and migrate writes as part of batched node sync. 80 | * *bags* - modeled by a register containing a list. Reader removes a non-deterministic element. Writer inserts an element non-deterministically. A runtime can split the register across all nodes, each may read and write. Data migrates heuristically between nodes. 81 | * *CRDTs* (Conflict-free Replicated Data Types) - a family of types, so pick a few useful ones. A runtime can split the register such that each node maintains a local replica. Replicas are synchronized as part of node sync (we still want isolated transactions, not weaker eventual consistency). 82 | 83 | The runtime may recognize queues, bags, and CRDTs based on annotations, especially acceleration. 84 | 85 | *Note:* It is possible to change data usage patterns at runtime. Doing so generally requires a distributed transaction to rebuild the 'complete' value. But specific cases such as queue to bag may be trivial. 86 | 87 | ### Live Coding 88 | 89 | My vision for glas systems involves code being updated at runtime. Logically, code updates can be applied atomically, between %yield steps. Even in case of a distributed runtime, we can support a wavefront consistent with transactions, like updating mirrored state. 90 | 91 | Unfortunately, anonymous control-flow state, e.g. current continuation of 'main', is difficult to robustly translate. Favoring predictable update, we instead modify only named function calls. But we can recompile and typecheck the continuation in context of updated functions, seeking safe transition points. 92 | 93 | Programmers can design with live coding in mind. For example, they may favor tail-recursive loops as more amenable to live coding than a '%loop' structure for a long-running loop. 
To further support robust transition of code, we run 'switch' as the first operation in the updated code, retrying as needed. This provides an opportunity to defer transition or explicitly manage critical state. 94 | 95 | ## Futures and Promises (Tentative) 96 | 97 | A useful pattern for asynchronous and concurrent interaction is construction of `(Future, Promise)` pairs. The promise is linear and represents a single-assignment reference. The promised data is readable through the future. Ideally, holding the future is equivalent to holding the promised data modulo reflection APIs, thus futures are linear unless we guarantee promised data is non-linear. 98 | 99 | Compared to full references, futures and promises have simpler interaction with static dataflow analysis, linear types, and garbage collection. Of course, futures and promises are also less flexible, but users can model channels, e.g. `type Chan = Future<(T, Chan)|()>`, or more sophisticated structures to support most asynchronous interactions. 100 | 101 | A viable API: 102 | * `sys.promise.*` - 103 | * `new : (Promise, Future)` - returns an associated promise and future pair, runtime-ephemeral. The promise is linear, but future is non-linear. Writing linear data to the promise is a type error, diverging if detected at runtime. 104 | * `new.linear : (Promise, Future)` - As `new` but with a linear future and the promise accepts linear data when written. 105 | * `read(Future) : T` - await a future. This diverges unless the associated promise is assigned. 106 | * `write(Promise, T)` - assign a promise. This data becomes available within the current transaction, but only becomes visible outside the current transaction after commit. 107 | * `sys.refl.promise.*` - 108 | * `called(Promise) : Promise | FAIL` - returns argument only if the promise has been 'called', i.e. if it seems there is current demand for the promised data. 
Monotonic: will implicitly 'call' the associated future in case we're observing temporary demand. 109 | * `call(Future) : Future` - This marks a Future as in-demand for purpose of a `called` check. Idempotent and monotonic: once committed, 'called' will always pass, and 'forgotten' will always fail. 110 | * `forgotten(Promise) : () | FAIL` - allows dropping a promise if the future will not be observed. This relies on a garbage collector to decide when the future has fallen from scope. 111 | * `fulfilled(Future) : Future | FAIL` - returns argument if the future is immediately available, i.e. such that 'read' returns immediately and does not diverge. This allows roughly observing the timing for when a promise is fulfilled. 112 | 113 | In theory, we can also support database-ephemeral futures and promises. I do not recommend this because it interacts very awkwardly with network disruption or node failure. Thus, futures and promises are currently restricted to the runtime (albeit, permitting a distributed runtime). 114 | 115 | *Note:* It is feasible to implement futures and promises in terms of mutable references, but we lose out on a few runtime optimizations based on the single assignment constraint. 116 | 117 | ## HTTP 118 | 119 | The 'http' method receives HTTP requests not intercepted by the runtime. 120 | 121 | Instead of operating on a raw binary, this receives an environment of methods from the runtime providing features to swiftly route on the URL and access headers, and also write a valid, structured response. For details, I intend to borrow inspiration from the huge range of existing web server frameworks. 122 | 123 | The 'http' method is not implicitly atomic, but it's convenient if most requests are atomic. Atomic requests are both more RESTful and more widely accessible. 124 | 125 | *Aside:* Based on application settings and user configuration, we could automatically open a browser window after the application starts to provide a GUI. 
126 | 127 | *Note:* I am contemplating an alternative API. Instead of a toplevel 'http' method that handles routing to component methods, it seems feasible to 128 | 129 | ## Remote Procedure Calls (RPC) 130 | 131 | A significant benefit of built-in RPC in glas systems is the opportunity to integrate with transactions, transaction-loop optimizations, and my vision for GUI. But, short term, we can integrate conventional RPC without transactions. 132 | 133 | A viable API: 134 | 135 | rpc(MethodRef, Argument) : [cb?, bind] Result 136 | cb(Argument) : [cb?] Result 137 | bind(MethodRef) : MethodURL 138 | 139 | sys.rpc.bind(MethodRef) : MethodURL 140 | sys.rpc(MethodURL, Argument) : [cb?] Result 141 | 142 | types MethodRef, Argument, Result = plain old data 143 | type MethodURL = friendly URL text, full URL 144 | # friendly: no spaces or quotes, balanced parens, etc. 145 | 146 | MethodRef is application-provided data that supports routing, context, and a foundation for [capability-based security](https://en.wikipedia.org/wiki/Capability-based_security). The client calls an unforgeable URL, protected from tampering by cryptographic means such as HMAC signature. The optional 'cb' method supports flexible interactions with the caller before returning. 147 | 148 | This API does not support discovery. It's left to the application to publish the MethodURLs. 149 | 150 | *Aside:* MethodURL does not have a canonical representation. Each runtime may use its own encoding, compression, encryption or signature, etc.. Regardless of encoding, it's opaque in the normal mode of use. The only critical features are being unforgeable, and stable enough to not significantly harm runtime-level incremental computing. 151 | 152 | ### Relative Bind for Composition 153 | 154 | The 'bind' method provided to 'rpc' initially links to 'sys.rpc.bind'. However, within a composite application, we can intercept 'bind' to wrap a MethodRef to better support routing and other features. 
Essentially, 'bind' is relative while 'sys.rpc.bind' is absolute. 155 | 156 | ### Revocation 157 | 158 | Users can implement revocable capabilities by including expiration times or lookup keys in MethodRef. Expiration is obvious. In case of lookup keys, the capability is disabled if the lookup fails, and we also can conveniently store a large or mutable context with a small, stable MethodURL. 159 | 160 | We can also revoke MethodURLs by changing the cryptographic secret so they no longer authenticate. This doesn't need to be all-or-nothing. In practice, we might wish to rotate secrets so old ones remain available for several hours. A minimum viable API: 161 | 162 | sys.refl.rpc.secret.max(N) # set how many secrets to rotate 163 | sys.refl.rpc.secret.update(Binary) # use a secret (random if empty) 164 | 165 | Note that this doesn't allow the app to query its own secrets. The provided secret may be mangled in memory, e.g. storing a secure hash. However, it is feasible to support persistent secrets and thereby support persistent MethodURLs. 166 | 167 | ### Implementation 168 | 169 | POST /sys/m/encoded-methodref/sig HTTP/1.1 170 | 171 | The earliest implementations of RPC might simply use HTTP. The callback method could be supported by a URL back to the caller, or via special headers in the response to indicate a callback instead of a final response. It is feasible - with clever encoding - to eventually support transactions, to support lazy loading of content-addressed data and reference to content-delivery networks. 172 | 173 | I eventually will want a protocol that is more friendly for callbacks, transactions, transaction-loop optimizations, multiplexing, content-addressed data and integration with content-delivery networks, etc.. But, with HTTP, I can get something working immediately. 174 | 175 | In an HTTP-based implementation, callbacks will likely be represented by runtime-internal MethodURLs, and must be revoked by the runtime when the 'cb' falls out of scope. 
However, a dedicated protocol should have a built-in notion of lexically scoped callbacks, avoiding that overhead. 176 | 177 | *Note:* Until transactions are supported, 'sys.rpc' should be marked with the '%an.atomic.reject' annotation. 178 | 179 | ### Code Distribution 180 | 181 | A round trip per call or callback adds a lot of latency to RPC. This latency encourages development of batch methods that do more work per trip. This easily leads to frustrating APIs with too many options that are never exactly what is needed. The natural endpoint of this evolution is to support all the options, by sending a script that is interpreted remotely. 182 | 183 | Better to skip the frustration. Implement every RPC API with at least one adequate scripting interface from early on. 184 | 185 | An intriguing possibility is to compile RPC methods into scripts that partially run locally on the caller. Similarly, compile callbacks that partially run remotely. We can develop a MethodURL schema that supports scripting. And a pipeline could partially be encoded into a callback. 186 | 187 | ## Graphical User Interface (GUI) 188 | 189 | I have an interesting [vision for GUI](GlasGUI.md) in glas systems, but it's contingent on those transaction-loop optimizations, and it will be experimental even then. Until then, use FFI for native GUI or 'http' for browser-based GUI. 190 | 191 | ## Background Calls - Transaction Escape Hatch 192 | 193 | For safe operations with cacheable results, such as HTTP GET, it is often convenient to pretend that we already acquired the data before the current transaction. This pretense can be supported via reflection APIs that logically insert an operation before the current transaction. 
194 | 195 | Proposed API: 196 | 197 | sys.refl.bgcall(Argument) : [op] Result 198 | op(Argument) : [canceled] Result 199 | canceled() # pass/fail 200 | # constraint: Argument and Result are non-linear 201 | 202 | sys.refl.bgcall.async(Argument) : [op] Future 203 | # asynchronous variant of bgcall, immediately returns 204 | 205 | In this case, the caller provides an 'op' to evaluate in a separate coroutine. That coroutine will run just within scope of op, processing Argument and returning Result. The op does not need to be atomic: it may freely yield, e.g. to await an HTTP response. After completion, Result is then returned to the caller. 206 | 207 | In context of interruption, the runtime does not forcibly cancel the operation. Instead, the background operation tests for 'canceled' at its own discretion. This is a simple pass/fail, passing if there is no demand on Result. Cancellation is weakly monotonic: if observed *and* the observing transaction commits, all future 'canceled' tests will pass, and the final Result is treated as garbage and dropped. 208 | 209 | However, while cancellation is not observed - or if the observer does not commit (enabling developers to model timeouts) - a runtime may opportunistically bind multiple requests to the same Result based on matching 'op' and Argument. This supports re-attach after rollback, but it also enables stable 'bgcall' ops to serve as a publish/subscribe query of sorts. 210 | 211 | Some notes: 212 | - It is possible the background operation itself has a read-write conflict with the caller. There is risk of thrashing. Fortunately, this is relatively easy to detect and debug. 213 | - The runtime Result cache is ephemeral, short-lived. Anything more stable must be maintained by the background operation, either manually or via memo annotations. 214 | - In context of stable, non-deterministic bgcalls, a runtime may freely evaluate every non-deterministic path and return non-deterministic Results. 
This integrates nicely with transaction loops. 215 | - Aside from 'safe' read-only queries, bgcall is useful for demand-driven triggering of background tasks. Operations need only be 'safe' in the limited sense that side-effects are acceptable after caller aborts. 216 | 217 | ## Foreign Function Interface (FFI) 218 | 219 | I propose a pipelined FFI model. A transaction builds a stream of commands to be handled by a non-transactional FFI thread. The FFI thread interprets this stream, loading libraries, calling functions, reading memory, perhaps JIT-compiling C code so we can directly express composite operations. Results are observed in a future transaction through a queue. 220 | 221 | A viable API: 222 | 223 | TypeHint: 224 | p - pointer (void*) 225 | y,Y - int8, uint8 226 | s,S - int16, uint16 227 | w,W - int32, uint32 228 | q,Q - int64, uint64 229 | i,I - int, unsigned int 230 | Z - size_t 231 | f - float 232 | d - double 233 | 234 | * `sys.ffi.*` - 235 | * `create(Hints) : [ffi] ()` - create an FFI thread bound to register 'ffi' but abstracted. Error if location is already in use. 236 | * With runtime support and appropriate hints, a separate FFI process is also feasible. 237 | * In a distributed runtime, hints would determine which node owns the FFI thread. 238 | * `fork() : [src,dst]` - duplicate an FFI thread from src into dst. This sends a command to duplicate thread-local state. The results queue will be duplicated for commands prior to fork. 239 | * `close() : [ffi] ()` - sends a command to terminate the FFI thread, and clears the local 'ffi' state. This does not immediately halt the FFI thread. 240 | * Note: We might introduce methods in 'sys.refl.ffi' to browse and kill FFI threads. 241 | * `status() : [ffi] FFIStatus` - recent status of FFI thread: 242 | * *future* - FFI thread doesn't fully exist yet, newly created. 243 | * *ready* - FFI thread is awaiting commands, all prior commands complete.
244 | * *busy* - ongoing activity, still processing prior commands. 245 | * *error:(text:Message, code:Integer, ...)* - FFI thread is halted in a bad state. Unrecoverable without reflection APIs. 246 | * `link.lib(SharedObject) : [ffi] ()` - load a ".dll" or ".so" file. When looking up a symbol, last linked is first searched. 247 | * `link.hdr(Name, Text) : [ffi] ()` - redirects `#include` to `Text` in context of C JIT. 248 | * `link.src(Text) : [ffi] ()` - JIT-compile C source and link (e.g. via Tiny C Compiler). 249 | * `call(Symbol, TypeHint) : [ffi] ()` - call a previously linked symbol. Parameters and results are taken from the thread's data stack, and the return value is pushed back. TypeHint for `int (*)(float, size_t, void*)` is `"fZp-i"`. In this case, pointer 'p' should be at top of stack to match C calling conventions. 250 | * Void type is elided, e.g. TypeHint for `void (*)()` is simply `"-"`. 251 | * `script(Text, Symbol, TypeHint) : [ffi] ()` - one-off JIT and call symbol. 252 | * `mem.write(Binary) : [ffi] ()` - (type `"p-"`) send command to write a binary to a pointer found on the FFI thread's data stack. 253 | * `mem.read() : [ffi] ()` - (type `"pZ-"`) given a pointer and size on the data stack, return a binary via the result stream. 254 | * `push(List of Data, TypeHint) : [ffi] ()` - send command to push data to FFI thread's data stack. TypeHint determines conversions, e.g. `"fZp"` may receive glas representations of a rational, an integer, and an abstract pointer in that order, i.e. pointer is last element. 255 | * `peek(N) : [ffi] ()` - query a list of N items from the data stack. The FFI data stack tracks types, so no need to provide them. Notes: 256 | * N=0 returns empty list, useful to await prior operations to complete. 257 | * floating-point NaNs and infinities aren't supported, result in error status. 258 | * order is consistent with push, i.e. last item was top of FFI thread data stack.
259 | * `move(Text)` - ad hoc stack manipulation, described visually. E.g. Text `"abcd-cdabb"` will swap two pairs of data then copy the new top item. Limited to 'a-z' and each may appear at most once in LHS. 260 | * `stash(N) : [ffi] ()` - move top N items from data stack to top of auxiliary stack, called stash. Order is same as repeating `stash(1)` N times, i.e. inverting order onto stash. If N is negative, moves data from stash to stack instead. 261 | * *Note:* The 'stash' op is intended to serve a role similar to %dip, hiding the top of the data stack until some operations complete. 262 | * *registers* - TBD. Maybe just support a register per upper-case character? Not a priority. 263 | * `results.read(N) : [ffi] (List of Data)` - read and remove N results from the results queue. First result is head of list. Diverges if insufficient data. 264 | * `results.unread(List of Data) : [ffi] ()` - push a list back into results for future reads. 265 | * `results.peek(N) : [ffi] (List of Data)` - as read, copy, unread. 266 | * `ptr.*` - a safety on a footgun. Ptr is an abstract data type, and may only be shared between FFI threads ultimately forked from the same 'create' unless 'addr' is used at one and 'cast' at the other. 267 | * `addr(Ptr) : [ffi] Int` - view pointer as an integer (via intptr_t). Error if Ptr and FFI thread have different origin 'create'. 268 | * `cast(Int) : [ffi] Ptr` - treat any integer as a pointer 269 | * `null() : Ptr` - pointer with 0 addr is a special case, accepted by any FFI 270 | * `sys.ffi.pack() : [ffi] FFI` - package FFI thread into an abstract, linear object. 271 | * `sys.ffi.unpack(FFI) : [ffi] ()` - rebind a previously packaged FFI thread. 272 | * `sys.refl.ffi.*` - *TBD* perhaps debugging, browsing, CPU usage, force kill 273 | 274 | This API is designed assuming use of [libffi](https://en.wikipedia.org/wiki/Libffi) and TinyCC.
We'll need the [version of TinyCC](https://github.com/frida/tinycc/tree/main) that supports callbacks for includes and linking. 275 | 276 | This kind of API can be adapted to other targets, e.g. JVM, .NET, or JavaScript. 277 | 278 | Potential extensions: 279 | * support for structs, e.g. `"{ysw}"` 280 | * or just use JIT for this. 281 | 282 | *Note:* Full orthogonal persistence of FFI seems infeasible, but FFI as a pipe to a separate thread or process at least can clearly indicate disruption. Ideally, FFI-based APIs should be designed to recover resiliently after a disconnect, so we can support orthogonal persistence later. 283 | 284 | ## Regarding Filesystem, Network, Native GUI, Etc. 285 | 286 | I'm hoping to build most APIs above FFI and bgcall, reducing the development burden on the runtime. We should stick with the 'unpacked linear object' concept instead of references in each case. 287 | 288 | ## Time 289 | 290 | Query the system clock. 291 | 292 | * `sys.time.now() : TimeStamp` - Returns a TimeStamp for estimated time of commit. By default, this timestamp is a rational number of seconds since Jan 1, 1601 UTC, i.e. the Windows NT epoch but with arbitrary precision. 293 | * `sys.time.after(TimeStamp)` - fails unless `sys.time.now() >= TimeStamp`. Use this if waiting on the clock, as it provides the runtime a clear hint for how long to wait. 294 | 295 | It is possible to wait on a clock and model sleeps, but not within a single transaction. Atomicity is semantic or logical instantaneity. Thus, 'yield' is always required. We can acquire time in one transaction, yield, and await that timestamp plus a sleep duration within another transaction. Timeouts can then be expressed as a non-deterministic choice between awaiting the clock and another operation. 296 | 297 | Later, when we develop distributed runtimes, we'll want to extend this API to support multiple clocks. Otherwise, semantics get weird due to observing clock drift on "the same" clock. 
Perhaps `"sys.clock.time.now() : [clock] TimeStamp"` plus clock creation and so on. With multiple clocks, we could reasonably argue that `sys.time` represents a non-deterministic choice of clocks, allowing best effort with drift. 298 | 299 | ## Arguments and Environment Variables 300 | 301 | A runtime can easily provide access to OS environment variables and command-line arguments. 302 | 303 | * `sys.env.list : List of Text` - return the defined environment variables 304 | * `sys.env.get(Text) : Text` - return value for an OS environment variable 305 | * `sys.env.args : List of Text` - return the command-line arguments 306 | * `sys.env.arg(Index) : Text` - access individual args (may simplify caching staged apps) 307 | 308 | These will simply be read-only within an application, but users could intercept 'sys.env.\*' methods when calling a subprogram. 309 | 310 | *Note:* Applications integrate the configuration environment at compile time through the namespace layer, '%env.\*'. 311 | 312 | ## Console IO 313 | 314 | With users launching glas applications from a command-line interface, it is convenient to support user interaction directly through the same interface. The basics are just reading and writing some text, but it is possible to disable line buffering and input echo then implement sophisticated applications via [ANSI escape codes](https://en.wikipedia.org/wiki/ANSI_escape_code) or extended protocols. 315 | 316 | A viable API: 317 | 318 | * `sys.tty.write(Binary)` - write to standard output, buffered until commit. 319 | * `sys.tty.read(N) : Binary` - read from standard input. Diverges if not enough data. 320 | * `sys.tty.unread(Binary)` - add Binary to head of input buffer for future reads. 321 | * `sys.tty.ctrl(Hint)` - ad hoc control, extensible but mostly for line buffering and echo 322 | 323 | The control hint is runtime specific, perhaps something like `(icanon:on, ...)`. I reserve standard error for runtime use - compile-time warnings, logging, etc.. 
324 | 325 | A fundamental issue with console IO is that it isn't very composable. The default is to awkwardly mix streams and hope for the best. Or to avoid composing apps within a single process. But with translation, we could feasibly present a distinct 'sys.tty.\*' to each component application. This could support a few slightly-useful forms of composition, e.g. pipes or screens. 326 | 327 | *Note:* I would like to mirror the terminal through the runtime HTTP interface, e.g. `"/sys/tty"` via [xterm.js](https://xtermjs.org/). 328 | 329 | ## Reflection 330 | 331 | With FFI and bgcall handling external integration, reflection remains one area where the runtime cannot effectively delegate. 332 | 333 | - sys.refl.src.\* - access to abstract '%src' metadata from compile time. 334 | - Minimally, support examination of the abstract data type, Src 335 | - The `(%src.meta MetaData Src)` can bind metadata for context. 336 | 337 | - sys.refl.log.\* - access to log output streams 338 | - browse log Chans and their activity 339 | - access to log histories 340 | - potentially adjust runtime-local configuration options per Chan 341 | 342 | - sys.refl.prof.\* - access profiling stats 343 | - sys.refl.trace.\* - access recorded traces 344 | 345 | - sys.refl.view.\* - debug thyself, application 346 | - browse view Chans and their activity 347 | - create, clone, pack, unpack, and destroy linear view register contexts 348 | - query a view with a register context and callbacks 349 | 350 | - sys.refl.tty.\* - maybe provide xterm view of console via HTTP?
351 | - access buffered memory of inputs and outputs 352 | - adjust buffer sizes 353 | - 'inject' inputs as if from user input 354 | 355 | - sys.refl.ffi.\* - debugging of FFI issues, mostly 356 | - browse active FFI threads 357 | - view: 358 | - step counter(s) 359 | - data stack and stash 360 | - current command (if any) and start time 361 | - pending buffered commands 362 | - unprocessed results buffers 363 | - estimate CPU utilization (?) 364 | - force kill thread (notably unsafe) 365 | 366 | - sys.refl.bgcall.\* - debug existing bgcalls 367 | - browse active bgcalls (op and Argument) 368 | - view activity, progress, thrashing 369 | - force cancel or kill, possibly 370 | 371 | - sys.refl.http - access runtime's built-in HTTP interface 372 | - sys.refl.http.\* 373 | - browse prior and active requests 374 | 375 | - sys.refl.rpc.\* 376 | - control authentication of MethodURL (e.g. rotating expirations) 377 | - metadata about past and current requests for debugging 378 | - may forcibly kill some requests 379 | 380 | - sys.refl.g.\* 381 | - browse the application 'global' registers 382 | - may forcibly edit them 383 | 384 | - sys.refl.db.\* 385 | - browse persistent registers in use by app 386 | - can also browse anything app *could* have bound 387 | - may forcibly update registers 388 | 389 | - sys.refl.gc.\* - garbage collection stats; trigger GC manually 390 | - sys.refl.sched.\* - conflicts, rework, backtracking, productivity 391 | 392 | --------------------------------------------------------------------------------