├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── app.fsproj ├── docs ├── AwelonFML.md ├── AwelonKPN.md ├── AwelonLang.md ├── Brainstorm.md ├── Generics.md ├── HTXM.md ├── ImmutableAwelon.md ├── Indexing.md ├── KPN_Effects.md ├── Perf.md ├── Runtime.md ├── SLRP-Article.md ├── SLRP.md └── Todo.md ├── hs ├── Deprecated.md ├── Setup.hs ├── app │ └── Main.hs ├── src │ ├── Awelon.hs │ ├── Awelon │ │ ├── CX.hs │ │ ├── Dict.hs │ │ ├── Dict │ │ │ ├── Format.hs │ │ │ └── Rename.hs │ │ ├── Hash.hs │ │ └── Syntax.hs │ ├── Wikilon.hs │ └── Wikilon │ │ ├── CBT.hs │ │ ├── CX.hs │ │ ├── DB.hs │ │ ├── KVM.hs │ │ └── RT.hs ├── stack.yaml ├── test │ └── Spec.hs └── wikilon.cabal ├── src ├── Awelon │ ├── Awelon.fsproj │ ├── DictIndex.fs │ ├── DictSearch.fs │ ├── Dictionary.fs │ ├── Interpret.fs │ ├── LocalVar.fs │ ├── Parse.fs │ ├── ParseExt.fs │ ├── Test │ │ ├── Program.fs │ │ ├── Test.fsproj │ │ └── Tests.fs │ ├── TypeCheck.fs │ └── WordVersion.fs ├── Data.ByteString │ ├── ByteString.fs │ ├── Data.ByteString.fsproj │ ├── Stream.fs │ ├── Tests │ │ ├── Program.fs │ │ ├── Tests.fs │ │ └── Tests.fsproj │ └── Tree.fs ├── Main.fs ├── Stowage │ ├── Data │ │ ├── CByteString.fs │ │ ├── CVRef.fs │ │ ├── Cache.fs │ │ ├── Codec.fs │ │ ├── CommonEncoders.fs │ │ ├── DB.fs │ │ ├── DurableCache.fs │ │ ├── FingerTree.fs │ │ ├── IntMap.fs │ │ ├── LSMTrie.fs │ │ ├── LVRef.fs │ │ ├── Measured.fs │ │ ├── MemoryCache.fs │ │ ├── Monoid.fs │ │ ├── README.md │ │ ├── RscHash.fs │ │ ├── Seq.fs │ │ ├── Stowage.Data.fsproj │ │ ├── Stowage.fs │ │ ├── Trie.fs │ │ ├── VDiff.fs │ │ └── VRef.fs │ ├── LMDB.fs │ ├── LMDBFFI.fs │ ├── RCTable.fs │ ├── README.md │ ├── Stowage.fsproj │ └── Test │ │ ├── .gitignore │ │ ├── Program.fs │ │ ├── Test.fsproj │ │ └── Tests.fs └── Wikilon │ ├── Database.fs │ ├── History.fs │ ├── README.md │ ├── User.fs │ ├── WS.fs │ └── Wikilon.fsproj └── wikilon.nginx-config /.gitattributes: -------------------------------------------------------------------------------- 1 | *.fs text eol=lf 2 | *.fsproj text eol=lf 3 | 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | .stack-work/ 3 | .paket/paket.exe 4 | packages/ 5 | bin/ 6 | obj/ 7 | *.userprefs 8 | wiki/ 9 | 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright David Barbour (c) 2017 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of David Barbour nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 
19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Wikilon 2 | ======= 3 | 4 | Wikilon is a wiki-inspired software platform and development environment for the Awelon project and the Awelon programming language. 5 | 6 | The [Awelon programming language](docs/AwelonLang.md) has a simplistic, Forth-like syntax, a simple semantics, and a purely functional evaluation model based on confluent rewriting of concatenative combinators. This simple syntax can be leveraged for projectional editing, and views may be preserved over evaluation. 7 | 8 | Awelon computations cannot reference external data. Instead, Awelon codebases may serve as simple databases, documents, filesystems, or spreadsheets, depending on how they're used and updated. Awelon explores non-conventional [application models](docs/ApplicationModel.md). 9 | 10 | Wikilon presents the Awelon language through a web service, with wiki-inspired features. 11 | 12 | ## Installation 13 | 14 | Wikilon is implemented using F# on the CLR. The code generation and JIT capabilities of the CLR or JVM are convenient for implementing a lightweight compiler for user-created code. CodeDOM and support for tail calls have me favoring the CLR. 15 | 16 | Dependencies: 17 | 18 | * [LMDB](http://www.lmdb.tech/doc/) 19 | * sudo apt-get install liblmdb-dev 20 | * [.Net core](https://www.microsoft.com/net/core#linuxubuntu) tools 21 | * instructions on linked website 22 | 23 | Assuming the dependencies and a good Internet connection, you may use `dotnet restore` to download required .Net packages. Use `dotnet run -- -help` to view command line options. *Aside:* I'm favoring .Net core over mono largely for the streamlined `dotnet` CLI tooling. This does incur a few unfortunate opportunity costs, such as WebSharper not yet being available for .Net core. 24 | 25 | ## Components 26 | 27 | Primary project components: 28 | 29 | * `src/Wikilon` - the web application and UI definitions 30 | * `src/Awelon` - Awelon language and runtime model 31 | * `src/Stowage/Data` - non-relational database model 32 | * `src/Stowage` - LMDB-based implementation of the database 33 | * `src/Data.ByteString` - bytestring values, builders, parsers 34 | 35 | This toplevel directory also has `app.fsproj` referencing `src/Main.fs`, which simply processes command line configuration options and runs a web server. 36 | 37 | ## Configuration 38 | 39 | Configuration of Wikilon is performed online, generally through a browser. Initial configuration requires the `-admin` flag to provide an ephemeral password for a root administrative account. But the admin may assign administrative authorities to other accounts.
40 | 41 | -------------------------------------------------------------------------------- /app.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | Exe 4 | netcoreapp2.0 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /docs/AwelonFML.md: -------------------------------------------------------------------------------- 1 | 2 | # Awelon with First-Class Modules 3 | 4 | Awelon is currently a very simplistic language with four primitive combinators. This gives the language a very dynamic feel. Many programs can be statically typed, but doing so requires a lot of non-local knowledge. I'd like to consider a variation of Awelon with some nicer static reasoning properties: 5 | 6 | * unify values and modules, per [Rossberg's 1ML](https://people.mpi-sws.org/~rossberg/1ml/1ml.pdf) 7 | * dependent types so we can express rich theories 8 | * advanced effects model, such as [Frank's Do Be Do Be Do](https://arxiv.org/pdf/1611.09259.pdf) 9 | 10 | If we start with a richer language, we can go a lot further with Awelon. For example, it becomes feasible to use Awelon as a systems programming language (without relying too much on accelerators). 11 | 12 | Awelon's current language has some nice properties: point-free, concatenative, deterministic, simple syntax, local rewriting. Those latter three properties - deterministic, simple syntax, local rewriting - are the essential properties for Awelon's goals of unifying PX and UX. The simple syntax enables us to project code into a graphical interface, determinism ensures that shared code does not change in meaning, and local rewriting allows projection of the results. Point-free and concatenative are convenient for streaming a program, but are not essential. 13 | 14 | So the goal with this Awelon variant is to preserve those essential properties, while also enabling types and effects. 15 | 16 | ## Modules? 17 | 18 | The basic module interface is described by: 19 | 20 | * a collection of associated types 21 | * a collection of operations on them 22 | 23 | Types and operations are identified symbolically. Description of the types and interfaces might be a result of integrating other interfaces - a type also has a module interface. We can instantiate an interface, or partially instantiate one. 24 | 25 | ## Module Rewriting? 26 | 27 | In the original Awelon, all values are functions. Functions are convenient because the only way to observe a function is to apply it, and the only way to construct a function is to bind an argument. Thus, we only need combinators `a` and `b` to model observations and constructions. Besides these, we had extra combinators for data plumbing. 28 | 29 | With modules, what kinds of observations and constructors do we require? 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | For constructors, we might: 38 | 39 | * parameterize a module 40 | * abstract a module 41 | * instantiate a module with a given type 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | so we have primitive combinators to observe and construct functions, in addition to combinators for basic data plumbing. 50 | 51 | If all values are modules, then we'll instead want combinators for observing and constructing modules. 52 | 53 | and vice versa, then what does it mean to rewrite them? 54 | 55 | 56 | 57 | 58 | There are other approaches, such as use of pattern-match rules. In that case, we might follow 'order' of patterns to achieve determinism.
However, pattern matching is a little awkward in general: we need to know a lot about representations or views of the arguments. I think with modules, I'd prefer to follow Awelon's original example: simple confluent combinators. 59 | 60 | With functions, we observe a function by applying it, or we may construct a function by binding an argument. These correspond to Awelon's `a` and `b` primitive combinators, respectively. Besides these, we can copy or drop a function. 61 | 62 | With modules, observations and constructors are much richer, so we'll need more combinators. 63 | 64 | 65 | 66 | 67 | * to use one module to parameterize another 68 | * to use one module to instantiate another 69 | * to compose modules, unifying some shared types by name 70 | * to abstract a module, forgetting some details 71 | 72 | 73 | 74 | 75 | ## First Class Modules 76 | 77 | Rossberg's 1ML proposes to unify ML's core and module layers, so that modules become ordinary first-class values. 78 | 79 | 80 | ## Effects Model 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /docs/AwelonKPN.md: -------------------------------------------------------------------------------- 1 | 2 | # Kahn Process Networks in Awelon 3 | 4 | [Awelon language](AwelonLang.md) is purely functional. Although purely functional code permits a lot of simple fork-join parallelism, it presents a challenge for distributed parallel computation, modeling of parallel pipelines and so on. Mostly, the issue regards routing of messages. A purely functional representation of routing will be relatively deterministic and centralized, so it's difficult to leverage arrival-order non-determinism (race conditions) to improve parallelism. 5 | 6 | To resolve this, my current intention is to borrow inspiration from a deterministic concurrency model - especially one such as [Kahn Process Networks (KPNs)](https://en.wikipedia.org/wiki/Kahn_process_networks) that does not require external effects to model channels or single-assignment variables. 7 | 8 | KPNs are also very useful as a potential [effects model](KPN_Effects.md) and process or application model, essentially capable of representing monads and comonads with multiple IO ports. KPNs support reactive or real-time systems by adding a simple time model, e.g. the ability to deliver `Tick + a` messages on every channel instead of just `a` messages. Pushback can also be modeled explicitly by adding acknowledgement feedback messages from the downstream processes. 9 | 10 | ## Single Assignment Placeholders 11 | 12 | We can conceptually model single assignment variables using pseudo-words: 13 | 14 | [A] /put_K => (single assignment) 15 | /get_K => [A] (after assignment) 16 | 17 | Awelon doesn't support single assignment variables directly, but an optimizer or interpreter could presumably introduce placeholders to improve parallelism. In particular, `/get_K` may behave as a value word to be delivered through a computation in parallel with the computation that produces the final value `[A]`. 18 | 19 | Using this technique, we can shift some computations (specifically those with a known output type) into a separate "parallel region" such that the parallel region will ultimately have an identity behavior. 20 | 21 | [Expr](par)(t2) => [/get_1 /get_2] (locally) 22 | Expr /put_2 /put_1 (parallel region) 23 | 24 | We'll create fresh variables as needed, i.e. every `/put_K` will use a distinct `K`. If computation terminates successfully, all of these put and get pseudo-words should be eliminated.
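For intuition, these pseudo-words behave like the usual single-assignment (IVar-style) cells. Below is a minimal Haskell sketch of that behavior using `MVar`; the names `SA`, `putSA`, and `getSA` are illustrative only, not part of Awelon or this runtime:

    import Control.Concurrent.MVar

    -- A single-assignment cell: put succeeds at most once, and get is
    -- a non-destructive read that blocks until the value arrives.
    newtype SA a = SA (MVar a)

    newSA :: IO (SA a)
    newSA = SA <$> newEmptyMVar

    putSA :: SA a -> a -> IO ()
    putSA (SA v) x = do
      ok <- tryPutMVar v x
      if ok then return () else error "single assignment violated"

    getSA :: SA a -> IO a
    getSA (SA v) = readMVar v
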
However, if we halt due to error or quota, we may need to extract these placeholders. This can be achieved via a simple value extraction algorithm: 25 | 26 | T(X,E) - extract variable X from E 27 | X T(X,E) == E 28 | T(X,E) when E does not have X => d E 29 | T(X,X) => 30 | T(X,[E]) => [T(X,E)]b 31 | T(X,F G) 32 | when only F has X => T(X,F) G 33 | when only G has X => [F]a T(X,G) 34 | when both have X => c [T(X,F)] a T(X,G) 35 | 36 | Essentially, we can prefix incomplete parallel computations onto our programs then rewrite them back to equivalent inline programs. 37 | 38 | ## Stable Frame Routing 39 | 40 | To model routing of messages, one option is to process available messages in large "frames", touching every channel even if no message is available for that frame. Doing so naively is inefficient but does have potential advantages for batching and especially for routing of *placeholders* because we don't need to know anything about the potential messages before we route them. 41 | 42 | An interesting possibility is to leverage knowledge about stable routes such that we only need to route placeholders once for a frame then deliver multiple times by reusing the existing routes. Essentially, routes become `/put_K` and `/get_K` pairs reusable across multiple update frames without explicitly performing the data plumbing each time. 43 | 44 | ## Identity Frames and Fixpoints 45 | 46 | A KPN won't update further when no messages are added. Essentially, we have a clear fixpoint condition - something easy to check dynamically, and potentially to verify statically. Evaluation of KPNs will always proceed to this fixpoint. Knowing this condition, we can optimize by avoiding routing to a subnetwork when there are no messages. More importantly, we can support "race conditions" within the KPN so long as we clearly determine which subnets might update. 47 | 48 | However, we might need to pay attention to representation of pending messages to ensure this fixpoint behavior. For example, we might need to know we're appending lists. 49 | 50 | If we route stable frames in a fixpoint loop, with a clear halting condition, we'll have the essence of KPNs. 51 | 52 | ## Acceleration 53 | 54 | Ideally, acceleration of KPNs in Awelon shouldn't rely too much on a specialized KPN data structure. We could do that, but I'd prefer something more generalized based on lightweight static analysis involving stable routes and identity frames. 55 | 56 | 57 | -------------------------------------------------------------------------------- /docs/Brainstorm.md: -------------------------------------------------------------------------------- 1 | 2 | # Ideas of Dubious Utility 3 | 4 | Just a scratch pad for ideas I'm contemplating but not committed to in the context of Awelon. 5 | 6 | ## Automatic Definitions 7 | 8 | Awelon currently provides automatic definitions for: 9 | 10 | * natural numbers `[1-9][0-9]*` 11 | * embedded texts 12 | 13 | I find it tempting to introduce more general support for automatic definitions, e.g. such that we could derive `cddadr`. I also like the idea of automatic definition for lists/tables based on common word structure. At the moment, we must explicitly manage any lists. But we could feasibly support `foo-*` words that automatically compose a list of words such as `foo-1` and `foo-42`. 14 | 15 | Effectively, these functions would either require some form of macro definitions or some built-in schema.
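As an illustration of the schema approach, a built-in rule for the `cddadr` family might look like the following Haskell sketch. It expands such a word into a composition of hypothetical `car` and `cdr` words, in concatenative order (first operation first); the function name and word spellings are assumptions for the example:

    -- Sketch of a built-in schema: words matching c[ad]+r expand to a
    -- composition of 'car' and 'cdr'. Other words are not automatic.
    autoDef :: String -> Maybe [String]
    autoDef ('c' : ops)
      | not (null ops), last ops == 'r'
      , body <- init ops
      , not (null body), all (`elem` "ad") body
      = Just [ if c == 'a' then "car" else "cdr" | c <- reverse body ]
    autoDef _ = Nothing

    -- autoDef "cddadr" == Just ["cdr","car","cdr","cdr"]
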
Macro definitions would be more flexible, but also rather more difficult to index for reactive computations (e.g. to automatically add `foo-36` to the appropriate `foo-*` list). I'd prefer to avoid the resulting complexities in my implementations and tooling. 16 | 17 | For the moment, we can shove this issue to external software agents or interactive editing tools, which could automatically define words as needed based on rules represented within the dictionary. Similarly, we could automatically import or maintain definitions from trusted external sources. 18 | 19 | ## Exchange and Apply 20 | 21 | Right now I have: 22 | 23 | [B][A]a == A[B] (apply) 24 | 25 | But I could split the exchange aspect as follows: 26 | 27 | [B][A]a == B[A] (apply') 28 | [B][A]e == [A][B] (exchange) 29 | 30 | This would result in greater separation of responsibilities. It simplifies the inline function to just `[] a d`. But the benefits ultimately seem limited. OTOH, the current 'apply' has a nice symmetry with bind, regarding the second argument: 31 | 32 | [B][A]a == A[B] apply to all but second element 33 | [B][A]b == [[B]A] apply to just the second element 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /docs/Generics.md: -------------------------------------------------------------------------------- 1 | # Generic Programming in Awelon 2 | 3 | Generic programming is about writing programs in terms of requirements, the abstraction and deferral of non-essential decisions such as data types, sorting algorithms, dependencies, configuration, and extensions. Ideally, this should be achieved without significant overheads for syntax or performance. 4 | 5 | Awelon directly supports parametric polymorphism, which is a useful but limited form of generic programming. Advanced forms of generic programming will require explicit modeling and suitable projections. Unfortunately, popular approaches like multi-methods or type-classes rely on a global registry or type system, and are awkward in Awelon. 6 | 7 | I have some vague ideas involving a monadic constraint system. 8 | 9 | Constraint systems are close in nature to generic programming: a constraint can express a requirement, or further refine one, while the search space serves as a registry of possible and partial solutions. With soft constraints, we can also model preferences. The monadic API would provide a framework for constructing our model, registering the requirements and options. 10 | 11 | A preliminary API: 12 | 13 | type CX v a -- monadic 14 | type Var v a -- single assignment variables 15 | 16 | -- model construction 17 | alloc : CX v (Var v a) 18 | read : Var v a -> CX v a 19 | write : Var v a -> a -> CX v a 20 | 21 | -- constraint system 22 | choice : List of CX v a -> CX v a 23 | fork : CX v () -> CX v () 24 | weight : Nat -> CX v () 25 | 26 | 27 | 28 | 29 | 30 | The monadic API can be convenient for abstracting and composing our constraints. Unfortunately, it is somewhat awkward to 'defer' a choice based on downstream requirements in the monadic API. This means we either must front-load our requirements (which is awkward, but doable idiomatically) or find an alternative to true deferral (such as iterative search). 31 | 32 | 33 | 34 | 35 | In this design, we'll have a monad that supports 'search' via alternatives and failure, and also supports single-assignment variables. A variable may only be assigned once, but the assignment may be separated from variable declaration.
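To make the intended search semantics concrete, here is a toy, monomorphic Haskell model of part of this API (no `fork` or `weight`; variables are `Int` names holding `Integer` values; `choice []` acts as failure, and a read of an unassigned variable simply fails where the real design would defer). It is only an illustration, not a serious implementation:

    import Control.Monad (ap, liftM)

    type Store = (Int, [(Int, Integer)])   -- fresh-name counter, bindings
    newtype CX a = CX { runCX :: Store -> [(a, Store)] }

    instance Functor CX where fmap = liftM
    instance Applicative CX where
      pure x = CX (\s -> [(x, s)])
      (<*>) = ap
    instance Monad CX where
      m >>= f = CX (\s -> concat [ runCX (f a) s' | (a, s') <- runCX m s ])

    alloc :: CX Int
    alloc = CX (\(n, bs) -> [(n, (n + 1, bs))])

    write :: Int -> Integer -> CX ()
    write v x = CX go where
      go (n, bs) = case lookup v bs of
        Nothing         -> [((), (n, (v, x) : bs))]
        Just y | y == x -> [((), (n, bs))]  -- consistent re-write is ok
        _               -> []               -- conflict: this branch fails

    read' :: Int -> CX Integer               -- read, renamed off Prelude
    read' v = CX (\s@(_, bs) -> [ (x, s) | Just x <- [lookup v bs] ])

    choice :: [CX a] -> CX a                 -- choice [] is failure
    choice xs = CX (\s -> concat [ runCX x s | x <- xs ])
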
The idea, then, is to express multiple alternative assignments as a search space, and to represent ad-hoc constraints by search failures. Importantly, variable assignments are interdependent. We'll consider only groups of assignments. 36 | 37 | To this we might add a 'fork' operation that allows us to defer reads, decisions, requirements, and assignments. There is no return value from 'fork'. However, every fork must halt successfully for our constraint system to succeed. 38 | 39 | A reasonable question is how to model a shared collection, such as a list of plugins. My idea is this: we model our plugins as a linked list of single-assignment variables to which we might append. Each appended element is potentially the last one, so we represent this using a local alternative. This pattern is inefficient if implemented naively, but we could optimize it with a dedicated API extension. 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | Ideas: 60 | 61 | * single-assignment variables combined with a search model 62 | * answer-set programming - variables are not independent 63 | * attaching tactical solutions to a variable (or set thereof) 64 | * option to 'fail' 65 | 66 | How would we support extensions, too? I suppose it could be encoded by building a channel using constraint variables and prioritized selection. Intriguingly, this constraint system could enable a high level of parallel computation insofar as different parts are reading or writing. 67 | 68 | type GX e a -- monadic, with error type 'e' 69 | try : GX e a -> GX e' (Either e a) 70 | fail : e -> GX e a 71 | return : a -> GX e a 72 | 73 | This design is sufficient to give us the ability to try 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | The monad provides an API to read and write an ad-hoc registry while computing a value, and also some APIs for failure and fallback, e.g. an exception-safe `try` that rolls back writes upon error. This would be sufficient to model lightweight forms of search, for example. This monad can perform ad-hoc computations, and would return the final result and registry. 83 | 84 | 85 | 86 | We could perform computations 87 | 88 | Further, the monad may support failure and fallback behavior, like a try-catch, allowing for program search. Further, the monad may support a "scoring" mechanism 89 | 90 | 91 | This would allow us to construct collections and options 92 | 93 | while computing a value, and perhaps support ad-hoc alternatives and partial failures. 94 | 95 | 96 | 97 | 98 | 99 | 100 | A viable idea for generic programming is to develop a monadic API for a lightweight constraint programming model. We would simultaneously declare constraint variables and add options to them, while also computing a value. 101 | 102 | our options, while computing a value. This would 103 | 104 | with inspiration from declarative constraint systems. We could declare constraint variables, the 105 | 106 | 107 | The monad would allow us to construct a registry and a program at the same time. 108 | 109 | could declare various rules: how to build a set of resources given another set. 110 | 111 | - how to build a resource given some other resources, and which new rules become available and requirements (whi 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | In many cases, such as multi-methods, we must model a registry of instances. To fit Awelon's overall design vision, this registry must be *local* to a program. 120 | 121 | (for caching, control, etc.)
this should not be a global registry - reject a global `mm-registry` for multi-methods. Indeed, it's best if the registry is local to each program definition. 122 | 123 | of the generic programming system. If we intend to simulate typeclasses or multi-methods, we must model a registry of instances. 124 | 125 | classes and methods. 126 | 127 | It is convenient to represent registration of resources monadically, mixed together with specification of the program. Intriguingly, we could generalize monadic mechanisms to support dependency-injection or configuration. With support for alternatives and program search, it would also be convenient for rapid prototyping. 128 | 129 | This is a direction I'd very much like to pursue for Awelon, long term. We may need to build upon projections to make it usable. 130 | 131 | -------------------------------------------------------------------------------- /docs/HTXM.md: -------------------------------------------------------------------------------- 1 | Hierarchical transaction machines (HTXMs) are an interesting idea I've been developing recently. HTXMs accomplish my goals of live, resilient, declarative, extensible software systems. Further, HTXMs accomplish these goals in a way that's easy to integrate with existing services and devices, thus avoiding the major weakness of RDP. 2 | 3 | First, let's ignore that 'hierarchical' adjective. I'll get back to it! 4 | 5 | A transaction machine (TXM) is essentially: a transaction, repeated indefinitely. We assume our system has a set of TXMs. 6 | 7 | TXMs intrinsically support extension and liveness: We can model our set of TXMs as a transaction variable. This allows us to modify our system behavior atomically, adding new machines and removing others. If necessary, we can simultaneously transition our state resources. 8 | 9 | TXMs intrinsically have nice resilience properties: transactions are fail-safe, they can degrade gracefully by fallback behavior or halting individual machines, and the system will recover swiftly after a problem is corrected. Further, they can easily support a user interface with consistent views and transactional intervention, allowing for administration or debugging. 10 | 11 | TXMs are intrinsically idempotent: having more than one copy of a TXM won't change the system's behavior. This allows us to reason simply about whether or not we have a behavior. A list of TXMs is also commutative, but that's not special for concurrency models. 12 | 13 | TXMs intrinsically support reactive, concurrent coordination behavior: If a deterministic transaction does not modify state, then repeating it under the same conditions will not modify state either. So we can wait for conditions to change. We can leverage this, e.g. using voluntary failure to make TXMs wait for arbitrary conditions. 14 | 15 | For example, a TXM implementing a stream processor will, in each step, drain an input queue, maintain some state, and push to an output queue. If input is empty or output is at capacity, it can abort (voluntarily fail) to force a wait. Haskellers might already be familiar with this technique, via the software transactional memory package and use of TQueue or TChan (a sketch follows below). 16 | 17 | Beyond stream processing, it's easy for TXMs to model reactive constraint systems (maintaining cells according to some goal), blackboard systems (read blackboard, opportunistically contribute), and publish-subscribe. Object-oriented code is not difficult to model within the transaction.
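The stream processor described above is easy to express with GHC's `stm` package. A minimal sketch of one step follows; the surrounding loop would simply run `atomically` on it forever, and with a bounded `TBQueue` the output write would likewise retry at capacity:

    import Control.Concurrent.STM

    -- One step of a stream-processing TXM: drain one input, update
    -- state, push output. readTQueue retries when the queue is empty,
    -- which is exactly the voluntary failure used to wait on conditions.
    step :: TQueue Int -> TVar Int -> TQueue Int -> STM ()
    step inQ st outQ = do
      x <- readTQueue inQ
      s <- readTVar st
      let s' = s + x
      writeTVar st s'
      writeTQueue outQ s'
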
To more precisely support larger-scale reactive systems, we could include logical timestamps with streaming values. Multiple models can be easily mixed. 18 | 19 | External systems, such as network or display, can effectively be integrated as shared memory, or perhaps by writing to a system task queue. The main limitation is that we cannot directly model synchronous wait. To indirectly model synchronous wait, we can add a request to a system queue including a reply-to variable, modify some state so we know to wait on that channel, commit, then wait for a reply in a subsequent transaction. 20 | 21 | These are all wonderful properties, and achieve several of my goals. However, an essential feature is missing: support for divide-and-conquer tactics. 22 | 23 | Imagine we have a dynamic set of input queues to handle, with over a hundred elements. In this case, I would like the option to spawn one TXM per input queue. Detached spawning would be trivial to implement. Unfortunately, it also escapes user control, hurting liveness and resilience. 24 | 25 | The idea of attached spawn leads me to Hierarchical TXMs (HTXMs). 26 | 27 | In each step, our machines can fork subordinate behaviors, but these behaviors only remain active until just before the next step. A failed transaction cannot fork. A successful transaction, unless it results in a stable state, will immediately begin the next step. Thus, only successful, stable-state transactions will result in active subordinates. 28 | 29 | To recognize successful, stable-state transactions is not difficult. For a singular transaction, only a read-write behavior on a variable can result in unstable behaviors. For example, if we read a queue, take one element, write the modified queue back, then when we repeat our transaction we'll have different observed conditions. If we only read a variable, or blindly write a variable, we're stable. Further, for data types that support efficient equivalence checks, we can stabilize writes that are equivalent to the current value. 30 | 31 | Programmers of HTXM systems will want to control stability. Fortunately, there are many patterns they can use to filter noise, cache stable conditions, partition data into relatively stable volumes, etc.. The desire for stability certainly isn't unique to HTXMs. 32 | 33 | I haven't implemented HTXMs yet. But I believe this model has much to offer, and I'll continue exploring it for now. 34 | -------------------------------------------------------------------------------- /docs/ImmutableAwelon.md: -------------------------------------------------------------------------------- 1 | # Immutable Awelon 2 | 3 | Paul Chiusano's Unison Web project takes an interesting approach to modeling a codebase: instead of mutable definitions (e.g. via files in a filesystem), a program only ever references external code via self-authenticating secure hash. This gives us *immutable* definitions at any given development step. Conveniently, this simplifies indexing, static typing, cached compilation, acceleration, etc.. The advantage is we can assign a global meaning to any program. 4 | 5 | Awelon as it currently exists uses an immutable *dictionary*, within which definitions are mutable. Consequently, it is the pairing of dictionary-program that is immutable. But there is an important difference: the meaning of programs can change indirectly due to updates within the dictionary. This gives us ad-hoc spreadsheet-like characteristics, but it also complicates caching and sharing of code.
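To make the content-addressed alternative concrete, here is a minimal Haskell sketch reusing this repository's `Awelon.Hash`; the `Env` record and nickname map are hypothetical illustrations, not a planned data model:

    import qualified Data.ByteString as BS
    import qualified Data.Map as M
    import Awelon.Hash (hash)

    -- Code is named by a secure hash of its content, so a definition
    -- can never change out from under its clients; nicknames are a
    -- purely local rendering aid, maintained separately.
    data Env = Env
      { defs  :: M.Map BS.ByteString BS.ByteString  -- hash -> definition
      , nicks :: M.Map BS.ByteString BS.ByteString  -- nickname -> hash
      }

    addDef :: BS.ByteString -> Env -> (BS.ByteString, Env)
    addDef body env =
      let h = hash body
      in (h, env { defs = M.insert h body (defs env) })
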
6 | 7 | With immutable code, our "dictionary" is replaced by a "development environment" within which we maintain a bi-directional relationship between secure hashes and local nicknames. References between code always use secure hashes. We may render and edit code using the nicknames in place of secure hashes. To simulate file-like edits, we can provide means to select and to edit multiple locations concurrently. 8 | 9 | That's the idea. 10 | 11 | Whether it's a "good" idea is a separate question. 12 | 13 | The advantage is that we simplify the sharing of code in a global codebase, and we simplify caching because the evaluation, type, etc. may be associated directly with the secure hash. The disadvantage is that maintaining and distributing data becomes relatively difficult. The codebase is no longer the "living sea of data" I desire for Awelon [application models](ApplicationModel.md). The spreadsheet-like characteristic of Awelon dictionaries is convenient for many of Awelon's use cases. 14 | 15 | Further, it's unclear how to support convenient embedding of data if we must use huge secure hashes for data constructors. I suppose we could [add first-class modules to Awelon](AwelonFML.md) with local symbols to serve the same role, in which case we have first-class dictionary values. 16 | 17 | -------------------------------------------------------------------------------- /docs/Indexing.md: -------------------------------------------------------------------------------- 1 | # Indexing Dictionaries 2 | 3 | I need to index dictionaries for effective use. 4 | 5 | Which indices are most useful? 6 | 7 | * word (or resource) → version 8 | * version → evaluated definition 9 | * indirectly? version → evaluated version + version → definition 10 | * version → link optimized or compiled versions 11 | * version → inferred type 12 | * Reverse Lookup: symbol → client words 13 | * symbols include annotations 14 | * symbols include secure-hash resources 15 | * find obviously erroneous (undefined, cyclic, etc.) words 16 | * potentially find full-text fragments 17 | 18 | A challenge for indexing is that Wikilon will have many dictionaries, for multiple projects/users, histories, and hierarchical structure. I assume that dictionaries will be 99% similar, but the differences might be private to a project or user. So we both need to somehow share indices and ensure secure separation thereof. Association of definitions with deep versions based on secure hash should help, since we can securely index from version to many kinds of computed results (like evaluation or type). This may also be treated similarly to memoization! 19 | 20 | For an interactive development environment, I also want the ability to find type-appropriate words within a dictionary. Maintaining a reverse lookup index for type information is non-trivial because reverse-lookup is always specific to each dictionary. But presumably we could just ask developers or software agents to augment the dictionary with some metadata that can be used in the reverse lookup. E.g. for `foo` we could have `foo-meta-kw` that includes ad-hoc key-words or tags to swiftly discover the word `foo`. 21 | 22 | So, I think the primary indices to maintain are for deep-version and a simple reverse-lookup. Fortunately, these two indices can be maintained incrementally. A relevant concern is that a version update for a 'deep' word (e.g. the initial state in a command pattern) could result in long, cascading chains of version invalidations with high cost.
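For concreteness, a naive, uncached computation of such a deep version might look like the Haskell sketch below, again reusing `Awelon.Hash`; the dictionary representation (definition bytes plus dependency list) is a hypothetical stand-in:

    import qualified Data.ByteString as BS
    import qualified Data.Map as M
    import Awelon.Hash (hash)

    type W = BS.ByteString  -- a word

    -- Deep version: hash over a word's definition and the deep versions
    -- of the words it depends on. Naive: nothing is memoized, and a
    -- cyclic definition would diverge; a real index must cache results
    -- and detect cycles.
    deepVersion :: M.Map W (BS.ByteString, [W]) -> W -> BS.ByteString
    deepVersion dict w = case M.lookup w dict of
      Nothing          -> hash w
      Just (def, deps) -> hash (BS.concat (def : map (deepVersion dict) deps))
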
And if we have a lot of updates, we shouldn't recompute all versions after each update. 23 | 24 | This could be ameliorated by making the deep-version index lazier in nature (so we only invalidate once for multiple updates), or otherwise batching our updates. 25 | 26 | In most cases, we can assume that the more a word is used, the more stable its definition will be. So eager versioning could also work. Or perhaps we should treat version IDs as a memory-cached index, recomputed as needed, rather than a fully durable index. 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /docs/KPN_Effects.md: -------------------------------------------------------------------------------- 1 | # KPNs as an Effects Model 2 | 3 | I believe [Kahn Process Networks (KPNs)](https://en.wikipedia.org/wiki/Kahn_process_networks) or variants would offer a better foundation for purely functional effects models and applications compared to monads. KPNs offer a convenient basis for pipeline style concurrency, working with multiple concurrent input sources, and distributed computation. 4 | 5 | Desiderata: 6 | 7 | * Confluent. KPNs have a deterministic result no matter the order of scheduling. 8 | * Concurrent. Order of inputs or outputs on separate channels is not relevant. 9 | * Monotonic. No rollbacks or rework. Every output produced is final. 10 | * Composable. We can compose entire networks externally. 11 | * First Class. KPNs are immutable values that can be passed around. 12 | * Reactive. We can merge and interleave asynchronous input sources. 13 | * Dynamic. Network structure may change incrementally based on input. 14 | 15 | The first several features are implicit for KPNs. 16 | 17 | KPNs as values is implicit in how I plan to represent them, using accelerated evaluation of a DSL-like network description. Explicit copy in Awelon helps: I can use linear update between copies. By passing KPNs around, they can serve as a sort of functional 'object' with asynchronous update and result. 18 | 19 | But reactive and dynamic behavior requires some attention. 20 | 21 | ## Reactive 22 | 23 | Reactivity is not a built-in feature of KPNs, but can be supported indirectly by adding a simple time model to our KPNs. The simplest option, perhaps, is to modify every message type from `a` to `(Tick + a)` such that we can express waiting on a message until time advances on a pipeline. 24 | 25 | ## Pushback 26 | 27 | Pushback is when sending a message causes a process to wait, in order to limit the number of pending messages. KPNs don't usually have pushback, but it's a useful constraint for limiting memory usage. It's easiest to model pushback via explicit acknowledgement channels, rather than adding it to the model directly. That is, after sending we await acknowledgement - but we can start with a few 'acks' already in the queue to represent bounded buffer channels. 28 | 29 | ## Dynamic 30 | 31 | Dynamic structure is a big challenge for KPNs, and is a potential weakness compared to monadic programming. How should we handle this? 32 | 33 | My best idea so far: 34 | 35 | * a process network has a set of labeled open input and output ports 36 | * a stateful process may 'become' a process network with the same ports 37 | * special support for composable processes (like an `fmap` process). 38 | 39 | With this much, a single process could rewrite itself into multiple smaller processes.
Some smaller processes, such as a sequence of two `fmap` processes, could collapse into a single process. Others may be garbage collected when awaiting 'dead' channels (assuming we'll wait forever for an input). 40 | 41 | Failed ideas: first class channels are problematic due to the issue of naming channels, unique names when composing networks, need for linear types, etc.. Explicit delegation is too inefficient for pipelining. 42 | 43 | ## Effects Models 44 | 45 | Modeling effects means providing outputs that result in some feedback. We could do this by convention with a small set of channels labeled based on the types of effects or expected independent sources of input. We can model message buses or routing within a KPN as needed to manage arbitrary effects. 46 | 47 | IO involving independent channels can feasibly be decentralized by sharding a process network, assuming an appropriate context. 48 | 49 | 50 | -------------------------------------------------------------------------------- /docs/Perf.md: -------------------------------------------------------------------------------- 1 | # Performance Monitoring 2 | 3 | To avoid blind optimization, I'll try to develop a useful set of benchmarks with local metrics. 4 | 5 | Benchmarks: 6 | * **bench.repeat10M** a trivial `0 [4 +] 10000000 repeat` benchmark. 7 | 8 | * benchmark ideas 9 | * implement μKanren relational language. 10 | * text processing - regex, parsers, backtracking, etc.. 11 | * Pov-Ray style scene computing programs 12 | * vector and matrix processing, machine learning 13 | * big data processing, stowage tests 14 | * look into the computer language benchmarks game 15 | 16 | Interpreters: 17 | * **old aoi** - From the `awelon` project. 18 | * Runs a simplifier by default. 19 | * Times include linking. 20 | * **alpha runABC Haskell** 21 | * No simplifier or accelerators. 22 | * Doesn't validate copyable/droppable. 23 | * Time excludes linking. 24 | * **alpha runABC Wikilon** 25 | * No simplifier or accelerators. 26 | * No shared objects. 27 | * Time excludes linking. 28 | * **runABC 32 bit Wikilon** 29 | * memory relative offsets 30 | * smaller integer types 31 | 32 | 33 | 34 | ## Repeat 10M 35 | 36 | * old aoi interpreter: 213 seconds 37 | * alpha runABC Haskell: 413 seconds 38 | * alpha runABC Wikilon: 19.0 seconds 39 | * runABC 32 bit Wikilon: 19.7 seconds 40 | 41 | This is a promising start. A 10x improvement over my old `aoi` interpreter even before optimization work or JIT. This also demonstrates that 32-bit vs. 64-bit doesn't make a significant difference on CPU performance, while effectively doubling the working context space. 42 | 43 | However, objectively this is not *good* performance. 20 seconds for 10M loops is ~2 microseconds per loop. I hope to achieve *at least* another 10x improvement here in the medium term, via fixpoint accelerators. Long term, a ~100x improvement is desirable via JIT. 44 | 45 | Unfortunately, with my recent (2016 November) change to Awelon's definition, these results won't hold. I'll need to re-implement both the interpreter and the compiler. But it may be feasible to create a specialized runtime just for super-fast interpretation without all the linking. 46 | 47 | A theoretical limit is closer to a 1000x improvement.
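For reference, the same loop written directly in Haskell gives a rough sense of that limit - a native counting loop costs a few nanoseconds per iteration, roughly 1000x below the ~2 microseconds measured above:

    -- Native rendition of bench.repeat10M: 10M iterations of (+ 4).
    bench :: Int
    bench = go (10000000 :: Int) 0 where
      go 0 acc = acc
      go n acc = go (n - 1) (acc + 4)
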
48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /docs/SLRP.md: -------------------------------------------------------------------------------- 1 | 2 | # Stream Language Rewrite Processing (SLRP) 3 | 4 | See the [SLRP-Article.md](SLRP-Article.md). 5 | 6 | The essential idea for SLRP is that we have fixed-memory processors rewrite an input stream in multiple passes. This has many benefits for pipelining, partitioning, and parallelism. In the article, I outline an initial SLRP machine code based on a concatenative combinatory logic. 7 | 8 | Unfortunately, data plumbing (dip, dup, swap, loop, etc.) is too expensive in our initial example language. I can hack a solution with virtual memory. However, if this were adequately solved in pure SLRP, SLRP could become a very nice, practical foundation for computing. 9 | 10 | ## Data Plumbing 11 | 12 | The initial SLRP machine code has several binary operators. However, these operators only "see" one operand within our stream. To overcome this limit, we're using expensive divide-and-conquer tactics to ferry `xyz` buffers of data. This can be mitigated by large buffers, such that `xyz` may involve tens of kilobytes. However, even if we mitigate this, it still requires our processor's full attention, and occurs too frequently for common operations. 13 | 14 | To solve this problem, we need processors that support multiple input streams. This way, an operator could observe two streams and either merge them element-wise (zip, merge-sort, etc.) or at least reorder large volumes efficiently (e.g. all of X followed by all of Y, no ferrying small buffers). 15 | 16 | However, divided streams introduce semantic and mechanical challenges. 17 | 18 | Where does that second stream come from? How does it interact with abstraction? 19 | 20 | ## Brainstorming 21 | 22 | No solid solutions yet, just some ideas. 23 | 24 | ## Multiple Channel SLRP 25 | 26 | With a multi-channel SLRP, processors have a fixed number K of input and output streams. This fixed number would be determined by our machine-code and hardware, and is program-independent. The processor must rewrite these several streams collectively. 27 | 28 | To support pipeline processing with bounded buffers, these channels must be synchronous. If the channels were asynchronous, then bounded buffers would fill at variable rates. This would result in pushback, and downstream processors would become "stuck" without input on every channel. It is certainly feasible to represent 'whitespace' NOPs, though. It is also feasible to model channels that are synchronous at different 'rates', however. For example, two bytes of channel B for every byte in channel A. 29 | 30 | In mechanical terms, a multi-channel SLRP is equivalent to a single-stream SLRP with a word-size proportional to the number of bytes transferred in a synchronous time-step. Thus, a "multi-channel" behavior must be part of our semantics, all about how we interpret and leverage these extra bytes. 31 | 32 | *Note:* For convenient programming, I would favor a semantics where all valid programs can be represented as one stream, assuming the other streams are initially just whitespace. We should also be able to extract the interleaved stream back into a single stream. 33 | 34 | ## Bi-directional SLRP 35 | 36 | The idea with bi-directional SLRP is to have input-output channels flowing in opposite directions.
37 | 38 | A difficulty with this is that, unless we're willing to accept a non-deterministic model, we need to clearly understand where each "rendezvous" between channels should occur. (And for non-determinism, I'd rather just use effects for this role.) 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | Awkwardly, to support pipeline processing, these channels must be 'synchronous' at the byte level. Otherwise, bounded buffers will fill at variable rates and downstream processors may become stuck, unable to observe at least one of the K in 64 | 65 | It is feasible to have some channels with larger "word sizes" than others, e.g. so channel B has 3 bytes for every 1 byte in channel A. However, without a loss of generality, we could simply divide B into 3 channels that are byte-for-byte synchronous with A. 66 | 67 | 68 | 69 | That is, every output from one channel must be accompanied by outputs on other channels. 70 | 71 | To support pipeline processing, these channels must (rather awkwardly) be synchronous at the byte-level. The mechanical reason for this is that we'll have bounded buffers between processors. 72 | 73 | These channels would need to be *synchronous*. The reason for this: if we 'pushback' on a stream 74 | 75 | 76 | 77 | Can this work? 78 | 79 | Ignoring pipelining, we can guarantee our processor has inputs on every port. Thus, we could develop a set of rewrite rules that always make progress. Unfortunately, this no longer works when we pipeline our processors together. Relevantly, 80 | 81 | 82 | 83 | 84 | 85 | ## Concrete Channels? 86 | 87 | 88 | , and we rewrite them collectively into several unbounded output streams. Processors may pushback on their unbounded input sources, and are not guaranteed to provide output on any given output channel in a timely fashion. 89 | 90 | A consequence of 'pushback' means we cannot usually expand a si 91 | 92 | 93 | 94 | Mechanically, this seems feasible. 95 | 96 | Semantically, this is a challenge. That is, assuming we intend to maintain nice features such as pipeline processing, easy partitioning, confluence up to effects, virtual memory, etc.. 97 | 98 | What is our mechanical foundation upon which we may construct a semantics? 99 | 100 | * Processors are physically wired to K input streams. 101 | * Processors are wired to K' output streams. K = K'? 102 | * Operations and operands are encoded in the streams. 103 | * Stream states: terminated, waiting, data available. 104 | * We can only see the "header" for larger operands. 105 | 106 | If we assume multiple rewrites are required to complete a computation, and that pipeline processing might be useful, then our number of output channels must match the number of input channels, and routing must be simple and linear. 107 | 108 | If we want confluent computation, then we must not observe the "waiting" condition for any input stream. If we want local rewrites, then we also shouldn't be able to observe the "terminated" condition. Instead, we should have a simple approach to determine how much data a given operation is waiting on. It is feasible to achieve limited parallelism if rewrites on multiple inputs are not conflicting. 109 | 110 | Our implementation must look at a small buffer of data available on each stream and simply *know* what to do next.
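As a toy illustration of the single-stream case, here is one SLRP "pass" in Haskell with a fixed-size window, repeated to a fixpoint. The token set and rewrite rules are invented for the example and are much simpler than the machine code in the article:

    data Tok = Nat Int | Add | Dup | Drop deriving (Eq, Show)

    -- One pass: a processor with a three-token window rewrites what it
    -- recognizes and copies everything else through unchanged.
    pass :: [Tok] -> [Tok]
    pass (Nat a : Nat b : Add : ts) = Nat (a + b) : pass ts
    pass (Nat a : Dup : ts)         = Nat a : Nat a : pass ts
    pass (Nat _ : Drop : ts)        = pass ts
    pass (t : ts)                   = t : pass ts
    pass []                         = []

    -- Pipeline the pass over the stream until nothing changes.
    run :: [Tok] -> [Tok]
    run ts = let ts' = pass ts in if ts' == ts then ts else run ts'

For example, `run [Nat 1, Nat 2, Add, Dup]` takes two passes to reach `[Nat 3, Nat 3]`, which is the multi-pass behavior described above.
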
111 | 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /docs/Todo.md: -------------------------------------------------------------------------------- 1 | # Recent 2 | 3 | Performance profiling. Comparisons in Perf.md. 4 | 5 | # TODO 6 | 7 | My primary efforts at the moment should be: 8 | 9 | * simplify and accelerate ABC code! 10 | * recognize existing accelerators 11 | * develop MORE accelerators 12 | * fast fixpoint inline (fix.i) 13 | * maybe as an attribute? 14 | * faster conditional behavior. 15 | * `if` and `if_` behaviors. 16 | * common EQ/NEQ comparisons. 17 | * sum to boolean conversion. 18 | * some deep stack manipulations 19 | * develop specialized accelerators: 20 | * deep structural manipulations 21 | * list processing accelerators 22 | * consider support for 'compact' texts 23 | 24 | * compact, fast bytecode 25 | * optionally compact during parse 26 | * high performance quotations 27 | * directly contain binaries, texts, blocks. 28 | * explicit reserve for structured data 29 | * design with eye toward shared objects. 30 | 31 | * persistence and stowage 32 | * potential for shared, stable object models 33 | * use for web service storage! 34 | 35 | * [parallelism](Parallelism.md)! 36 | * get `{&fork}` and `{&join}` working 37 | 38 | * shared bytecode objects 39 | * avoid full copies for large bytecode. 40 | * for now, maybe separate from stowage. 41 | 42 | * profiling and debugging of AO code 43 | * simplified stack traces 44 | * other profiling options 45 | 46 | * get web service active again! 47 | * develop useful application models 48 | * develop some console style apps 49 | 50 | * command line utilities 51 | * favor via web service, not filesystem 52 | * support queries, claw, computations 53 | * support import/export to AO files 54 | * with or without history 55 | * FUSE adapter (maybe) 56 | 57 | * runtime performance 58 | * compact bytecode 59 | * shared memory (loopy code, binaries, texts) 60 | * simplification and optimizations 61 | * partial evaluation and applications 62 | * dead code and data elim - trash, etc.. 63 | 64 | * extraction of data 65 | * texts, images/video, sound/music 66 | * binaries 67 | * cacheable results, please. 68 | 69 | ## Lower Priority (for now) 70 | 71 | * faster basic bytecode! 72 | * typechecked, preallocated variants. 73 | * explicit memory reservations. 74 | * ability to run a block unchecked. 75 | * explicit type tests. Basic Block Value? 76 | 77 | * programmable completion for linkAO 78 | * maybe add default linkAO file via env. 79 | * really want to write `linkAO -w foo[TAB]`. 80 | * but it's low priority 81 | 82 | * claw to AO conversion (and vice versa) 83 | * simplify command line testing, please. 
84 | 85 | * dictionary applications 86 | * model of edit and debug sessions 87 | * multi-media interactive fictions 88 | * multi-media multi user dungeons 89 | * quests 90 | 91 | * persistence and reflection 92 | * dictionaries as runtime values 93 | * Wikilon state as runtime values 94 | * eliminate dependency on VCache 95 | 96 | * extraction of executables or libraries 97 | * extract to Rust, C, JavaScript 98 | * perhaps via reflection of dictionaries 99 | 100 | * background computations 101 | * for types, errors, cycles, undefined words 102 | * continuous zero-button testing, live tests 103 | * quickly find type errors and failed tests 104 | * background computations and eventual consistency 105 | * maintaining lists of words with properties 106 | * caching, recomputation, wide reuse of cache 107 | * preferably: over history and between dicts 108 | * leverage structure sharing and stowage 109 | * index to find words with a suitable type 110 | * index for fast fuzzy find, tab completion 111 | * index for full text search 112 | 113 | * debuggable runtimes 114 | * replays in debug view: animations, etc. 115 | * automatic debugging and error isolation 116 | 117 | * heavier focus on Claw views and dictionary apps 118 | * visual claw, stateful widgets in source 119 | * named variables and closures (?) 120 | * I haven't found any variation of this I like 121 | * maybe to fix a local `x y z` variable environment 122 | 123 | * Tunable claw (not critical) 124 | * bytecode views defined within dictionaries? 125 | 126 | * Neat features 127 | * render environments, animate evaluations 128 | * color-coded namespaces 129 | * bolder or larger words for expensive words 130 | 131 | * less ad-hoc web application views 132 | * application logic into AO dictionaries 133 | * subscriptions model: long polling, web-sockets 134 | * shared, updateable views as the default 135 | * favor semantic spans/divs, forms, and CSS 136 | * try for generic designs (MFlow, etc.) 137 | * use ETags correctly (e.g. If-None-Match) 138 | 139 | * model metadata per dictionary 140 | * edit sessions 141 | * bug trackers 142 | * stats and logs 143 | * genealogy: fork, merge, branch 144 | * users and activities, collision prevention 145 | * recent changes 146 | 147 | * generic web resources 148 | * support for image resources (PNG, SVG, favicon) 149 | * applications for edit sessions, bug trackers 150 | * support to upload/download binaries and texts 151 | * model REPLs, forums, wikis, spreadsheets, etc. 152 | * box and wires or VRML systems above visual claw 153 | * support for video-audio resources (GIF, HTML5 video, etc.) 154 | * turtle graphics, iPython notebooks, xiki 155 | 156 | * extraction of applications 157 | * Optimized ABC 158 | * JavaScript + DOM 159 | * Unikernels or Docker apps 160 | 161 | * user model 162 | * names and logins 163 | * themes, configuration 164 | * quotas, etc. 165 | * presence (tracking activity via dictionary) 166 | * support for 'secured' dictionary apps (games, etc.) 167 | 168 | Note that I've decided to model almost everything as dictionary apps, including stuff like edit sessions, bug trackers, and users. The idea, or goal, is to keep all of this information uniformly available for import, export, history, views of the dictionary, etc.. 169 | 170 | The main challenge with this concept is security. Security *within* an AO dictionary isn't really a feasible thing. What we can probably do, however, is control interactions (merges, dataflows, etc.)
between dictionaries, and perhaps also constrain merging of value sealers. An external agent would make decisions given a set of dictionaries. 171 | -------------------------------------------------------------------------------- /hs/Deprecated.md: -------------------------------------------------------------------------------- 1 | Development of Wikilon has been started and aborted in Haskell several times now, largely due to the performance concerns surrounding dynamic code. Currently I'm migrating to F# on the CLR. 2 | 3 | 4 | -------------------------------------------------------------------------------- /hs/Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /hs/app/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ViewPatterns, PatternGuards, OverloadedStrings #-} 2 | 3 | module Main (main) where 4 | 5 | import Control.Monad 6 | import Control.Monad.Loops (anyM) 7 | import Control.Exception 8 | import qualified Data.ByteString as BS 9 | import qualified Data.ByteString.UTF8 as U8 10 | import qualified System.IO as Sys 11 | import qualified System.Exit as Sys 12 | import qualified System.Environment as Env 13 | import qualified Data.List as L 14 | import qualified System.EasyFile as FS 15 | import qualified System.Entropy as Sys 16 | import qualified Network.Wai.Handler.Warp as WS 17 | import qualified Network.Wai.Handler.WarpTLS as WS 18 | import qualified Network.Wai as Wai 19 | import Awelon.Hash (hash) 20 | import qualified Wikilon.DB as DB 21 | import qualified Wikilon as Wiki 22 | 23 | helpMsg :: String 24 | helpMsg = 25 | "Expected Usage:\n\ 26 | \\n\ 27 | \ wikilon [-pPORT] [-dbMB] [-admin] \n\ 28 | \ -pPORT listen for HTTP requests on given port (default 3000) \n\ 29 | \ -dbMB configure maximum database file size (default 4T) \n\ 30 | \ -admin print admin password (valid until process restart)\n\ 31 | \\n\ 32 | \ Environment variables:\n\ 33 | \ WIKILON_HOME: directory for persistent data.\n\ 34 | \ defaults to OS dependent user app directory.\n\ 35 | \\n\ 36 | \ For TLS and HTTPS:\n\ 37 | \ add 'wiki.key' and 'wiki.crt' files to WIKILON_HOME\n\ 38 | \ if TLS is enabled, insecure connections are denied\n\ 39 | \\n\ 40 | \The wikilon web server is primarily accessed and configured through a\n\ 41 | \normal browser. Documentation online. Initial configuration requires\n\ 42 | \an -admin password, but persistent administrators may be configured.\n\ 43 | \" 44 | 45 | -- should I allow users to *provide* the admin password? For now, I 46 | -- think that's probably unwise. Better to just treat admin as the 47 | -- bootstrap account, discourage unnecessary use in scripts etc.
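-- Example of the argument syntax handled by procArgs below, e.g.
--   wikilon -p8080 -db16G -admin
-- yields a_port = 8080, a_dbsz = 16000 (megabytes), a_admin = True.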
48 | 49 | -- TLS files 50 | crt, key :: FS.FilePath 51 | crt = "wiki.crt" 52 | key = "wiki.key" 53 | 54 | data Args = Args 55 | { a_port :: Int 56 | , a_dbsz :: Int 57 | , a_admin :: Bool 58 | , a_help :: Bool 59 | , a_bad :: [String] 60 | } 61 | defaultArgs :: Args 62 | defaultArgs = Args 63 | { a_port = 3000 64 | , a_dbsz = 4 * 1000 * 1000 65 | , a_admin = False 66 | , a_help = False 67 | , a_bad = [] 68 | } 69 | 70 | readSizeMul :: String -> Maybe (Int -> Int) 71 | readSizeMul "" = Just id 72 | readSizeMul "M" = Just id 73 | readSizeMul "G" = Just $ (* (1000)) 74 | readSizeMul "T" = Just $ (* (1000 * 1000)) 75 | readSizeMul _ = Nothing 76 | 77 | readSize :: String -> Maybe Int 78 | readSize s = case reads s of 79 | [(a,m)] | (a > 0) -> 80 | readSizeMul m >>= \ f -> 81 | return (f a) 82 | _ -> Nothing 83 | 84 | readPort :: String -> Maybe Int 85 | readPort s = case reads s of 86 | [(a,"")] | (a > 0) -> Just a 87 | _ -> Nothing 88 | 89 | procArgs :: [String] -> Args 90 | procArgs = L.foldr (flip pa) defaultArgs where 91 | pa a ('-': 'p' : (readPort -> Just p)) = a { a_port = p } 92 | pa a ('-': 'd' : 'b': (readSize -> Just mb)) = a { a_dbsz = mb } 93 | pa a "-admin" = a { a_admin = True } 94 | pa a "-?" = a { a_help = True } 95 | pa a s = let bad' = s : a_bad a in a { a_bad = bad' } 96 | 97 | useWikilonHome :: IO () 98 | useWikilonHome = do 99 | e <- Env.getEnvironment 100 | home <- case L.lookup "WIKILON_HOME" e of 101 | Just h -> return h 102 | Nothing -> FS.getAppUserDataDirectory "wikilon" 103 | FS.createDirectoryIfMissing True home 104 | FS.setCurrentDirectory home 105 | 106 | -- create fresh admin password, as needed 107 | mkAdminPass :: IO BS.ByteString 108 | mkAdminPass = BS.take 24 . hash <$> Sys.getEntropy 48 109 | 110 | putErrLn :: String -> IO () 111 | putErrLn = Sys.hPutStrLn Sys.stderr 112 | 113 | -- run Warp using TLS if avaialable, otherwise insecure 114 | runWarp :: Int -> Wai.Application -> IO () 115 | runWarp port app = do 116 | bUseTLS <- anyM FS.doesFileExist [crt, key] 117 | let tlsOpt = WS.tlsSettings crt key 118 | let warpOpt = WS.setPort port 119 | $ WS.setTimeout 120 -- allow longer heartbeats 120 | $ WS.defaultSettings 121 | if bUseTLS then WS.runTLS tlsOpt warpOpt app 122 | else WS.runSettings warpOpt app 123 | 124 | haltOnError :: SomeException -> IO a 125 | haltOnError e = do 126 | putErrLn "Wikilon halted with exception:" 127 | putErrLn (show e) 128 | putErrLn "Aborting." 129 | Sys.exitFailure 130 | 131 | -- | This essentially just passes the buck to Wikilon. 
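-- For example, `wikilon -p8080 -db100G -admin` listens on port 8080,
-- allows the database to grow to 100GB, and prints a fresh admin
-- password on startup (example flag values, not defaults; see helpMsg).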
132 | main :: IO ()
133 | main = body `catch` haltOnError where
134 |     getArgs = procArgs <$> Env.getArgs
135 |     body = 
136 |         getArgs >>= \ args ->
137 |         if a_help args then Sys.putStrLn helpMsg else
138 |         let badArgs = not (L.null (a_bad args)) in
139 |         if badArgs then failWithBadArgs args else do
140 |             admin <- if not (a_admin args) then return Nothing else do
141 |                 pass <- mkAdminPass
142 |                 Sys.putStrLn ("admin:" ++ U8.toString pass)
143 |                 return (Just pass)
144 |             runServer args admin
145 |     runServer args admin = do
146 |         useWikilonHome
147 |         let wikiOpts = Wiki.setAdmin admin
148 |                      $ Wiki.defaultOpts
149 |         db <- DB.open "db" (a_dbsz args)
150 |         app <- Wiki.mkWaiApp wikiOpts db
151 |         runWarp (a_port args) app
152 |     failWithBadArgs args = do
153 |         putErrLn $ "Unrecognized arguments (try `-?` for help): "
154 |         putErrLn $ show (a_bad args)
155 |         Sys.exitFailure
156 | 
157 | 
158 | 
159 | 
160 | 
--------------------------------------------------------------------------------
/hs/src/Awelon.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE NoImplicitPrelude #-}
2 | -- | Awelon Language
3 | --
4 | -- Awelon language has a very simple syntax and semantics. The syntax
5 | -- is essentially a stream of words (with special forms for texts and
6 | -- annotations) and first-class subprograms. The semantics is based on
7 | -- rewriting with only four primitive combinators.
8 | --
9 | -- The syntax is intended for use with projectional editing. Purely
10 | -- textual projections might introduce decimal numbers, lists, DSLs,
11 | -- local variables and lambdas and let expressions, and so on. But
12 | -- we can feasibly build graphical programming environments above the
13 | -- simple syntax.
14 | --
15 | -- The semantics is based on confluent rewriting, i.e. it's purely
16 | -- functional and deterministic but may also work with incomplete
17 | -- programs, and any subprogram may be evaluated. This works with
18 | -- the syntactic projections, too, since the output may be subject
19 | -- to the same view models as the input.
20 | --
21 | -- Awelon has only four primitive rewrite rules and one primitive data
22 | -- element (the block of code):
23 | --
24 | --   [B][A]a == A[B]     (apply)
25 | --   [B][A]b == [[B]A]   (bind)
26 | --   [A]c    == [A][A]   (copy)
27 | --   [A]d    ==          (drop)
28 | --
29 | -- While these semantics are simple, they're far from bare metal and
30 | -- some extra work is required to achieve performance. Arithmetic and
31 | -- number models must be recognized by the runtime and accelerated to
32 | -- perform efficient number crunching. Linear algebra too, ideally.
33 | --
34 | -- So the job of a runtime or compiler becomes software acceleration
35 | -- for a variety of widely useful models, in addition to more basic
36 | -- optimizations.
37 | --
38 | -- Awelon is complicated further by annotations, parenthetical words
39 | -- that extend the runtime but do not affect formal program semantics.
40 | -- These extensions may affect performance (e.g. adding parallelism,
41 | -- memoizing computations, optimizing representations) or cause an
42 | -- invalid program to fail fast (e.g. simple type assertions).
43 | --
44 | -- Wikilon aims to support Awelon with reasonable performance.
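-- A small worked example of the rewrite rules: the program `[A] c d`
-- evaluates by
--
--   [A] c d == [A][A] d   (copy)
--           == [A]        (drop)
--
-- i.e. a copy followed by a drop leaves the original value in place.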
45 | -- 46 | module Awelon () where 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /hs/src/Awelon/CX.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TypeFamilies, FlexibleContexts, ExistentialQuantification #-} 2 | 3 | -- | Abstract Runtime Context for Awelon 4 | -- 5 | -- Although Awelon is purely functional, it does leverage annotations 6 | -- and accelerators to support performance or debugging. Rather than 7 | -- a concrete implementation in IO, Awelon.CX represents the required 8 | -- evaluation contexts with an abstract typeclass-based API admitting 9 | -- some purely functional implementations. 10 | -- 11 | module Awelon.CX 12 | ( Par(..), Async(..), race' 13 | , Lazy(..), Trace(..) 14 | , Stowage(..), load 15 | , Cache(..) 16 | , CX(..) 17 | , ByteString 18 | , module Awelon.Hash 19 | ) where 20 | 21 | import Control.Applicative 22 | import Data.ByteString.Lazy (ByteString) 23 | import Awelon.Hash 24 | 25 | -- | Fork-join parallelism via composable futures. 26 | class (Monad (F m), Monad m) => Par m where 27 | data F m :: * -> * -- ^ a future value 28 | fork :: m a -> m (F m a) -- ^ parallel future 29 | join :: F m a -> m a -- ^ wait for result 30 | 31 | -- Might need some means to extend monadic operations. 32 | -- Something like the following: 33 | -- 34 | -- andThen :: F m a -> (a -> m b) -> m (F m b) -- ^ extend parallel task 35 | -- might be able to provide a default impl with fork and join 36 | -- 37 | -- Or perhaps: 38 | -- 39 | -- liftF :: m a -> F m a 40 | -- 41 | -- Not really sure what's best here, nor how much expressiveness 42 | -- we need. 43 | 44 | -- | Concurrency via arrival-order indeterminism. 45 | -- 46 | -- The `race` operation enables clients to process results based on 47 | -- the order in which they become available. This introduces a form 48 | -- of non-determinism, which is generally impure. But indeterminism 49 | -- can be leveraged to accelerate confluent computations. So this is 50 | -- a feature to use, albeit with some care. 51 | class (Par m) => Async m where 52 | race :: F m a -> F m b -> F m (Either a b) 53 | 54 | -- | Data-preserving race, for linearity. 55 | race' :: Async m => F m a -> F m b -> F m (Either (a, F m b) (F m a, b)) 56 | race' fa fb = k <$> race fa fb where 57 | k (Left a) = Left (a,fb) 58 | k (Right b) = Right (fa,b) 59 | 60 | -- | Note on Acceleration of Kahn Process Networks 61 | -- 62 | -- Use of Async peek and wait can support KPN accelerators via the 63 | -- arrival-order merge for outputs from child processes. This can 64 | -- reduce processing latencies, and improve utilization of our CPUs. 65 | -- Further, except where a message's destination depends on content, 66 | -- it is feasible to route future message values, such that routing 67 | -- is performed ahead of processing. (With a little filtering, we 68 | -- may even route some messages to the wrong destination.) 69 | -- 70 | -- It is feasible to perform better by explicitly routing with shared 71 | -- memory, for example by use of `ivar :: m (a -> m (), F m a)` for 72 | -- single-assignment variables, to model channels with push behavior. 73 | -- However, ivars align poorly to Awelon's semantics, serializability, 74 | -- memoization, persistence, local reasoning. And they're also bad for 75 | -- physical distribution, requiring shared memory and distributed GC. 76 | -- 77 | -- Fortunately, explicit routing is unlikely to be our bottleneck. 
So
78 | -- the clean but inexpressive `Async` class is favored at this time.
79 | 
80 | -- | Lazy (Call by Need) Evaluation
81 | --
82 | -- A lazy computation will be performed only once, and only when some
83 | -- demand exists, with the result being memoized. This could be useful
84 | -- for some evaluation modes, JIT, lazy linking and evaluation of the
85 | -- dependencies, CSE optimization, etc. Some accelerators might also
86 | -- use laziness.
87 | class Lazy m where
88 |     lazy :: m a -> m a -- ^ wrap computation for memoization
89 | 
90 | -- | Trace Message Debugging
91 | --
92 | -- Debug messages are a simple, convenient technique for debugging.
93 | -- Awelon may support debug messages via (trace) annotations, and
94 | -- thus generate an additional sequence of messages.
95 | class Trace m where
96 |     trace :: ByteString -> m ()
97 | 
98 | -- | Data Stowage
99 | --
100 | -- Awelon heavily utilizes a notion of 'secure hash resources', which
101 | -- are essentially just binary resources that are globally identified
102 | -- by secure hash, specifically Awelon.Hash.
103 | --
104 | -- This API supports asynchronous loads, useful if stowage is backed
105 | -- by a networked service. However, failure is not handled directly.
106 | -- If any resource cannot be loaded, that should be represented as an
107 | -- exception at the monad layer.
108 | --
109 | -- Resources may generally be garbage collected. It's up to the monad
110 | -- to provide sensible GC that mitigates common sources of failure,
111 | -- e.g. to avoid GC of newly stowed resources except at transaction
112 | -- or checkpoint boundaries (provided through a persistence API).
113 | --
114 | class (Monad m) => Stowage m where
115 |     stow :: ByteString -> m Hash
116 |     load_async :: Hash -> m (m ByteString)
117 | 
118 | load :: (Stowage m) => Hash -> m ByteString
119 | load h = load_async h >>= id
120 | 
121 | -- question: how do
122 | 
123 | -- | Cached Computations
124 | --
125 | -- Caching plays an essential role in Awelon systems, the basis for
126 | -- incremental computing. Caching may be implicit for toplevel words,
127 | -- or explicit for compositional views of persistent data structures.
128 | --
129 | -- The latter requires the compositional property:
130 | --
131 | --   ∃F.∀X,+,Y. V(X + Y) == F(V(X), '+', V(Y))
132 | --
133 | -- That is, the view on a composite is a function of the views on the
134 | -- individual components. With this, we can easily cache V(X) and V(Y)
135 | -- so when we update Y and compute V(X+Y') we can reuse V(X). If this
136 | -- caching is performed deeply, computing V(Y') might then reuse some
137 | -- structure internally.
138 | --
139 | -- I favor a monotonic cache, which avoids the challenges surrounding
140 | -- cache invalidation. The key must uniquely identify the computation,
141 | -- including the dependencies (e.g. by including a secure hash). The
142 | -- runtime may heuristically delete cache entries to recover space.
143 | --
144 | -- To maximize reuse, cache keys should also be precise, such that
145 | -- most irrelevant changes to a codebase don't affect the key. But
146 | -- there is a heuristic tradeoff between precision and performance
147 | -- of the key computation.
148 | --
149 | class Cache m where
150 |     cacheGet :: ByteString -> m (Maybe ByteString) -- ^ cacheGet key
151 |     cachePut :: ByteString -> ByteString -> m ()   -- ^ cachePut key val
152 | 
153 | -- | An Awelon context simply aggregates the aforementioned features.
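-- A concrete instance would presumably be a reader monad over IO
-- carrying the runtime environment (cf. Wikilon.RT.RTE, which wraps
-- the DB handle); no instance is defined in this module.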
154 | class ( Monad m, Monad (F m)
155 |        , Par m, Async m, Lazy m
156 |        , Stowage m, Cache m, Trace m
157 |        ) => CX m
158 | 
159 | -- Am I missing anything?
160 | --
161 | -- It isn't clear whether I should handle quotas here. Nor how.
162 | --
163 | -- The Haskell runtime cannot easily track unique references, so I
164 | -- won't support in-place list manipulations. That said, I could
165 | -- leverage Data.Sequence or persistent-vector in that role.
166 | 
167 | 
168 | 
--------------------------------------------------------------------------------
/hs/src/Awelon/Dict.hs:
--------------------------------------------------------------------------------
1 | 
2 | -- | Awelon Dictionary Format
3 | --
4 | -- Awelon language specifies a simple, DVCS-inspired dictionary
5 | -- format that is designed for scalability and structure sharing
6 | -- between versions and developers. It looks roughly like this:
7 | --
8 | --   secureHashOfPatch1
9 | --   secureHashOfPatch2
10 | --   @word1 def1
11 | --   @word2 def2
12 | --   @@ns secureHashOfSubDict
13 | --   @word3 def3
14 | --
15 | -- Awelon uses secure hashes to identify binary resources instead of
16 | -- filenames or URLs. The resources identified by the initial hashes
17 | -- are logically inlined. The hash after `@@ns` supports hierarchical
18 | -- containment of one dictionary within another. At the top level,
19 | -- a dictionary may be precisely versioned by a root secure hash. The
20 | -- last definition of any symbol wins.
21 | --
22 | -- Thus we have a deeply immutable, persistent data structure that
23 | -- can be updated using flexible update models. But this flexibility
24 | -- does create some challenges for indexing. We cannot assume sorted
25 | -- words, for example.
26 | --
27 | -- And as for scale, Awelon's application model unifies the database
28 | -- and the codebase. Thus, we must think about dictionaries the size
29 | -- of popular forums or Wikipedia, at least in the long term. Scale
30 | -- creates its own challenges, since we cannot expect to keep a full
31 | -- codebase in memory, nor even an index on the codebase.
32 | --
33 | -- Ideally, I will want several indices:
34 | --
35 | -- * quickly find definition for a word
36 | -- * reverse lookup, find uses of a word (sketched below)
37 | -- * find words matching a suffix or prefix
38 | -- * fuzzy find, spellcheck: words by similarity
39 | -- * full text search
40 | -- * cached evaluated definitions, types, optimized link defs, etc.
41 | -- * word to a deep-version behavior hash? (a vanderhoof versum?)
42 | -- * reverse type lookup, hoogle style for code completion
43 | --
44 | -- Also, I will want to keep histories for dictionaries, and to
45 | -- merge updates on similar dictionaries, and support transactional
46 | -- updates, and so on. Everything will need to be incremental, so I
47 | -- can quickly account for changes or differences in code. Memoized
48 | -- computation of partial, composable indices is a promising approach,
49 | -- though it may constrain which indices can efficiently be maintained.
50 | --
51 | -- There is much to do here. It's very daunting. But I'll just add one
52 | -- feature at a time, as needed.
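-- As a concrete sketch of the reverse lookup index mentioned above
-- (hypothetical; nothing is implemented here yet), it might be
-- maintained as
--
--   type RevDeps = M.Map Word (Set Word)  -- word → words that use it
--
-- updated incrementally per patch by diffing old and new definitions.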
53 | -- 54 | module Awelon.Dict 55 | ( 56 | ) where 57 | 58 | 59 | -------------------------------------------------------------------------------- /hs/src/Awelon/Dict/Format.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | -- | This module is concerned with parsing or serializing the Awelon 3 | -- dictionary representation, which essentially consists of: 4 | -- 5 | -- secureHashOfPatch1 6 | -- secureHashOfPatch2 7 | -- @word1 def1 8 | -- @word2 def2 9 | -- @@ns secureHashOfDict 10 | -- ... 11 | -- 12 | -- Secure hashes in Awelon identify external resources (see Stowage 13 | -- under Awelon.CX). A dictionary starts with a sequence of secure 14 | -- hashes (one per line) that are logically included, followed by 15 | -- definitons. Each definition may specify a namespace or a word, 16 | -- indicated by prefix - `@@ns` or `@word`. 17 | -- 18 | -- A namespace is defined with a secure hash (or may be empty), while 19 | -- a word should be defined with valid Awelon code. Awelon code also 20 | -- has a very simple syntactic structure (see Awelon.Syntax). 21 | -- 22 | -- Order of definitions for distinct symbols is irrelevant. But if a 23 | -- word or namespace is defined more than once, the 'last' definition 24 | -- wins. This provides a simple, compositional patch and update model. 25 | -- A dictionary might be treated as an append-only update log. A full 26 | -- dictionary may be understood as a patch on the empty dictionary. 27 | -- 28 | -- By convention, a word may be 'undefined' by defining it to itself, 29 | -- e.g. `@foo foo`. Cyclic definitions aren't valid Awelon, but we'll 30 | -- not call out the trivial cycle any more than we normally would an 31 | -- undefined word. A namespace may be undefined via the empty string. 32 | -- 33 | -- This module only detects trivial parse errors. A lot more work must 34 | -- be done to search for cyclic definitions, type or totality errors, 35 | -- and so on. 36 | -- 37 | module Awelon.Dict.Format 38 | ( Dict(..), emptyDict 39 | , decode, DictErr 40 | , encodeBB, encode 41 | , splitDict, splitLL, findLLSep 42 | ) where 43 | 44 | import Prelude hiding (Word) 45 | import qualified Data.ByteString as BS 46 | import qualified Data.ByteString.Builder as BB 47 | import qualified Data.ByteString.Lazy as LBS 48 | import qualified Data.ByteString.Lazy.UTF8 as LU8 49 | import qualified Data.List as L 50 | import Data.Int (Int64) 51 | import Data.Word (Word8) 52 | import qualified Data.Map as M 53 | import Data.Monoid 54 | import Awelon.Syntax (NS(..), Word(..), Prog(..), validWordByte) 55 | import qualified Awelon.Syntax as P 56 | import Awelon.Hash 57 | 58 | -- | A parsed dictionary binary or patch file. 59 | -- 60 | -- This representation loses irrelevant information such as whitespace 61 | -- formatting, order of definitions, and definitions later overridden. 62 | -- 63 | -- For the namespaces, the `Nothing` value indicates an empty string was 64 | -- specified, so we only use a Hash value where we actually have a valid 65 | -- hash string. 
66 | data Dict = Dict { dictInc :: [Hash] 67 | , dictNS :: M.Map NS (Maybe Hash) 68 | , dictDefs :: M.Map Word Prog 69 | } deriving (Eq, Ord) 70 | 71 | emptyDict :: Dict 72 | emptyDict = Dict mempty mempty mempty 73 | 74 | isWS :: Word8 -> Bool 75 | isWS c = (10 == c) || (32 == c) 76 | 77 | skipWS :: LBS.ByteString -> LBS.ByteString 78 | skipWS = LBS.dropWhile isWS 79 | 80 | readHash :: LBS.ByteString -> Maybe (Hash, LBS.ByteString) 81 | readHash s = 82 | let (h, s') = LBS.span validHashByte s in 83 | if (validHashLen /= LBS.length h) then Nothing else 84 | Just (LBS.toStrict h, s') 85 | 86 | readHashes :: LBS.ByteString -> ([Hash], LBS.ByteString) 87 | readHashes = rd [] . skipWS where 88 | rd r s = case readHash s of 89 | Just (h, s') -> rd (h:r) (skipWS s') 90 | Nothing -> (L.reverse r, s) 91 | 92 | -- | Dictionary decode errors. 93 | -- 94 | -- At the moment, errors are simply returned in terms of unprocessed 95 | -- entries, or any unrecognized suffix of the header. I might need to 96 | -- improve error reporting in the future, e.g. to include some line 97 | -- numbers. But this should be sufficient for simple error reports. 98 | type DictErr = (LBS.ByteString, [LBS.ByteString]) 99 | 100 | -- | Decode (parse) a dictionary binary. 101 | -- 102 | -- This is a lenient decoder. It parses as much as it can while 103 | -- returning both the dictionary and parse errors. 104 | decode :: LBS.ByteString -> (DictErr, Dict) 105 | decode binary = ((eInc, eEnt), Dict inc ns defs) where 106 | (hd, bdy) = splitDict binary 107 | (inc, eInc) = readHashes hd 108 | ents = fmap decodeEnt bdy 109 | eEnt = [e | Right (Right e) <- ents] 110 | ns = M.fromList [x | Right (Left x) <- ents] 111 | defs = M.fromList [x | Left x <- ents] 112 | 113 | decodeEnt :: LBS.ByteString -> Either (Word, Prog) (Either (NS, Maybe Hash) LBS.ByteString) 114 | decodeEnt s = 115 | let badEnt = Right (Right s) in 116 | case LBS.uncons s of 117 | Just (64, s') -> 118 | let (w,d) = LBS.span validWordByte s' in 119 | if (LBS.null w) then badEnt else 120 | let ns = NS (Word (LBS.toStrict w)) in 121 | case decodeNSDef d of 122 | Just h -> Right (Left (ns, h)) 123 | _ -> badEnt 124 | _ -> 125 | let (w,d) = LBS.span validWordByte s in 126 | if (LBS.null w) then badEnt else 127 | let w' = Word (LBS.toStrict w) in 128 | case P.decode d of 129 | Right prog -> Left (w', prog) 130 | _ -> badEnt 131 | 132 | -- | A namespace definition is either a single hash or empty string. 133 | -- Here 'Nothing' means parse failure, while 'Just Nothing' means we 134 | -- received an empty string. Whitespace is trimmed. 135 | decodeNSDef :: LBS.ByteString -> Maybe (Maybe Hash) 136 | decodeNSDef s = 137 | let s' = skipWS s in 138 | if LBS.null s' then Just Nothing else 139 | case readHash s' of 140 | Just (h, rem) | LBS.null (skipWS rem) -> Just (Just h) 141 | _ -> Nothing 142 | 143 | -- | Our encoder will write out a dictionary binary. 144 | encode :: Dict -> LBS.ByteString 145 | encode = BB.toLazyByteString . encodeBB 146 | 147 | -- | Encode with a flexible output strategy. This uses a very 148 | -- simple and consistent space formatting: one SP after each 149 | -- word or namespace, one LF at the end. 
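-- For example, a dictionary with one include, one namespace, and one
-- word definition encodes as (hash values elided):
--
-- > secureHashOfPatch1
-- > @@ns secureHashOfDict
-- > @word1 def1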
150 | encodeBB :: Dict -> BB.Builder
151 | encodeBB (Dict inc ns defs) = bbInc <> bbNS <> bbDefs where
152 |     bbInc = mconcat $ fmap bbIncElem inc
153 |     bbNS = mconcat $ fmap bbNSElem (M.toList ns)
154 |     bbDefs = mconcat $ fmap bbDefElem (M.toList defs)
155 |     bbIncElem h = BB.byteString h <> BB.word8 10
156 |     bbNSElem (ns, def) = 
157 |         BB.word8 64 <> BB.word8 64 <> BB.byteString (wordBytes (nsWord ns)) <>
158 |         BB.word8 32 <> maybe mempty BB.byteString def <> BB.word8 10
159 |     bbDefElem (w, def) = 
160 |         BB.word8 64 <> BB.byteString (wordBytes w) <> BB.word8 32 <>
161 |         P.encodeBB def <> BB.word8 10
162 | 
163 | instance Show Dict where
164 |     showsPrec _ = showString . LU8.toString . encode
165 | 
166 | -- | An Awelon dictionary separates logical lines with the `LF @`
167 | -- sequence. This function returns the index to the next `LF @`
168 | -- line separator (returning the index for the LF).
169 | findLLSep :: LBS.ByteString -> Maybe Int64
170 | findLLSep = f 0 where
171 |     f !k s = case LBS.elemIndex 10 s of
172 |         Nothing -> Nothing
173 |         Just ix -> 
174 |             let skip = (1 + ix) in
175 |             let s' = LBS.drop skip s in
176 |             case LBS.uncons s' of
177 |                 Just (64, _) -> Just (k + ix)
178 |                 _ -> f (k + skip) s'
179 | 
180 | -- | Split the logical lines.
181 | splitLL :: LBS.ByteString -> [LBS.ByteString]
182 | splitLL s = case findLLSep s of
183 |     Nothing -> [s]
184 |     Just ix -> (LBS.take ix s) : splitLL (LBS.drop (ix + 2) s)
185 | 
186 | -- | Break dictionary into header and list of definition entries.
187 | splitDict :: LBS.ByteString -> (LBS.ByteString, [LBS.ByteString])
188 | splitDict s = case LBS.uncons s of
189 |     Just (64, s') -> (mempty, splitLL s')
190 |     _ -> case findLLSep s of
191 |         Nothing -> (s, mempty)
192 |         Just ix -> (LBS.take ix s, splitLL (LBS.drop (ix + 2) s))
193 | 
194 | -- note: we might wish to annotate line numbers in case of a parse error.
195 | 
196 | -- parse hashes
197 | -- parse entries
198 | -- map entries
199 | 
200 | 
201 | 
--------------------------------------------------------------------------------
/hs/src/Awelon/Dict/Rename.hs:
--------------------------------------------------------------------------------
1 | 
2 | -- | Renaming words is a common refactoring that is relatively easy
3 | -- to automate, assuming we can perform lookups and reverse lookups
4 | -- in context of the dictionary.
5 | module Awelon.Dict.Rename where
6 | -- ( renameWord, renameWords
7 | -- , renameDict, renameDicts
8 | -- ) where
9 | 
10 | -- TODO:
11 | --
12 | -- I need an appropriate reverse lookup index, to find words that
13 | -- reference a given word or namespace.
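-- A plausible shape for the entry point (hypothetical; nothing is
-- implemented yet):
--
--   renameWord :: Word -> Word -> Dict -> Dict
--
-- rewriting both the definition of the old word and, via reverse
-- lookup, every definition that references it.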
14 | 15 | -------------------------------------------------------------------------------- /hs/src/Awelon/Hash.hs: -------------------------------------------------------------------------------- 1 | module Awelon.Hash 2 | ( Hash 3 | , hash, hashL 4 | , hashAlphabet 5 | , validHashByte, validHashLen, validHash 6 | ) where 7 | 8 | import qualified Data.ByteString.Lazy as LBS 9 | import qualified Data.ByteString as BS 10 | import qualified Data.ByteString.Internal as BS 11 | import qualified Data.ByteString.UTF8 as U8 12 | import qualified Crypto.Hash.BLAKE2.BLAKE2b as B2b 13 | import qualified Data.List as L 14 | import qualified Data.Array.Unboxed as A 15 | import Data.Word (Word8, Word32) 16 | import Data.Char (chr, ord) 17 | import Data.Bits 18 | import Data.String 19 | import Foreign.ForeignPtr (withForeignPtr) 20 | import Foreign.Ptr 21 | import Foreign.Storable 22 | import System.IO.Unsafe (unsafeDupablePerformIO) 23 | import Control.Exception (assert) 24 | 25 | -- | A Hash is just a bytestring, for now. But it should be valid. 26 | -- 27 | -- Note: In context of lookups for secure hash resources, we may 28 | -- need to ensure constant-time comparisons. 29 | type Hash = BS.ByteString 30 | 31 | -- | Awelon language makes widespread use of secure hashes to name 32 | -- various resources. In particular, we use a 280-bit Blake2b hash 33 | -- and we encode this using a specilized base32 alphabet. 34 | -- 35 | -- > :set -XOverloadedStrings 36 | -- > hash "test" 37 | -- "HSjFNGRnqHpFFbPhlThmqCbqkmDSHCBlJNnmDPnDtnCpKHqtNgqhRMJG" 38 | -- 39 | hash :: BS.ByteString -> Hash 40 | hash = encodeHash . B2b.hash 35 mempty 41 | 42 | hashL :: LBS.ByteString -> Hash 43 | hashL = encodeHash . b2b_digest 35 44 | 45 | b2b_digest :: Int -> LBS.ByteString -> BS.ByteString 46 | b2b_digest z = B2b.finalize z 47 | . L.foldl (flip B2b.update) (B2b.initialize z) 48 | . LBS.toChunks 49 | 50 | -- | Awelon's unusual Base32 alphabet for hashes. 51 | -- 52 | -- This alphabet does not conflict with human meaningful words or 53 | -- numbers, nor structural conventions that use punctuation. And it 54 | -- is unlikely to appear by accident in context of conservative GC. 55 | hashAlphabet :: String 56 | hashAlphabet = assert (32 == L.length alphabet) $ alphabet where 57 | alphabet = "bcdfghjklmnpqrstBCDFGHJKLMNPQRST" 58 | 59 | -- | The length of a valid hash string. 60 | validHashLen :: (Num a) => a 61 | validHashLen = 56 62 | 63 | hashBytesArray :: A.UArray Word8 Bool 64 | hashBytesArray = A.listArray (0,255) $ fmap accept [0..255] where 65 | accept = flip L.elem hashAlphabet . chr 66 | 67 | -- | Really tests whether a byte is a valid base64url character. 68 | validHashByte :: Word8 -> Bool 69 | validHashByte = (A.!) hashBytesArray 70 | 71 | validHash :: BS.ByteString -> Bool 72 | validHash s = (validHashLen == BS.length s) 73 | && (BS.all validHashByte s) 74 | 75 | -- encoding 76 | type Word5 = Word8 77 | 78 | eW5A :: A.UArray Word5 Word8 79 | eW5A = A.listArray (0,31) $ fmap (fromIntegral . ord) hashAlphabet 80 | 81 | eW5 :: Word5 -> Word8 82 | eW5 = (A.!) eW5A 83 | 84 | encodeHash :: BS.ByteString -> BS.ByteString 85 | encodeHash (BS.PS fptr off sz) = 86 | unsafeDupablePerformIO $ 87 | BS.create validHashLen $ \ dst -> 88 | withForeignPtr fptr $ \ src -> 89 | packHash (src `plusPtr` off) dst 90 | 91 | -- pack hash assuming sufficient data in source and space in destination. 92 | -- Note that this is highly specialized for the validHashlen. It does not 93 | -- try for a generic hash encoding. 
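-- (Sanity check on the sizes: 35 input bytes = 280 bits = 7 groups of
-- 5 bytes, and each 5-byte group encodes to 8 base32 characters, so
-- the output is exactly the 56-character validHashLen.)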
94 | -- 95 | -- We want to output 8 bytes for every 5 bytes of input. Following normal 96 | -- conventions for base32 (RFC4648) the high order bit of the first byte 97 | -- will be the first bit in the stream. 98 | -- 99 | -- We never actually 'decode' this base32 string. It's more convenient and 100 | -- debuggable to stick with the ASCII encoding after the hash is formed. 101 | packHash :: Ptr Word8 -> Ptr Word8 -> IO () 102 | packHash s d = p280 where 103 | p280 = do { p40 0; p40 1; p40 2; p40 3; p40 4; p40 5; p40 6 } 104 | p40 n = do 105 | let r ix = peekElemOff (s `plusPtr` (5 * n)) ix 106 | let w ix v = pokeElemOff (d `plusPtr` (8 * n)) ix (eW5 v) 107 | -- read five bytes 108 | i0 <- r 0 109 | i1 <- r 1 110 | i2 <- r 2 111 | i3 <- r 3 112 | i4 <- r 4 113 | -- write eight bytes 114 | w 0 $ ((i0 .&. 0xF8) `unsafeShiftR` 3) 115 | w 1 $ ((i0 .&. 0x07) `unsafeShiftL` 2) .|. 116 | ((i1 .&. 0xC0) `unsafeShiftR` 6) 117 | w 2 $ ((i1 .&. 0x3E) `unsafeShiftR` 1) 118 | w 3 $ ((i1 .&. 0x01) `unsafeShiftL` 4) .|. 119 | ((i2 .&. 0xF0) `unsafeShiftR` 4) 120 | w 4 $ ((i2 .&. 0x0F) `unsafeShiftL` 1) .|. 121 | ((i3 .&. 0x80) `unsafeShiftR` 7) 122 | w 5 $ ((i3 .&. 0x7C) `unsafeShiftR` 2) 123 | w 6 $ ((i3 .&. 0x03) `unsafeShiftL` 3) .|. 124 | ((i4 .&. 0xE0) `unsafeShiftR` 5) 125 | w 7 $ ((i4 .&. 0x1F) ) 126 | 127 | -------------------------------------------------------------------------------- /hs/src/Wikilon.hs: -------------------------------------------------------------------------------- 1 | 2 | -- | Wikilon ultimately provides a WAI web service. 3 | module Wikilon 4 | ( mkWaiApp 5 | , WSOpts, defaultOpts 6 | , setAdmin, getAdmin 7 | ) where 8 | 9 | import Data.Monoid 10 | import Data.ByteString (ByteString) 11 | import Wikilon.DB (DB) 12 | import Network.Wai (Application) 13 | import qualified Network.Wai as Wai 14 | 15 | -- | Extra configuration for the Wikilon service. 16 | -- 17 | -- Most configuration will be performed at runtime, by users or agents 18 | -- with administrative authorities. But a few features may need to be 19 | -- handled up front. 20 | data WSOpts = WSOpts 21 | { ws_admin :: Maybe ByteString 22 | } 23 | 24 | defaultOpts :: WSOpts 25 | defaultOpts = WSOpts 26 | { ws_admin = mempty 27 | } 28 | 29 | -- | Administration of Wikilon 30 | -- 31 | -- The 'admin' account is reserved for administrative bootstrapping 32 | -- and troubleshooting. Enabled by setting a password at startup. This 33 | -- password is usable only for the process lifetime. 34 | setAdmin :: Maybe ByteString -> WSOpts -> WSOpts 35 | setAdmin v ws = ws { ws_admin = v } 36 | 37 | getAdmin :: WSOpts -> Maybe ByteString 38 | getAdmin = ws_admin 39 | 40 | -- other potential options: 41 | -- 42 | -- support for web service composition (very Low priority) 43 | -- access to a higher level rep. of application 44 | -- prefix for keys in the DB 45 | -- extra prefix for URLs 46 | -- 47 | -- The following might be better configured at runtime by 48 | -- administrative agents. 49 | -- 50 | -- Integration of external resources: 51 | -- files, and resources in the filesystem 52 | -- distributed computation setup, authorities 53 | -- cooperative Wikilon nodes 54 | -- creation of VMs 55 | -- OpenCL clouds 56 | -- backups 57 | -- logging 58 | -- 59 | -- Tab icons, default CSS, etc.. 
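-- Typical wiring, as in app/Main.hs (simplified; error handling and
-- option parsing elided, with `sizeMB`, `mbPass`, and `port` standing
-- in for whatever configuration is in play):
--
--   db <- DB.open "db" sizeMB
--   app <- mkWaiApp (setAdmin mbPass defaultOpts) db
--   Warp.run port app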
60 | 61 | 62 | -- | The Wikilon web service 63 | mkWaiApp :: WSOpts -> DB -> IO Application 64 | mkWaiApp opts db = undefined 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /hs/src/Wikilon/CBT.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns, TypeFamilies #-} 2 | 3 | -- | Crit-bit Tree (Variant) 4 | -- 5 | -- This is an in-memory crit-bit tree, a variant with key-value pairs 6 | -- and that keeps the least key in the parent. This largely acts as a 7 | -- reference for development of stowage-based KVM, but it may find 8 | -- use elsewhere. 9 | -- 10 | module Wikilon.CBT 11 | ( CBK(..) 12 | , CBT(..) 13 | , Node(..) 14 | , null, member, union 15 | , insert, delete, lookup 16 | , fromList, toList 17 | ) where 18 | 19 | import Prelude hiding (lookup, null) 20 | import qualified Data.ByteString as BS 21 | import qualified Data.ByteString.Unsafe as BS 22 | import qualified Data.ByteString.Lazy as LBS 23 | import qualified Data.ByteString.Lazy.Internal as LBS 24 | import qualified Data.List as L 25 | import Data.Maybe 26 | import Data.Word 27 | import Data.Bits 28 | 29 | 30 | 31 | -- | Requirements for a crit-bit key. 32 | -- 33 | -- thoughts: it might be better to change this to a `getByte` and a 34 | -- `length`, or perhaps a `getWord` where a word has a finite number 35 | -- of bytes. As is, too much crit-bit logic is embedded in this class. 36 | class (Eq k) => CBK k where 37 | -- | Determine bit at specified offset within key. 38 | getBit :: Int -> k -> Bool 39 | 40 | -- | Seek offset of first difference between two keys 41 | -- that is no less than the initial offset. 42 | critBit :: Int -> k -> k -> Maybe Int 43 | 44 | instance CBK Word8 where 45 | getBit = getBitFB 46 | critBit = critBitFB 47 | {-# INLINE getBit #-} 48 | {-# INLINE critBit #-} 49 | 50 | instance CBK Word16 where 51 | getBit = getBitFB 52 | critBit = critBitFB 53 | {-# INLINE getBit #-} 54 | {-# INLINE critBit #-} 55 | 56 | instance CBK Word32 where 57 | getBit = getBitFB 58 | critBit = critBitFB 59 | {-# INLINE getBit #-} 60 | {-# INLINE critBit #-} 61 | 62 | instance CBK Word64 where 63 | getBit = getBitFB 64 | critBit = critBitFB 65 | {-# INLINE getBit #-} 66 | {-# INLINE critBit #-} 67 | 68 | getBitFB :: FiniteBits b => Int -> b -> Bool 69 | getBitFB n a = testBit a ((finiteBitSize a) - n) 70 | {-# INLINE getBitFB #-} 71 | 72 | critBitFB :: FiniteBits b => Int -> b -> b -> Maybe Int 73 | critBitFB n a b = 74 | let x = (a `xor` b) `shiftL` n in 75 | if (zeroBits == x) then Nothing else 76 | Just $! (n + (countLeadingZeros x)) 77 | {-# INLINE critBitFB #-} 78 | 79 | 80 | instance CBK BS.ByteString where 81 | getBit n s = 82 | let (q,r) = n `divMod` 8 in 83 | let s' = BS.drop q s in 84 | if BS.null s' then False else 85 | getBit r (BS.unsafeHead s') 86 | {-# INLINE getBit #-} 87 | 88 | critBit = begin where 89 | -- special handling for first byte 90 | begin n a b = 91 | let (q,r) = n `divMod` 8 in 92 | let a' = BS.drop q a in 93 | let b' = BS.drop q b in 94 | let hd s = if BS.null s then 0 else BS.unsafeHead s in 95 | case critBit r (hd a') (hd b') of 96 | Just off -> Just $! 
(n + (off - r)) 97 | Nothing -> go2 (q + 1) (BS.drop 1 a') (BS.drop 1 b') 98 | 99 | -- compare whole bytes 100 | go2 !q !a !b = 101 | if BS.null a then go1 q b else 102 | if BS.null b then go1 q a else 103 | let ca = BS.unsafeHead a in 104 | let cb = BS.unsafeHead b in 105 | if (ca == cb) 106 | then go2 (q+1) (BS.unsafeTail a) (BS.unsafeTail b) 107 | else Just $! (8 * q) + countLeadingZeros (ca `xor` cb) 108 | 109 | -- search for a non-zero byte 110 | go1 !q !s = case BS.uncons s of 111 | Just (c, s') 112 | | (0 == c) -> go1 (q + 1) s' 113 | | otherwise -> Just $! (8 * q) + countLeadingZeros c 114 | Nothing -> Nothing 115 | 116 | data CBT k v 117 | = Empty 118 | | Root k (Node k v) 119 | deriving (Eq, Ord) 120 | 121 | data Node k v 122 | = Leaf v 123 | | Inner {-# UNPACK #-} !Int (Node k v) k (Node k v) 124 | deriving (Eq, Ord) 125 | 126 | -- question: should I add size information to nodes? 127 | -- 128 | -- I'm not sure whether O(1) sizes are useful enough when they aren't 129 | -- being used to balance trees. But they might prove more useful for 130 | -- the KVMs, e.g. to keep size information for the stowage nodes so we 131 | -- can always determine size without high-latency lookups. 132 | -- 133 | -- More generally, it might be convenient to compute monoidal meta-data 134 | -- that is a result of composition. Size information would be a simple 135 | -- monoid, but it seems feasible to compute useful summary data back 136 | -- up the tree. Still, might be better to focus on the plain old data 137 | -- type first, and create the variant as another module. 138 | 139 | instance (Show k, Show v) => Show (CBT k v) where 140 | showsPrec _ m = showString "fromList " . shows (toList m) 141 | 142 | instance (CBK k) => Monoid (CBT k v) where 143 | mempty = Empty 144 | mappend = union 145 | 146 | instance Functor (CBT k) where 147 | fmap _ Empty = Empty 148 | fmap fn (Root k n) = (Root k (fmap fn n)) 149 | 150 | instance Functor (Node k) where 151 | fmap fn (Leaf v) = Leaf (fn v) 152 | fmap fn (Inner cb l k r) = Inner cb (fmap fn l) k (fmap fn r) 153 | 154 | -- | Convert CBT to list. 155 | toList :: CBT k v -> [(k,v)] 156 | toList Empty = [] 157 | toList (Root k n) = go [] k n where 158 | go p k (Inner _ l k' r) = go ((k',r):p) k l 159 | go p k (Leaf v) = (k,v) : more p 160 | more ((k',r):p) = go p k' r 161 | more [] = [] 162 | 163 | -- | Convert list to CBT. 164 | fromList :: (CBK k) => [(k,v)] -> CBT k v 165 | fromList = L.foldl' ins mempty where 166 | ins m (k,v) = insert k v m 167 | 168 | null :: CBT k v -> Bool 169 | null Empty = True 170 | null _ = False 171 | 172 | member :: (CBK k) => k -> CBT k v -> Bool 173 | member k m = not (isNothing (lookup k m)) 174 | 175 | lookup :: (CBK k) => k -> CBT k v -> Maybe v 176 | lookup k = ini where 177 | ini Empty = Nothing 178 | ini (Root lk l) = go lk l 179 | go lk (Inner cb l rk r) = 180 | if getBit cb k then go rk r 181 | else go lk l 182 | go lk (Leaf v) = 183 | if (lk == k) then Just v 184 | else Nothing 185 | 186 | -- update leftmost item 187 | updL :: (v -> v) -> Node k v -> Node k v 188 | updL fn (Leaf v) = Leaf (fn v) 189 | updL fn (Inner cb l k r) = Inner cb (updL fn l) k r 190 | 191 | -- insert value to left at given depth, using the given 192 | -- key as the least key for the given node. 193 | insL :: Int -> v -> k -> Node k v -> Node k v 194 | insL cb = insLN cb . 
Leaf 195 | 196 | -- insert node to left of tree with depth and least key 197 | insLN :: Int -> Node k v -> k -> Node k v -> Node k v 198 | insLN cb n' k' = ins where 199 | ins (Inner ncb l k r) | (cb > ncb) = 200 | Inner ncb (ins l) k r 201 | ins n = Inner cb n' k' n 202 | 203 | -- insert node to right of tree 204 | insert :: CBK k => k -> v -> CBT k v -> CBT k v 205 | insert = insertWith const 206 | 207 | -- Could we insert more efficiently just looking at the crit-bits? 208 | -- note: might be better to model this as a union with a singleton. 209 | insertWith :: CBK k => (v -> v -> v) -> k -> v -> CBT k v -> CBT k v 210 | insertWith fn kIns vIns = ini where 211 | ini Empty = Root kIns (Leaf vIns) 212 | ini (Root k n) = uncurry Root (ins 0 k n) 213 | 214 | ins cb k n = case critBit cb kIns k of 215 | Just cb -> case getBit cb kIns of 216 | False -> (kIns, insL cb vIns k n) -- insert as new least node 217 | True -> (k, insR cb n) -- insert to right after cb 218 | Nothing -> (k, updL (fn vIns) n) -- update in place 219 | 220 | -- insR inserts to the right of the least key, 221 | -- just after a specified critbit. 222 | insR cb (Inner ncb l k r) 223 | | (cb > ncb) = Inner ncb (insR cb l) k r -- differentiates further down 224 | | otherwise = uncurry (Inner cb l) (ins cb k r) 225 | insR cb v0@(Leaf _) = Inner cb v0 kIns (Leaf vIns) -- right of leaf 226 | 227 | -- delete a specific node. 228 | delete :: CBK k => k -> CBT k v -> CBT k v 229 | delete kDel = ini where 230 | ini Empty = Empty 231 | ini (Root k n) = case critBit 0 kDel k of 232 | Nothing -> Empty 233 | Just cb -> undefined 234 | 235 | -- | left-biased union 236 | union :: CBK k => CBT k v -> CBT k v -> CBT k v 237 | union = unionWith const 238 | 239 | -- | union 240 | -- 241 | -- ideally I'll provide a more efficient option than inserting each 242 | -- element independently. But for now, I'm just making it work. 243 | unionWith :: CBK k => (v -> v -> v) -> CBT k v -> CBT k v -> CBT k v 244 | unionWith fn l = L.foldl' ins l . toList where 245 | ins m (k,v) = insertWith (flip fn) k v m 246 | 247 | -------------------------------------------------------------------------------- /hs/src/Wikilon/CX.hs: -------------------------------------------------------------------------------- 1 | -- | Abstract Wikilon Runtime Context 2 | -- 3 | -- Wikilon operates in a similar context as Awelon, but we also need 4 | -- some access to persistent data storage in form of a key-value DB. 5 | -- And it might need some access to secure entropy and other features. 6 | module Wikilon.CX 7 | ( Storage(..) 8 | , module Awelon.CX 9 | ) where 10 | 11 | import Data.ByteString.Lazy (ByteString) 12 | import Awelon.CX 13 | 14 | -- | Access to a key-value registry (usually persistent) 15 | -- 16 | -- Storage operates on a simple key-value registry, representing a 17 | -- persistent store such as a filesystem or database. In Wikilon, 18 | -- this is probably Wikilon.DB. 19 | class Storage m where 20 | regGet :: ByteString -> m ByteString 21 | regPut :: ByteString -> ByteString -> m () 22 | 23 | 24 | -- NOTE: other potential features to consider are checkpointing or 25 | -- backtracking. But I'd prefer to wait for a real requirement. 26 | 27 | -------------------------------------------------------------------------------- /hs/src/Wikilon/KVM.hs: -------------------------------------------------------------------------------- 1 | 2 | -- | First-Class Key-Value Databases 3 | -- 4 | -- Wikilon.DB offers a mutable key-value database with stowage and GC. 
5 | -- This KVM models a key-value map above stowage, enabling first class 6 | -- database values to potentially be larger than memory. 7 | -- 8 | -- The tree structure used for KVM is a variant of a crit-bit tree or 9 | -- trie. Differences from conventional crit-bit tree: 10 | -- 11 | -- - least key is held by parent, to support full tree diffs and merges 12 | -- - each key is associated with a binary value (which may be empty) 13 | -- - keys, values, nodes may be stowed outside of volatile memory 14 | -- 15 | -- Keys and values within the KVM are free to reference other stowage 16 | -- resources. But keys mustn't have any trailing null bytes. 17 | -- 18 | -- Batched updates are important: it's inefficient to allocate lots of 19 | -- short-lived nodes at the stowage layers. This module will aim to make 20 | -- batching and buffering relatively simple and easy. 21 | -- 22 | -- First-class database values offer a lot of benefits over conventional 23 | -- key-value databases: histories, forking, diffs, composition. Wikilon 24 | -- relies on KVM for most data indexing and processing. 25 | -- 26 | module Wikilon.KVM 27 | ( Key, Val 28 | 29 | ) where 30 | 31 | import qualified Data.ByteString.Lazy as LBS 32 | import Control.Exception 33 | 34 | type Key = LBS.ByteString 35 | type Val = LBS.ByteString 36 | 37 | -- our crit-bit tree doesn't distinguish keys with trailing NULLs, 38 | -- instead treating every key as having an infinite extent of zero 39 | -- bits following the final non-zero bit. 40 | -- 41 | -- Since we can't distinguish keys with trailing NULLs, we also should 42 | -- not accept them into our trees. 43 | validKey :: Key -> Bool 44 | validKey s = LBS.null s || (0 /= LBS.last s) 45 | 46 | 47 | 48 | 49 | 50 | -- | A simple trie with bytestring data. 51 | 52 | -------------------------------------------------------------------------------- /hs/src/Wikilon/RT.hs: -------------------------------------------------------------------------------- 1 | 2 | -- | Wikilon runtime types. 3 | module Wikilon.RT 4 | ( RTE(..) 5 | ) where 6 | 7 | import Control.Monad.Trans.Reader 8 | import Wikilon.DB (DB) 9 | import qualified Wikilon.DB as DB 10 | 11 | data RTE = RTE 12 | { rte_db :: DB 13 | } 14 | 15 | -- Adding some form of checkpoint model to our RTE seems potentially 16 | -- useful. 17 | 18 | 19 | -------------------------------------------------------------------------------- /hs/stack.yaml: -------------------------------------------------------------------------------- 1 | # This file was automatically generated by 'stack init' 2 | # 3 | # Some commonly used options have been documented as comments in this file. 4 | # For advanced use and comprehensive documentation of the format, please see: 5 | # http://docs.haskellstack.org/en/stable/yaml_configuration/ 6 | 7 | # Resolver to choose a 'specific' stackage snapshot or a compiler version. 8 | # A snapshot resolver dictates the compiler version and the set of packages 9 | # to be used for project dependencies. For example: 10 | # 11 | # resolver: lts-3.5 12 | # resolver: nightly-2015-09-21 13 | # resolver: ghc-7.10.2 14 | # resolver: ghcjs-0.1.0_ghc-7.10.2 15 | # resolver: 16 | # name: custom-snapshot 17 | # location: "./custom-snapshot.yaml" 18 | resolver: lts-8.18 19 | 20 | # User packages to be built. 21 | # Various formats can be used as shown in the example below. 
22 | # 23 | # packages: 24 | # - some-directory 25 | # - https://example.com/foo/bar/baz-0.0.2.tar.gz 26 | # - location: 27 | # git: https://github.com/commercialhaskell/stack.git 28 | # commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a 29 | # - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a 30 | # extra-dep: true 31 | # subdirs: 32 | # - auto-update 33 | # - wai 34 | # 35 | # A package marked 'extra-dep: true' will only be built if demanded by a 36 | # non-dependency (i.e. a user package), and its test suites and benchmarks 37 | # will not be run. This is useful for tweaking upstream packages. 38 | packages: 39 | - '.' 40 | # Dependency packages to be pulled from upstream that are not in the resolver 41 | # (e.g., acme-missiles-0.3) 42 | extra-deps: 43 | - containers-0.5.10.2 44 | 45 | # Override default flag values for local packages and extra-deps 46 | flags: {} 47 | 48 | # Extra package databases containing global packages 49 | extra-package-dbs: [] 50 | 51 | # Control whether we use the GHC we find on the path 52 | # system-ghc: true 53 | # 54 | # Require a specific version of stack, using version ranges 55 | # require-stack-version: -any # Default 56 | # require-stack-version: ">=1.3" 57 | # 58 | # Override the architecture used by stack, especially useful on Windows 59 | # arch: i386 60 | # arch: x86_64 61 | # 62 | # Extra directories used by stack for building 63 | # extra-include-dirs: [/path/to/dir] 64 | # extra-lib-dirs: [/path/to/dir] 65 | # 66 | # Allow a newer minor version of GHC than the snapshot specifies 67 | # compiler-check: newer-minor 68 | -------------------------------------------------------------------------------- /hs/test/Spec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | import Control.Monad 4 | import qualified Data.ByteString.Lazy as LBS 5 | import qualified Data.ByteString.Lazy.UTF8 as LU8 6 | import qualified System.EasyFile as FS 7 | import qualified Wikilon.DB as DB 8 | import qualified Data.List as L 9 | import Control.Exception 10 | import Debug.Trace 11 | import System.Mem 12 | 13 | -- TODO: learn and use a Haskell test framework. 14 | 15 | main :: IO () 16 | main = withTmpDir "wikilon-test" $ do 17 | (Right db) <- DB.open "test-db" 1000 18 | testDB db 19 | return () 20 | 21 | 22 | 23 | 24 | 25 | 26 | -- utilities to simplify the read-write labor 27 | 28 | utf8 :: Show a => a -> LU8.ByteString 29 | utf8 = LU8.fromString . show 30 | 31 | -- just take the first value read 32 | writeBatch :: (Show a, Show b) => DB.DB -> [(a,b)] -> IO () 33 | writeBatch db kvList = do 34 | tx <- DB.newTX db 35 | mapM_ (uncurry (write tx)) kvList 36 | ok <- DB.commit tx 37 | unless ok $ fail ("failed to write batch " ++ show kvList) 38 | return () 39 | 40 | write :: (Show a, Show b) => DB.TX -> a -> b -> IO () 41 | write tx a b = DB.writeKey tx (utf8 a) (utf8 b) 42 | 43 | tryRead :: (Read a) => LU8.ByteString -> a 44 | tryRead s = case reads (LU8.toString s) of 45 | ((a,_):_) -> a 46 | [] -> error $ "could not read: " ++ show s 47 | 48 | read :: (Show a, Read b) => DB.TX -> a -> IO b 49 | read tx = fmap tryRead . DB.readKey tx . utf8 50 | 51 | readDB :: (Show a, Read b) => DB.DB -> a -> IO b 52 | readDB db = fmap tryRead . DB.readKeyDB db . utf8 53 | 54 | testDB :: DB.DB -> IO () 55 | testDB db = do 56 | -- NOTE: I'll be able to test the database more thoroughly 57 | -- once I have some support for large tries or similar. 
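    -- For now: exercise batched key writes, reads through transactions
    -- and directly from the DB, stowage round trips, and GC of
    -- unreferenced resources.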
58 | 59 | tx0 <- DB.newTX db 60 | DB.writeKey tx0 "red" "green" 61 | DB.writeKey tx0 "blue" "orange" 62 | DB.writeKey tx0 "yellow" "violet" 63 | DB.commit tx0 64 | 65 | r <- DB.readKeyDB db "red" 66 | tx1 <- DB.newTX db 67 | b <- DB.readKey tx1 "blue" 68 | y <- DB.readKey tx1 "yellow" 69 | DB.commit tx1 70 | rby <- DB.readKeysDB db ["red","blue","yellow"] 71 | 72 | unless ((r == "green") && (y == "violet") && (rby == [r,b,y])) $ 73 | fail "unexpected results for rby" 74 | 75 | tx2 <- DB.newTX db 76 | h0 <- DB.stowRsc tx2 "hello" 77 | h1 <- DB.stowRsc tx2 "goodbye" 78 | DB.writeKey tx2 "hello" (LBS.fromStrict h1) 79 | DB.clearRsc tx2 80 | DB.commit tx2 81 | 82 | let ns = [0..100] 83 | let sel = L.take 20 . L.drop 30 84 | tx3 <- DB.newTX db 85 | hs <- mapM (DB.stowRsc tx3 . utf8) ns 86 | DB.writeKey tx3 "p" (utf8 (sel hs)) 87 | DB.commit tx3 88 | DB.clearRsc tx3 89 | DB.gcDB db 90 | --DB.gcDB db 91 | -- traceIO ("hs: " ++ show hs) 92 | ns2 <- mapM (DB.loadRscDB db) hs 93 | traceIO ("ns2 : " ++ show ns2) 94 | 95 | 96 | hs' <- DB.readKeyDB db "p" 97 | unless (utf8 (sel hs) == hs') $ 98 | fail ("failure to read expected hashes") 99 | 100 | 101 | 102 | withTmpDir :: FilePath -> IO a -> IO a 103 | withTmpDir subdir action = do 104 | initialPath <- FS.getCurrentDirectory 105 | tmp <- FS.getTemporaryDirectory 106 | let myTmpDir = tmp FS. subdir 107 | FS.createDirectoryIfMissing False myTmpDir 108 | FS.setCurrentDirectory myTmpDir 109 | r <- try action 110 | FS.setCurrentDirectory initialPath 111 | -- FS.removeDirectoryRecursive myTmpDir 112 | reraise r 113 | 114 | 115 | reraise :: Either SomeException a -> IO a 116 | reraise = either throw return 117 | 118 | 119 | -------------------------------------------------------------------------------- /hs/wikilon.cabal: -------------------------------------------------------------------------------- 1 | name: wikilon 2 | version: 0.1.0.0 3 | synopsis: a wiki-inspired development environment for Awelon Language 4 | -- description: 5 | homepage: https://github.com/dmbarbour/wikilon#readme 6 | license: BSD3 7 | -- license-file: LICENSE 8 | author: David Barbour 9 | maintainer: dmbarbour@gmail.com 10 | copyright: 2017 David Barbour 11 | category: Web 12 | build-type: Simple 13 | extra-source-files: 14 | cabal-version: >=1.10 15 | 16 | library 17 | hs-source-dirs: src 18 | exposed-modules: 19 | Awelon 20 | Awelon.Syntax 21 | Awelon.Hash 22 | Awelon.CX 23 | Awelon.Dict 24 | Awelon.Dict.Format 25 | 26 | Wikilon 27 | Wikilon.CX 28 | Wikilon.DB 29 | Wikilon.KVM 30 | Wikilon.CBT 31 | Wikilon.RT 32 | 33 | build-depends: base >= 4.7 && < 5 34 | -- CONTROL 35 | , transformers 36 | , monad-loops 37 | -- VOLATILE DATA 38 | , deepseq 39 | , array 40 | , containers (>= 0.5.8) 41 | , bytestring 42 | , utf8-string 43 | , blake2 44 | -- PERSISTENT DATA 45 | , lmdb 46 | , filelock 47 | , easy-file 48 | -- WEB SERVICES 49 | , servant 50 | , wai 51 | , wai-websockets 52 | , websockets 53 | , yesod-form 54 | default-language: Haskell2010 55 | 56 | executable wikilon 57 | hs-source-dirs: app 58 | main-is: Main.hs 59 | ghc-options: -threaded -rtsopts -with-rtsopts=-N 60 | build-depends: base 61 | , wikilon 62 | , bytestring 63 | , utf8-string 64 | , monad-loops 65 | , wai 66 | , warp 67 | , warp-tls 68 | , entropy 69 | , easy-file 70 | default-language: Haskell2010 71 | 72 | test-suite wikilon-test 73 | type: exitcode-stdio-1.0 74 | hs-source-dirs: test 75 | main-is: Spec.hs 76 | build-depends: base 77 | , wikilon 78 | , bytestring 79 | , utf8-string 80 | , easy-file 81 | 
ghc-options: -threaded -rtsopts -with-rtsopts=-N 82 | default-language: Haskell2010 83 | 84 | source-repository head 85 | type: git 86 | location: https://github.com/dmbarbour/wikilon 87 | 88 | 89 | -------------------------------------------------------------------------------- /src/Awelon/Awelon.fsproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | netstandard2.0 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /src/Awelon/DictSearch.fs: -------------------------------------------------------------------------------- 1 | namespace Awelon 2 | 3 | 4 | // The goal for DictSearch is to model an index data structure 5 | // with the following features: 6 | // 7 | // - we can index each dictionary node independently 8 | // - we can efficiently merge indices on Dict nodes 9 | // - we can store computed values - types, versions 10 | // 11 | // Ideally, we can achieve these features using one index model. 12 | // This would allow efficient indexing across multiple versions 13 | // of a dictionary. 14 | // 15 | // Computing values is difficult because the value computed at 16 | // a node may depend on other nodes in its context. 17 | // 18 | // If we achieve these features together, we can maintain our 19 | // indices across multiple versions of a dictionary, and produce 20 | // a new index as needed. 21 | // 22 | // One viable option is to maintain global index on secure hash 23 | // resources 24 | // hash resources to their 25 | // 26 | // 27 | // Besides search, I also want effective caching of Dict 28 | 29 | -------------------------------------------------------------------------------- /src/Awelon/Interpret.fs: -------------------------------------------------------------------------------- 1 | namespace Awelon 2 | open Data.ByteString 3 | open System.Collections.Generic 4 | 5 | // Intepretation of Awelon. 6 | // 7 | // Awelon programs logically rewrite to equivalent Awelon programs. 8 | // But Awelon can be naively implemented as a stack language, we 9 | // only need some care to preserve or recover words that should not 10 | // link/rewrite at evaluation time. It seems that for every word we 11 | // might link, we should compute static link conditions. 12 | // 13 | // For performance, acceleration is essential. At this time, use of 14 | // acceleration is simplified to `[F](accel-nat-add) => [!nat-add]`. 15 | // That is, we don't need to validate that `F` correctly implements 16 | // the accelerated nat-add operation at the interpreter layer. We can 17 | // and should validate it at other layers, such as a linter or type 18 | // checker. In any case, this makes it easy to treat accelerators as 19 | // built-in functions for fast interpretation. 20 | // 21 | // Besides accelerators, other important concerns are memoization, 22 | // caching, stowage, parallelism, and debug trace outputs. Interpreter 23 | // requires a full environment model, with some implicit sharing under 24 | // the hood. Further, we might also need to track *quotas*, to limit 25 | // evaluation efforts. 26 | module Interpret = 27 | 28 | /// Linker Conditions 29 | /// 30 | /// A word will link into a computation under two conditions: 31 | /// 32 | /// - Arity: There are sufficient arguments on the stack to link. 33 | /// - Avail: A word's evaluated definition has available outputs. 
34 | /// 35 | /// Arity can range 0..9, due to the (a2)..(a9) annotations. Arity 36 | /// zero implies a word is not awaiting further input. Avail has 37 | /// no upper limit, but there's simply no need to calculate it above 38 | /// the maximum arity 9. 39 | /// 40 | /// A "noun" will have arity=0, avail=1. A noun may simply be treated 41 | /// as a value for binding, etc.. 42 | type LinkArity = int // how many inputs this observes on data stack 43 | type LinkAvail = int // how many outputs are immediately available 44 | 45 | /// 46 | /// 47 | /// 48 | 49 | 50 | 51 | // Stowage References 52 | 53 | type Env = 54 | val Src : Dict 55 | 56 | /// Our Interpreter will process programs as follows: 57 | /// 58 | /// - Nouns and Verbs are distinct. 59 | /// - Evaluated data is distinct from un-evaluated. 60 | /// - 61 | 62 | 63 | /// Interpreter-layer values should be optimized towards runtime 64 | /// interpretations. Relevantl 65 | /// 66 | 67 | 68 | 69 | /// I assume `[code](accel)` will be used within source to 70 | /// indicate accelerated implementations. Wh 71 | 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /src/Awelon/ParseExt.fs: -------------------------------------------------------------------------------- 1 | namespace Awelon 2 | open Data.ByteString 3 | 4 | // ParseExt is a 'default' textual projection for Awelon with all 5 | // the bells and whistles we can implement that *don't* depend on 6 | // special knowledge about a dictionary. This is intended both as 7 | // an initial scaffolding for programming Awelon, and a fallback 8 | // or 'safe mode' textual projection for debugging (if needed). 9 | // 10 | // In older versions of Awelon, I might have called this `CLAW` as 11 | // a command language for Awelon. 
However, 12 | // 13 | module ParseExt = 14 | 15 | // SUPPORTABLE NOW (based on developed techniques): 16 | // - local variables 17 | // - monadic sequences 18 | // - simple list literals 19 | // - labeled data (records, variants) 20 | // - basic numeric notations 21 | // - raw vs preprocessed literals 22 | // - lightweight qualified namespaces 23 | 24 | type NatRep = uint64 25 | type IntRep = int64 26 | type RatRep = (struct(IntRep * NatRep)) 27 | type DecRep = (struct(IntRep * int16)) // 3141 -3 => 3.141 28 | type SciRep = (struct(DecRep * IntRep)) // Decimal*10^Exponent 29 | 30 | type Number = 31 | | Nat of NatRep 32 | | Int of IntRep 33 | | Rat of RatRep 34 | | Dec of DecRep 35 | | Sci of SciRep 36 | 37 | 38 | // Potential Future Extensions: 39 | // 40 | // - pattern matching (parser combinators) 41 | // - concise type descriptors 42 | 43 | -------------------------------------------------------------------------------- /src/Awelon/Test/Program.fs: -------------------------------------------------------------------------------- 1 | module Program = let [] main _ = 0 2 | -------------------------------------------------------------------------------- /src/Awelon/Test/Test.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | netcoreapp2.0 5 | 6 | false 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /src/Awelon/Test/Tests.fs: -------------------------------------------------------------------------------- 1 | module Tests 2 | 3 | open System 4 | open System.IO 5 | open Xunit 6 | open Stowage 7 | open Awelon 8 | open Data.ByteString 9 | 10 | // For parser tests, it's convenient to parse then print and 11 | // compare the printed string. If parse fails, we can instead 12 | // test the remainder with "?rem". 13 | let ps s = 14 | match Parser.parse (BS.fromString s) with 15 | | Parser.ParseOK p -> BS.toString (Parser.write p) 16 | | Parser.ParseFail st -> BS.toString (BS.cons (byte '?') (st.s)) 17 | 18 | // Shuffle an array for various tests. 19 | let shuffle (rng:System.Random) (a : 'T[]) : unit = 20 | let rec shuffleIx ix = 21 | if (ix = a.Length) then () else 22 | let ixSwap = rng.Next(ix, Array.length a) 23 | let tmp = a.[ix] 24 | a.[ix] <- a.[ixSwap] 25 | a.[ixSwap] <- tmp 26 | shuffleIx (ix + 1) 27 | shuffleIx 0 28 | 29 | let clearTestDir path = 30 | if Directory.Exists(path) 31 | then Directory.Delete(path,true) 32 | 33 | let readNat (s:ByteString) : int = 34 | let readByte acc c = (acc * 10) + int (c - byte '0') 35 | BS.fold readByte 0 s 36 | 37 | [] 38 | let ``basic parser tests`` () = 39 | Assert.Equal("", ps " ") 40 | Assert.Equal("1 2 3", ps " 1 2 3 ") 41 | Assert.Equal("\"hello\" world", ps " \"hello\" world ") 42 | Assert.Equal("?\"test", ps " 1 2 \"hello\" \"test") 43 | Assert.Equal("[[] [[] []]] []", ps "[[][[][]]][]") 44 | Assert.Equal("?@ ", ps "1 2 3 @ ") 45 | Assert.Equal("?→", ps "→") 46 | Assert.Equal("?", ps "[0[1[2") 47 | Assert.Equal("?]]]", ps "[]0]]]") 48 | Assert.Equal("hello/world/[1]", ps " hello/world/[ 1 ] ") 49 | Assert.Equal("a/b/c/d", ps " a/b/c/d ") 50 | Assert.Equal("? []", ps "hello/world/ []") 51 | Assert.Equal("(hello)", ps " (hello) ") 52 | Assert.Equal("?(hel ", ps " (hel ") 53 | Assert.Equal("?) 
", ps " hello) ") 54 | Assert.Equal("?%bbb", ps " %bbb") 55 | let h = RscHash.hash (BS.fromString "test") 56 | let asBin = BS.toString (BS.cons (byte '%') h) 57 | let asRsc = BS.toString (BS.cons (byte '$') h) 58 | Assert.Equal(asBin, ps asBin) 59 | Assert.Equal(asRsc, ps asRsc) 60 | 61 | // TODO: test interpreters 62 | 63 | let testDefStr n = 64 | let s = if (0 = n) then "[zero]" else 65 | "[" + string (n - 1) + " succ] (nat)" 66 | BS.fromString s 67 | 68 | let inline bs n = BS.fromString (string n) 69 | let inline addN (n:int) (d:Dict) : Dict = Dict.add (bs n) (Dict.Def(testDefStr n)) d 70 | let inline remN (n:int) (d:Dict) : Dict = Dict.remove (bs n) d 71 | let inline flip fn a b = fn b a 72 | 73 | [] 74 | let ``simple dict tests`` () = 75 | Assert.True(Dict.isEmpty (Dict.empty)) 76 | Assert.False(Dict.isEmpty (Dict.empty |> addN 1)) 77 | Assert.True(Dict.isEmpty (Dict.empty |> addN 1 |> remN 1)) 78 | Assert.False(Dict.isEmpty (Dict.empty |> addN 1 |> addN 2 |> remN 1)) 79 | let d = seq { for i = 1 to 2000 do yield i } 80 | |> Seq.fold (flip addN) Dict.empty 81 | let has n d = Dict.contains (bs n) d 82 | let d10 = Dict.dropPrefix (bs 10) d 83 | Assert.True(has 10 d) 84 | Assert.False(has 10 d10) 85 | Assert.True(has 101 d) 86 | Assert.False(has 101 d10) 87 | Assert.True(has 1023 d) 88 | Assert.False(has 1023 d10) 89 | Assert.True(has 1 d10) 90 | Assert.True(has 11 d10) 91 | Assert.Equal(2000 - 111, Seq.length (Dict.toSeq d10)) // 1..2000 - 10,100..109,1000..1099 92 | 93 | let d30 = Dict.selectPrefix (bs 30) d 94 | Assert.True(has 30 d30) 95 | Assert.True(has 300 d30) 96 | Assert.True(has 308 d30) 97 | Assert.True(has 310 d) 98 | Assert.False(has 310 d30) 99 | Assert.True(has 31 d) 100 | Assert.False(has 31 d30) 101 | Assert.True(has 200 d) 102 | Assert.False(has 200 d30) 103 | Assert.Equal(11, Seq.length (Dict.toSeq d30)) // 30,300,301,302,..309 104 | 105 | // a fixture is needed to load the database 106 | type TestDB = 107 | val s : LMDB.Storage 108 | val db : DB 109 | new () = 110 | let path = "testDB" 111 | let maxSizeMB = 1000 112 | do clearTestDir path 113 | let s = new LMDB.Storage(path,maxSizeMB) 114 | { s = s 115 | db = DB.fromStorage (s :> DB.Storage) 116 | } 117 | interface System.IDisposable with 118 | member this.Dispose() = 119 | this.db.Flush() 120 | (this.s :> System.IDisposable).Dispose() 121 | 122 | let bsPair ((a,b)) = (BS.fromString a, BS.fromString b) 123 | 124 | type DBTests = 125 | val s : LMDB.Storage 126 | val db : DB 127 | new (fixture : TestDB) = 128 | let s = fixture.s 129 | { s = fixture.s 130 | db = fixture.db 131 | } 132 | interface IClassFixture 133 | 134 | member inline t.DB with get() = t.db 135 | member inline t.Stowage with get() = (t.s :> Stowage) 136 | member inline t.Storage with get() = (t.s :> DB.Storage) 137 | member inline t.Flush() = DB.flushStorage (t.Storage) 138 | 139 | member t.TryLoad (h:RscHash) : ByteString option = 140 | try t.Stowage.Load h |> Some 141 | with 142 | | MissingRsc _ -> None 143 | 144 | member t.FullGC() = 145 | System.GC.Collect() 146 | let rec gcLoop ct = 147 | t.s.GC() 148 | let ct' = t.s.Stats().stow_count 149 | //printfn "GC - elements in stowage: %A" ct' 150 | if (ct' <> ct) then gcLoop ct' 151 | gcLoop 0UL 152 | 153 | member tf.CompactionTest (alen:int) (frac:int) (rng:System.Random) : unit = 154 | let sw = new System.Diagnostics.Stopwatch() 155 | let cc d = Codec.compact (Dict.node_codec) (tf.Stowage) d 156 | let compactK k d = if (0 <> (k % frac)) then d else cc d 157 | let add n d = compactK n (addN n d) 158 | 
let rem n d = compactK n (remN n d) 159 | let a = [| 1 .. alen |] 160 | let r = [| 1 .. (alen / 3) |] 161 | shuffle rng a 162 | shuffle rng r 163 | 164 | printfn "building a test dictionary" 165 | sw.Restart() 166 | let dRef = Dict.empty 167 | |> Array.foldBack add a 168 | |> Array.foldBack rem r 169 | |> cc 170 | |> VRef.stow (Dict.codec) (tf.Stowage) 171 | tf.Flush() 172 | sw.Stop() 173 | printfn "test dictionary built (%A ms)" (sw.Elapsed.TotalMilliseconds) 174 | let write_op_ct = Array.length a + Array.length r 175 | let write_op_usec = (1000.0 * sw.Elapsed.TotalMilliseconds) / (double write_op_ct) 176 | printfn "usec per write op: %A" write_op_usec 177 | 178 | // really doing iteration (sequential read). I might need to test 179 | // performance of random reads, seperately. But this isn't primarily 180 | // a performance test. 181 | let accum s n = s + uint64 n 182 | let array_sum arr = Array.fold accum 0UL arr 183 | let sum_expected = array_sum a - array_sum r 184 | 185 | sw.Restart() 186 | let dR = VRef.load dRef 187 | let sum = Dict.toSeq dR 188 | |> Seq.map (fst >> readNat) 189 | |> Seq.fold accum 0UL 190 | sw.Stop() 191 | let read_op_ct = (Array.length a) - (Array.length r) 192 | let read_op_usec = (1000.0 * sw.Elapsed.TotalMilliseconds) / (double read_op_ct) 193 | printfn "usec per read op: %A" read_op_usec 194 | Assert.Equal(sum, sum_expected) 195 | 196 | // random-read performance test 197 | shuffle rng a 198 | sw.Restart() 199 | let dRR = VRef.load dRef 200 | let fnAccum acc k = acc + if Dict.contains (bs k) dRR then uint64 k else 0UL 201 | let rsum1 = Array.fold fnAccum 0UL a 202 | sw.Stop() 203 | let rread_op_ct = Array.length a 204 | let rread_op_usec = (1000.0 * sw.Elapsed.TotalMilliseconds) / (double rread_op_ct) 205 | printfn "usec per random read op: %A" rread_op_usec 206 | Assert.Equal(rsum1,sum_expected) 207 | 208 | [] 209 | member tf.``test dict compaction`` () = 210 | let rng = new System.Random(1) 211 | 212 | printfn "size 70k, 10 compactions" 213 | tf.CompactionTest 70000 7000 rng 214 | printfn "size 70k, 100 compactions" 215 | tf.CompactionTest 70000 700 rng 216 | printfn "size 70k, 1 compactions" 217 | tf.CompactionTest 70000 700000 rng 218 | 219 | // following test takes about 30 seconds on my machine 220 | //printfn "size 700k, 100 compactions" 221 | //tf.CompactionTest 700000 7000 rng 222 | 223 | // TODO: test splitAtKey, etc. 224 | 225 | 226 | 227 | 228 | 229 | -------------------------------------------------------------------------------- /src/Awelon/TypeCheck.fs: -------------------------------------------------------------------------------- 1 | namespace Awelon 2 | 3 | // Awelon language can support static typing based on arities 4 | // and annotations. However, Awelon does not specify any type 5 | // model. This is because we might wish to type some programs 6 | // using simple static types, and others using sophisticated 7 | // dependent types, and some might require dynamic analysis. 8 | // 9 | // Ultimately, I'd like for most type checking to be modeled 10 | // within the dictionary. But in the short term, this module 11 | // can provide ad-hoc static type analysis for Awelon code. 
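//
// For a sense of the simplest analysis: arity annotations such as
// (a2)..(a9) declare how many stack arguments a subprogram awaits,
// so they can be recognized syntactically. A hedged sketch (helper
// name and representation are illustrative, not module API):
//
//   // "(a2)".."(a9)" -> Some 2 .. Some 9; anything else -> None
//   let arityAnnotation (w:string) : int option =
//       if (w.Length = 4) && (w.[0] = '(') && (w.[1] = 'a')
//           && ('2' <= w.[2]) && (w.[2] <= '9') && (w.[3] = ')')
//       then Some (int w.[2] - int '0')
//       else None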
12 | // 13 | // My primary concerns for type analysis will be: 14 | // 15 | // - static analysis of arities 16 | // - accelerated data types 17 | // - simple type annotations 18 | // - dynamic types for macros 19 | // 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/Awelon/WordVersion.fs: -------------------------------------------------------------------------------- 1 | namespace Awelon 2 | 3 | // We can associate an Awelon program with a deep, behavioral version 4 | // by taking the secure hash of the program together with versions of 5 | // its dependencies. Doing so is rather useful, and the result is easy 6 | // to cache and use for memoization of other details (such as inferred 7 | // types or evaluations). 8 | // 9 | // Although we could keep a permanent index of versions, it might prove 10 | // simpler to recompute versions as needed. The cost is predictable and 11 | // we can still benefit from temporary caching. 12 | 13 | -------------------------------------------------------------------------------- /src/Data.ByteString/Data.ByteString.fsproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | netstandard2.0 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /src/Data.ByteString/Stream.fs: -------------------------------------------------------------------------------- 1 | 2 | namespace Data.ByteString 3 | 4 | module ByteStream = 5 | 6 | /// A ByteString Writer. 7 | /// 8 | /// This is similar to System.IO.MemoryStream in write mode, albeit 9 | /// with a simpler and more restrictive access limitations. Clients 10 | /// can capture only the data that they write to the stream. Older 11 | /// data cannot be observed or overwritten. 12 | /// 13 | /// Assumes single-threaded use. 14 | /// 15 | /// The buffer is resized on write. A System.OutOfMemoryException 16 | /// is possible if the buffer cannot be resized sufficiently. 17 | type Dst = 18 | val mutable internal Data : byte[] // resizable bytes array 19 | val mutable internal Pos : int // current writer head 20 | internal new() = { Data = Array.empty; Pos = 0 } 21 | 22 | let inline private resize (sz:int) (d:Dst) : unit = 23 | assert(sz >= d.Pos) 24 | 25 | // reallocate array with sufficient space relative to Pos 26 | let private alloc (amt:int) (d:Dst) : unit = 27 | let maxAmt = System.Int32.MaxValue - d.Pos 28 | if (amt > maxAmt) 29 | then raise (new System.OutOfMemoryException("ByteStream reserve")) 30 | // adjust for geometric growth 31 | let newSize = d.Pos + max amt (min maxAmt (max 200 d.Pos)) 32 | let mem = Array.zeroCreate newSize 33 | Array.blit (d.Data) 0 mem 0 (d.Pos) 34 | d.Data <- mem 35 | 36 | let inline private requireSpace (amt:int) (dst:Dst) : unit = 37 | let avail = dst.Data.Length - dst.Pos 38 | if (amt > avail) then alloc amt dst 39 | 40 | /// Reserve space for writing. 41 | /// 42 | /// If this is the first operation on the stream, it performs an 43 | /// initial allocation of the exact size requested. Otherwise, it 44 | /// ensures there is space to write the amount requested without 45 | /// further reallocation. 
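///
/// A hedged usage sketch (all names from this module; `write` and
/// the writer operations are defined below):
///
///   let bs = ByteStream.write (fun dst ->
///       ByteStream.reserve 64 dst
///       ByteStream.writeByte 42uy dst
///       ByteStream.writeBytes (BS.fromString "hello") dst)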
46 | let reserve (amt:int) (dst:Dst) : unit = 47 | assert(amt > 0) 48 | if (Array.isEmpty dst.Data) 49 | then dst.Data <- Array.zeroCreate amt 50 | else requireSpace amt dst 51 | 52 | let writeByte (b:byte) (dst:Dst) : unit = 53 | requireSpace 1 dst 54 | dst.Data.[dst.Pos] <- b 55 | dst.Pos <- (1 + dst.Pos) 56 | 57 | let writeBytes (bs:ByteString) (dst:Dst) : unit = 58 | requireSpace (bs.Length) dst 59 | Array.blit (bs.UnsafeArray) (bs.Offset) (dst.Data) (dst.Pos) (bs.Length) 60 | dst.Pos <- (bs.Length + dst.Pos) 61 | 62 | let inline private captureBytes (p0:int) (dst:Dst) : ByteString = 63 | BS.unsafeCreate (dst.Data) p0 (dst.Pos - p0) 64 | 65 | /// Capture writes to a Dst. 66 | /// 67 | /// This allows a client to observe whatever they have written 68 | /// without extra intermediate buffers or arrays. However, the 69 | /// initial Dst must be sourced at a `write` operation. 70 | let capture (dst:Dst) (writer:Dst -> unit) : ByteString = 71 | let p0 = dst.Pos 72 | writer dst 73 | captureBytes p0 dst 74 | 75 | /// Capture with an extra result. 76 | let capture' (dst:Dst) (writer:Dst -> 'X) : (ByteString * 'X) = 77 | let p0 = dst.Pos 78 | let x = writer dst 79 | let b = captureBytes p0 dst 80 | (b,x) 81 | 82 | /// Capture writes to a new stream. 83 | /// 84 | /// Use of a `write` operation is the only means to construct the 85 | /// output stream, ensuring that all data is captured by at least 86 | /// one observer. You can use `reserve` immediately to provide an 87 | /// initial capacity. 88 | let write (writer:Dst -> unit) : ByteString = 89 | capture (new Dst()) writer 90 | 91 | /// Write with an extra result. 92 | let write' (writer: Dst -> 'X) : (ByteString * 'X) = 93 | capture' (new Dst()) writer 94 | 95 | /// A ByteString Reader. 96 | /// 97 | /// This is similar to System.IO.MemoryStream in read-only mode, albeit 98 | /// without the ability to seek backwards and with alternative features 99 | /// for lookahead parsing. The motivation is to make it easier to reason 100 | /// about program behavior, and convenient integration with ByteString. 101 | /// 102 | /// Assumes single-threaded use. 103 | type Src = 104 | val internal Data : byte[] // const 105 | val internal Limit : int // max Pos 106 | val mutable internal Pos : int 107 | internal new(s:ByteString) = 108 | { Data = s.UnsafeArray 109 | Limit = (s.Offset + s.Length) 110 | Pos = s.Offset 111 | } 112 | 113 | /// Generic exception for insufficient or unexpected data. 114 | exception ReadError 115 | 116 | /// End-of-Stream check. 117 | let eos (src:Src) : bool = 118 | (src.Limit = src.Pos) 119 | 120 | /// Check how many bytes remain in a stream. 121 | let bytesRem (src:Src) : int = 122 | (src.Limit - src.Pos) 123 | 124 | /// Observe remaining bytes in stream without removing them. 125 | let peekRem (src:Src) : ByteString = 126 | BS.unsafeCreate (src.Data) (src.Pos) (bytesRem src) 127 | 128 | /// Read remaining bytes in stream. Removes them from stream. 129 | let readRem (src:Src) : ByteString = 130 | let result = peekRem src 131 | src.Pos <- src.Limit 132 | result 133 | 134 | /// Observe next byte in stream (or raise ReadError) 135 | let peekByte (src:Src) : byte = 136 | if eos src then raise ReadError else 137 | src.Data.[src.Pos] 138 | 139 | /// Read a single byte (or raise ReadError) 140 | let readByte (src:Src) : byte = 141 | let result = peekByte src 142 | src.Pos <- (1 + src.Pos) 143 | result 144 | 145 | /// Observe a run of remaining bytes without removing them. 
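///
/// A hedged reader sketch (`input` is any ByteString; `read`,
/// `readByte`, and `readRem` are from this module) for a one-byte
/// tag followed by an arbitrary payload:
///
///   let (tag, payload) =
///       ByteStream.read (fun src ->
///           let t = ByteStream.readByte src
///           (t, ByteStream.readRem src)) input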
146 | let peekBytes (len:int) (src:Src) : ByteString = 147 | if(len < 0) then invalidArg "len" "negative byte count" else 148 | if(len > bytesRem src) then raise ReadError else 149 | BS.unsafeCreate (src.Data) (src.Pos) len 150 | 151 | /// Read run of bytes, removing them from the stream. 152 | let readBytes (len:int) (src:Src) : ByteString = 153 | let result = peekBytes len src 154 | src.Pos <- (len + src.Pos) 155 | result 156 | 157 | /// Ignore several bytes. 158 | let skip (len:int) (src:Src) : unit = 159 | readBytes len src |> ignore 160 | 161 | /// Attempt to read a byte-stream, but backtrack on ReadError 162 | let tryRead (reader:Src -> 'X) (src:Src) : 'X option = 163 | let p0 = src.Pos 164 | try Some (reader src) 165 | with 166 | | ReadError -> src.Pos <- p0; None 167 | 168 | /// Attempt to read a byte-stream, but backtrack on ReadError or None. 169 | let tryMatch (reader:Src -> 'X option) (src:Src) : 'X option = 170 | let p0 = src.Pos 171 | try let result = reader src 172 | if Option.isNone result then src.Pos <- p0 173 | result 174 | with 175 | | ReadError -> src.Pos <- p0; None 176 | 177 | /// Read a ByteString. 178 | /// 179 | /// Note: This will raise a ReadError if we're NOT at the end-of-stream 180 | /// after performing a read. You might need to add a final readRem if 181 | /// you aren't at the end of stream. 182 | let read (reader:Src -> 'X) (b:ByteString) : 'X = 183 | let src = new Src(b) 184 | let x = reader src 185 | if not (eos src) then raise ReadError 186 | x 187 | 188 | 189 | 190 | type ByteSrc = ByteStream.Src 191 | type ByteDst = ByteStream.Dst 192 | 193 | -------------------------------------------------------------------------------- /src/Data.ByteString/Tests/Program.fs: -------------------------------------------------------------------------------- 1 | module Program = let [] main _ = 0 2 | -------------------------------------------------------------------------------- /src/Data.ByteString/Tests/Tests.fs: -------------------------------------------------------------------------------- 1 | module Tests.Data.ByteString 2 | 3 | #nowarn "988" // suppress warning for empty program 4 | 5 | open System 6 | open System.Runtime.InteropServices 7 | open Xunit 8 | open Data.ByteString 9 | 10 | 11 | [] 12 | let ``empty is length zero`` () = 13 | Assert.Equal(0, BS.empty.Length) 14 | 15 | [] 16 | let ``empty equals empty`` () = 17 | Assert.Equal(BS.empty, BS.empty) 18 | Assert.NotEqual(BS.empty, BS.singleton 101uy) 19 | 20 | [] 21 | let ``isEmpty empty`` () = 22 | Assert.True(BS.isEmpty BS.empty) 23 | Assert.False(BS.isEmpty (BS.singleton 101uy)) 24 | 25 | [] 26 | let ``isEmpty emptyString`` () = 27 | Assert.True(BS.isEmpty (BS.fromString "")) 28 | 29 | [] 30 | let ``empty string converts to empty bytestring`` () = 31 | Assert.Equal(BS.empty, BS.fromString "") 32 | 33 | 34 | [] 35 | let ``using FNV-1a hash`` () = 36 | let arr = BS.unsafeCreateA [| 116uy; 101uy; 115uy; 116uy |] // test 37 | let h32 = ByteString.Hash32 arr 38 | let h64 = ByteString.Hash64 arr 39 | Assert.True((2949673445u = h32)) 40 | Assert.True((18007334074686647077UL = h64)) 41 | 42 | [] 43 | let ``basic structural equality`` () = 44 | Assert.Equal(BS.fromString "hello", BS.fromString "hello") 45 | Assert.NotEqual(BS.fromString "hello", BS.fromString "goodbye") 46 | 47 | 48 | [] 49 | let ``basic string conversions`` () = 50 | let testStr = "→↑←" 51 | let a = BS.fromString testStr 52 | Assert.Equal(a.Length, 9) // UTF-8 conversions 53 | Assert.Equal(testStr, a.ToString()) 54 | Assert.Equal(testStr, 
BS.toString a) 55 | 56 | [] 57 | let ``empty slice is empty`` () = 58 | let foo = (BS.fromString "xyzzy").[3..2] 59 | Assert.True(BS.isEmpty foo) 60 | 61 | [] 62 | let ``non-empty slices equality`` () = 63 | let foo = BS.fromString "xyzxyz" 64 | Assert.Equal(foo.[0..2], foo.[3..5]) 65 | Assert.NotEqual(foo.[0..1], foo.[2..3]) 66 | 67 | [] 68 | let ``slices share underlying array`` () = 69 | let foo = BS.fromString "xyzzy" 70 | Assert.Equal(foo.[0..2].UnsafeArray, foo.[2..3].UnsafeArray) 71 | 72 | [] 73 | let ``simple cons`` () = 74 | Assert.Equal(BS.fromString "test", BS.cons (byte 't') (BS.fromString "est")) 75 | 76 | [] 77 | let ``simple append`` () = 78 | Assert.Equal(BS.fromString "hello, world", 79 | BS.append (BS.fromString "hello,") (BS.fromString " world")) 80 | 81 | [] 82 | let ``simple concat`` () = 83 | Assert.Equal(BS.fromString "hello, world", 84 | ["hello"; ","; " "; "world"] |> Seq.map BS.fromString |> BS.concat) 85 | 86 | [] 87 | let ``empty is smallest`` () = 88 | Assert.True(BS.empty < BS.unsafeCreateA [| 0uy |]) 89 | 90 | [] 91 | let ``lexicographic order`` () = 92 | Assert.True(BS.fromString "x" < BS.fromString "xx") 93 | Assert.True(BS.fromString "xx" < BS.fromString "xy") 94 | Assert.True(BS.fromString "xy" < BS.fromString "yx") 95 | 96 | [] 97 | let ``ordering on slices`` () = 98 | let foo = BS.fromString "xyzxyz" 99 | Assert.True(foo.[1..2] > foo.[3..4]) 100 | Assert.True(foo.[0..1] < foo.[3..5]) 101 | 102 | [] 103 | let ``simple enumerator`` () = 104 | let x : ByteString = BS.fromString "test===" 105 | let mutable sum = 0 106 | for c in (x.[..3]) do 107 | sum <- (int c) + sum 108 | Assert.Equal(448, sum) 109 | 110 | [] 111 | let ``simple fold`` () = 112 | let x : ByteString = BS.fromString "===test" 113 | let accum s c = s + int c 114 | let sum = BS.fold accum 0 (x.[3..]) 115 | Assert.Equal(448, sum) 116 | 117 | [] 118 | let ``pinned data access`` () = 119 | let x : ByteString = (BS.fromString "==test==").[2..5] 120 | BS.withPinnedBytes x (fun p -> 121 | Assert.Equal(116uy, Marshal.ReadByte(p,0)) 122 | Assert.Equal(101uy, Marshal.ReadByte(p,1)) 123 | Assert.Equal(115uy, Marshal.ReadByte(p,2))) 124 | 125 | [] 126 | let ``trivial byte writer`` () = 127 | let x : ByteString = (BS.fromString "==test==").[2..5] 128 | let x' = ByteStream.write(fun dst -> 129 | ByteStream.writeByte x.[0] dst 130 | ByteStream.writeBytes x.[1..] 
dst) 131 | Assert.Equal(x,x') 132 | 133 | [] 134 | let ``trivial byte reader`` () = 135 | let x = (BS.fromString "==test==").[2..5] 136 | let rd src = 137 | let t = ByteStream.readByte src 138 | let est = ByteStream.readBytes 3 src 139 | (t,est) 140 | let (t,est) = ByteStream.read rd x 141 | Assert.Equal(t, byte 't') 142 | Assert.Equal(est, x.[1..]) 143 | 144 | [] 145 | let ``test critbit basics`` () = 146 | let x = BS.unsafeCreateA [| 0xFFuy; 0x00uy; 0xC1uy |] 147 | Assert.True(CritbitTree.testCritbit 0 x) 148 | Assert.True(CritbitTree.testCritbit 8 x) 149 | Assert.True(CritbitTree.testCritbit 9 x) 150 | Assert.False(CritbitTree.testCritbit 10 x) 151 | Assert.False(CritbitTree.testCritbit 17 x) 152 | Assert.True(CritbitTree.testCritbit 18 x) 153 | Assert.True(CritbitTree.testCritbit 19 x) 154 | Assert.True(CritbitTree.testCritbit 20 x) 155 | Assert.False(CritbitTree.testCritbit 21 x) 156 | Assert.False(CritbitTree.testCritbit 25 x) 157 | Assert.True(CritbitTree.testCritbit 26 x) 158 | Assert.False(CritbitTree.testCritbit 27 x) 159 | 160 | [] 161 | let ``find critbit basics`` () = 162 | let x = BS.unsafeCreateA [| 0xFFuy; 0x00uy; 0x00uy |] 163 | let y = BS.unsafeCreateA [| 0xCCuy; 0x00uy |] 164 | Assert.Equal(Some 3, CritbitTree.findCritbit 0 x y) 165 | Assert.Equal(Some 3, CritbitTree.findCritbit 1 x y) 166 | Assert.Equal(Some 3, CritbitTree.findCritbit 2 x y) 167 | Assert.Equal(Some 3, CritbitTree.findCritbit 3 x y) 168 | Assert.Equal(Some 4, CritbitTree.findCritbit 4 x y) 169 | Assert.Equal(Some 7, CritbitTree.findCritbit 5 x y) 170 | Assert.Equal(Some 7, CritbitTree.findCritbit 7 x y) 171 | Assert.Equal(Some 8, CritbitTree.findCritbit 8 x y) 172 | Assert.Equal(Some 18, CritbitTree.findCritbit 9 x y) 173 | 174 | [] 175 | let ``tree basics`` () = 176 | let add s t = CritbitTree.add (BS.fromString s) s t 177 | let d1 : string list = ["bar"; "band"; "bald"; "bandit"; "bald eagle"; "bard"; "barrister"] 178 | let t1 = List.fold (fun t s -> add s t) CritbitTree.empty d1 179 | Assert.True(CritbitTree.validate t1) 180 | Assert.Equal(d1.Length, CritbitTree.size t1) 181 | 182 | Assert.True(CritbitTree.exists (fun k _ -> (byte 'r' = k.[2])) t1) 183 | Assert.False(CritbitTree.exists (fun k _ -> (byte 'z' = k.[1])) t1) 184 | Assert.True(CritbitTree.forall (fun k _ -> (byte 'b' = k.[0])) t1) 185 | 186 | //CritbitTree.iter (fun k v -> printfn "%s" v) t1 187 | 188 | let has t s = CritbitTree.containsKey (BS.fromString s) t 189 | Assert.True(has t1 "band") 190 | Assert.True(has t1 "bard") 191 | Assert.True(has t1 "bald") 192 | Assert.True(has t1 "bar") 193 | Assert.False(has t1 "test") 194 | Assert.False(has t1 "") 195 | Assert.False(has t1 "apple") 196 | Assert.False(has t1 "barrier") 197 | Assert.False(has t1 "bardiche") 198 | 199 | Assert.Equal(Some "bard", CritbitTree.tryFind (BS.fromString "bard") t1) 200 | Assert.Equal(Some "bar", CritbitTree.tryFind (BS.fromString "bar") t1) 201 | Assert.Equal(Some "bald", CritbitTree.tryFind (BS.fromString "bald") t1) 202 | 203 | let rem s t = CritbitTree.remove (BS.fromString s) t 204 | let t2 = t1 |> rem "apple" |> rem "bard" |> rem "band" |> rem "bald" 205 | Assert.True(CritbitTree.validate t2) 206 | Assert.Equal(d1.Length, 3 + CritbitTree.size t2) 207 | Assert.False(has t2 "apple") 208 | Assert.False(has t2 "bard") 209 | Assert.False(has t2 "band") 210 | Assert.True(has t2 "bald eagle") 211 | Assert.True(has t2 "barrister") 212 | 213 | Assert.Equal(t1,t1) 214 | Assert.Equal(t1, CritbitTree.selectPrefix (BS.fromString "ba") t1) 215 | Assert.Equal(3, 
CritbitTree.size (CritbitTree.selectPrefix (BS.fromString "bar") t1)) 216 | Assert.Equal(2, CritbitTree.size (CritbitTree.selectPrefix (BS.fromString "ban") t1)) 217 | Assert.Equal(4, CritbitTree.size (CritbitTree.dropPrefix (BS.fromString "bar") t1)) 218 | Assert.Equal(5, CritbitTree.size (CritbitTree.dropPrefix (BS.fromString "ban") t1)) 219 | 220 | let p t s = 221 | let struct(a,b) = CritbitTree.splitAtKey (BS.fromString s) t 222 | (a,b) 223 | Assert.Equal(t1, snd (p t1 "bald")) // "bald" is least key 224 | Assert.Equal(2, CritbitTree.size (fst (p t1 "ban"))) // bald and bald eagle to left 225 | Assert.Equal(4, CritbitTree.size (fst (p t1 "bar"))) 226 | 227 | let d1' = List.map snd (CritbitTree.toList t1) 228 | Assert.Equal(d1', List.sort d1) 229 | 230 | 231 | 232 | 233 | -------------------------------------------------------------------------------- /src/Data.ByteString/Tests/Tests.fsproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | netcoreapp2.0 5 | 6 | false 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /src/Main.fs: -------------------------------------------------------------------------------- 1 | module Wikilon.Main 2 | 3 | open System 4 | open System.IO 5 | open System.Threading 6 | open System.Security.Cryptography 7 | open Stowage 8 | open Suave 9 | open Data.ByteString 10 | 11 | let helpMsg = """ 12 | Wikilon is a wiki-inspired development environment for Awelon. This program 13 | starts a local web server, which is mostly configured online. Arguments: 14 | 15 | [-help] print this help message 16 | [-p Port] bind specified port (default 3000) 17 | [-ip IP] bind IP or DNS (default 127.0.0.1) 18 | [-dir Dir] where to store data (default wiki) 19 | [-size GB] maximum database size (default 100) 20 | [-cache MB] space-speed tradeoff (default 100) 21 | [-admin] print a temporary admin password 22 | 23 | Configuration of Wikilon is managed online. Requesting an `-admin` password 24 | makes the admin account available until process reset. The admin can create 25 | other accounts with administrative authorities. 26 | 27 | Wikilon does not have built-in support for TLS. Try a reverse proxy, such as 28 | NGINX, to add the TLS layer if needed. 
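For example, to serve on port 8080 with a temporary admin password:

  dotnet run -- -p 8080 -dir wiki -admin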
29 | """ 30 | 31 | type Args = { 32 | help : bool; 33 | port : int; 34 | ip : string; 35 | home : string; 36 | size : int; 37 | cache : int; 38 | admin : bool; 39 | bad : string list; 40 | } 41 | 42 | let defaultArgs : Args = { 43 | help = false; 44 | port = 3000; 45 | ip = "127.0.0.1"; 46 | home = "wiki"; 47 | size = 100 48 | cache = 100 49 | admin = false; 50 | bad = []; 51 | } 52 | 53 | let (|Nat|_|) (s : string) : int option = 54 | let mutable ival = 0 55 | if System.Int32.TryParse(s, &ival) && (ival > 0) 56 | then Some ival 57 | else None 58 | 59 | let rec procArgs xs (a : Args) : Args = 60 | match xs with 61 | | [] -> {a with bad = List.rev a.bad } 62 | | "-help"::xs' -> procArgs xs' { a with help = true } 63 | | "-p"::(Nat p)::xs' -> procArgs xs' { a with port = p } 64 | | "-ip"::ip::xs' -> procArgs xs' { a with ip = ip } 65 | | "-dir"::dir::xs' -> procArgs xs' { a with home = dir } 66 | | "-size"::(Nat n)::xs' -> procArgs xs' { a with size = n } 67 | | "-cache"::(Nat n)::xs' -> procArgs xs' { a with cache = n } 68 | | "-admin"::xs' -> procArgs xs' { a with admin = true } 69 | | x::xs' -> procArgs xs' {a with bad = x :: a.bad } 70 | 71 | let getEntropy (n : int) : ByteString = 72 | let mem = Array.zeroCreate n 73 | do RandomNumberGenerator.Create().GetBytes(mem) 74 | BS.unsafeCreateA mem 75 | 76 | let hashStr = Stowage.RscHash.hash >> BS.toString 77 | 78 | let setAppWorkingDir fp = 79 | do Directory.CreateDirectory(fp) |> ignore 80 | Directory.SetCurrentDirectory(fp) 81 | 82 | // thoughts: it might be useful to separate the authorizations DB 83 | // from the main storage layer, e.g. to simplify integration with 84 | // open ID models. 85 | 86 | [] 87 | let main argv = 88 | let args : Args = procArgs (List.ofArray argv) defaultArgs 89 | if args.help then printfn "%s" helpMsg; 0 else 90 | let bad = not (List.isEmpty args.bad) 91 | if bad then printfn "Unrecognized args (try -help): %A" args.bad; (-1) else 92 | do setAppWorkingDir args.home 93 | Stowage.Cache.resize (1_000_000UL * (uint64 args.cache)) 94 | use dbStore = new Stowage.LMDB.Storage("data", (1024 * args.size)) 95 | let dbRoot = Stowage.DB.fromStorage dbStore 96 | let dbWiki = DB.withPrefix (BS.fromString "wiki/") dbRoot 97 | let adminPass = getEntropy 64 |> Stowage.RscHash.hash |> BS.take 24 98 | let wsParams : WS.Params = { db = dbWiki; admin = adminPass } 99 | let app = WS.mkApp wsParams 100 | let cts = new CancellationTokenSource() 101 | let svc = 102 | { defaultConfig with 103 | hideHeader = true 104 | bindings = [ HttpBinding.createSimple HTTP args.ip args.port 105 | ] 106 | cancellationToken = cts.Token 107 | } 108 | let (_,serve) = startWebServerAsync svc app 109 | Async.Start(serve, cts.Token) 110 | if args.admin then do 111 | printfn "admin:%s" (BS.toString adminPass) 112 | printfn "Press any key to halt." 113 | Console.ReadKey true |> ignore 114 | cts.Cancel() // clean cancel 115 | 0 // return an integer exit code 116 | 117 | 118 | -------------------------------------------------------------------------------- /src/Stowage/Data/CByteString.fs: -------------------------------------------------------------------------------- 1 | namespace Stowage 2 | open Data.ByteString 3 | 4 | /// A "compacting" ByteString. 5 | /// 6 | /// The idea here is that small bytestrings should be held locally 7 | /// in memory, while large bytestrings will be remotely referenced 8 | /// and only briefly cached in memory. 
9 | /// 10 | /// Although this is represented by CVRef, we will use 11 | /// a specialized encoder to reduce overheads and ensure the "raw" 12 | /// encoding is used for remote binaries (no size prefix), making 13 | /// structure sharing with equivalent binaries a little easier. 14 | type CByteString = CVRef 15 | 16 | module EncCByteString = 17 | 18 | // Prefix is: 19 | // 0 for remote 20 | // EncVarNat (1 + Length) for local. 21 | // 22 | // Suffix is: 0 for remote, nothing for local. This is 23 | // used to simplify recognition of remote references. 24 | 25 | let write (ref:CByteString) (dst:ByteDst) : unit = 26 | match ref with 27 | | Local (s,_) -> 28 | EncVarNat.write (1UL + uint64 s.Length) dst 29 | ByteStream.writeBytes s dst 30 | | Remote vref -> 31 | EncVarNat.write (0UL) dst 32 | ByteStream.writeBytes (vref.Addr) dst 33 | ByteStream.writeByte 0uy dst 34 | 35 | let read (db:Stowage) (src:ByteSrc) : CByteString = 36 | let s0 = ByteStream.bytesRem src 37 | let len = EncVarNat.read src 38 | if (0UL = len) then // Remote 39 | let h = ByteStream.readBytes (RscHash.size) src 40 | let bSuffix = ByteStream.readByte src 41 | if (bSuffix <> 0uy) then raise ByteStream.ReadError 42 | Remote (LVRef.wrap (VRef.wrap (EncBytesRaw.codec) db h)) 43 | else // Local 44 | let bs = ByteStream.readBytes (int (len - 1UL)) src 45 | let sf = ByteStream.bytesRem src 46 | Local (bs, uint64 (s0 - sf)) 47 | 48 | let remoteSize = uint64 (2 + RscHash.size) 49 | let inline localSize s = 50 | let len = uint64 (BS.length s) 51 | len + EncVarNat.size (1UL + len) 52 | 53 | let compact (thresh:SizeEst) (db:Stowage) (ref:CByteString) : struct(CByteString * SizeEst) = 54 | match ref with 55 | | Local (s,szEst) -> 56 | if (szEst < thresh) then struct(ref, szEst) else 57 | let sz = localSize s 58 | if (sz < thresh) then struct(Local(s,sz), sz) else 59 | let vref = LVRef.stow (EncBytesRaw.codec) db s sz 60 | struct(Remote vref, remoteSize) 61 | | Remote _ -> struct(ref, remoteSize) 62 | 63 | let codec (thresh:SizeEst) = 64 | { new Codec with 65 | member __.Write ref dst = write ref dst 66 | member __.Read db src = read db src 67 | member __.Compact db ref = compact thresh db ref 68 | } 69 | 70 | module CByteString = 71 | 72 | // local, load, etc. matching interface for CVRef 73 | let inline local (s:ByteString) : CByteString = CVRef.local s 74 | let inline isRemote (ref:CByteString) : bool = CVRef.isRemote ref 75 | let inline load (ref:CByteString) : ByteString = CVRef.load ref 76 | let inline load' (ref:CByteString) : ByteString = CVRef.load' ref 77 | 78 | /// Wrap a remote binary resource. Will decref remote resource 79 | /// when destroyed. Use `wrap` if you need to incref resource. 80 | let inline wrap' (db:Stowage) (h:RscHash) = 81 | Remote (LVRef.wrap (VRef.wrap' (EncBytesRaw.codec) db h)) 82 | 83 | /// Wrap remote binary resource, with incref to prevent GC. 84 | let inline wrap (db:Stowage) (h:RscHash) = 85 | db.Incref h 86 | wrap' db h 87 | 88 | /// Construct a compacted bytestring directly, compacting immediately 89 | /// if required according to the threshold. This function uses the 90 | /// specialized encoder from EncCByteString. 
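///
/// A hedged example (`db` is any Stowage instance; the threshold is
/// in bytes):
///
///   // a 5 byte payload, under the 1000 byte threshold: stays Local
///   let r = CByteString.stow 1000UL db (BS.fromString "hello")
///   // a multi-kilobyte payload would instead be stowed as Remote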
91 | let stow (thresh:SizeEst) (db:Stowage) (s:ByteString) : CByteString =
92 | let struct(ref,_) = EncCByteString.compact thresh db (local s)
93 | ref
94 |
95 |
--------------------------------------------------------------------------------
/src/Stowage/Data/CVRef.fs:
--------------------------------------------------------------------------------
1 | namespace Stowage
2 | open Data.ByteString
3 |
4 | /// Compacting Value References
5 | ///
6 | /// The overhead for secure hash resources is relatively high, so we don't
7 | /// want to use them for small values. Instead, keep small values inline.
8 | /// CVRef models this common use case. It is either a value in local memory
9 | /// or a cacheable LVRef. Upon compaction, we heuristically move large data
10 | /// into LVRefs. Repeated compactions are short-circuited by keeping
11 | /// some size metadata.
12 | type CVRef<'V> =
13 | | Local of 'V * SizeEst
14 | | Remote of LVRef<'V>
15 | override cref.ToString() =
16 | match cref with
17 | | Local (v,_) -> sprintf "`%A" v
18 | | Remote lvref -> lvref.ToString()
19 |
20 | /// Serialization for CVRef
21 | module EncCVRef =
22 | // encoding is `local or {hash}.
23 |
24 | // overlapping the first byte with the RscHash container.
25 | let cLocal = byte '`'
26 | do assert(cLocal <> EncRscHash.cPrefix)
27 |
28 | let write (cV:Codec<'V>) (ref:CVRef<'V>) (dst:ByteDst) : unit =
29 | match ref with
30 | | Local (v,_) ->
31 | ByteStream.writeByte cLocal dst
32 | Codec.write cV v dst
33 | | Remote vref -> EncVRef.write (vref) dst
34 |
35 | let read (cV:Codec<'V>) (db:Stowage) (src:ByteSrc) : CVRef<'V> =
36 | let b0 = ByteStream.readByte src
37 | if (b0 = cLocal) then
38 | let s0 = ByteStream.bytesRem src
39 | let v = Codec.read cV db src
40 | let sf = ByteStream.bytesRem src
41 | Local (v, uint64 (s0 - sf))
42 | else if (b0 = EncRscHash.cPrefix) then
43 | let h = ByteStream.readBytes (RscHash.size) src
44 | let bf = ByteStream.readByte src
45 | if (bf <> EncRscHash.cSuffix) then raise ByteStream.ReadError
46 | Remote (LVRef.wrap (VRef.wrap cV db h))
47 | else raise ByteStream.ReadError
48 |
49 | let compact (thresh:SizeEst) (cV:Codec<'V>) (db:Stowage) (ref:CVRef<'V>) : struct(CVRef<'V> * SizeEst) =
50 | match ref with
51 | | Local (v,szEst) ->
52 | if (szEst < thresh) then struct(ref, 1UL + szEst) else
53 | let struct(v',szV) = Codec.compactSz cV db v
54 | if (szV < thresh) then struct(Local(v',szV), 1UL + szV) else
55 | struct(Remote (LVRef.stow cV db v' szV), EncRscHash.size)
56 | | Remote _ -> struct(ref, EncRscHash.size)
57 |
58 | let codec (thresh:SizeEst) (cV:Codec<'V>) =
59 | { new Codec<CVRef<'V>> with
60 | member __.Write ref dst = write cV ref dst
61 | member __.Read db src = read cV db src
62 | member __.Compact db ref = compact thresh cV db ref
63 | }
64 |
65 |
66 | module CVRef =
67 |
68 | /// Local in-memory value.
69 | ///
70 | /// The first compaction will determine whether this remains local
71 | /// or is stowed remotely. Subsequent compactions of the same size
72 | /// will be short-circuited.
73 | let inline local v = Local (v, System.UInt64.MaxValue)
74 |
75 | /// Remote value reference.
76 | let inline remote r = Remote r
77 |
78 | /// Test whether value is a Stowage reference.
79 | let isRemote ref =
80 | match ref with
81 | | Local _ -> false
82 | | Remote _ -> true
83 |
84 | /// Caching access to value.
85 | let load (ref:CVRef<'V>) : 'V =
86 | match ref with
87 | | Local (v,_) -> v
88 | | Remote r -> LVRef.load r
89 |
90 | /// Non-caching access to value.
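/// Unlike `load`, a Remote dereference here will not retain the
/// parsed value in cache. A hedged contrast (`r` is any CVRef):
///
///   let v1 = CVRef.load r // Remote values cached briefly after load
///   let v2 = CVRef.load' r // Remote values parsed but not cached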
91 | let load' (ref:CVRef<'V>) : 'V = 92 | match ref with 93 | | Local (v,_) -> v 94 | | Remote r -> LVRef.load' r 95 | 96 | /// Construct a compacted value directly. 97 | /// 98 | /// This will immediately compact the value in memory, then decide 99 | /// whether to keep it local or stow it to the database based on 100 | /// the estimated size. Even if stowed, the value remains in cache 101 | /// at least briefly. 102 | let inline stow (thresh:SizeEst) (cV:Codec<'V>) (db:Stowage) (v:'V) : CVRef<'V> = 103 | let struct(ref,_) = EncCVRef.compact thresh cV db (local v) 104 | ref 105 | 106 | -------------------------------------------------------------------------------- /src/Stowage/Data/Cache.fs: -------------------------------------------------------------------------------- 1 | namespace Stowage 2 | open System.Threading 3 | open System.Threading.Tasks 4 | 5 | /// Abstract cached resource. 6 | /// 7 | /// A cached resource provides both an interface to clear the cache 8 | /// and a 'usage' heuristic - usually a touch count or time stamp - 9 | /// to delay release of a recently used resource. 10 | type Cached = 11 | abstract member Usage : int with get 12 | abstract member Clear : unit -> unit 13 | 14 | /// Stowage Cache 15 | /// 16 | /// Use of Stowage resources can operate a lot like virtual memory, 17 | /// a purely functional variant where stowed elements are loaded and 18 | /// parsed as needed. A Stowage cache exists to manage the resources, 19 | /// simulating the virtual memory paging heuristics. 20 | /// 21 | /// The current implementation uses a concrete heuristic strategy that 22 | /// combines aspects of least-recently-used with exponential-decay. It 23 | /// should be effective for most use cases. Resources are cleared from 24 | /// the .Net task thread pool. 25 | /// 26 | /// This module only provides a manager, not lookup. For cached lookups, 27 | /// consider MCache or DCache. 28 | module Cache = 29 | 30 | type private Rsc = (struct(SizeEst * int * System.WeakReference)) 31 | type private Frame = ResizeArray 32 | 33 | // Our simple algorithm for releasing memory. 34 | let private scrubFrame (rng:System.Random) (pdecay:int) (f:Frame) : struct(Frame * SizeEst) = 35 | let mutable erased = 0UL 36 | let newFrame = new Frame() 37 | for (struct(sz,tc,wref)) in f do 38 | match wref.Target with 39 | | null -> 40 | erased <- (erased + sz) 41 | | :? Cached as c -> 42 | let tc' = c.Usage 43 | let doScrub = (tc = tc') && (rng.Next(100) < pdecay) 44 | if doScrub 45 | then c.Clear(); erased <- (erased + sz) 46 | else newFrame.Add(struct(sz,tc',wref)) 47 | | _ -> failwith "invalid state" 48 | struct(newFrame,erased) 49 | 50 | /// Concrete cache manager. 51 | /// 52 | /// This object manages a set of Cached resources, clearing some 53 | /// whenever a quota overflows (this is a soft quota). The items 54 | /// cleared are selected based on strategies of least-recently 55 | /// used and exponential decay. This should be good for most use 56 | /// cases, but more sophisticated strategies can be implemented 57 | /// by having Cached items decide what to do upon Clear. 58 | /// 59 | /// Cached items are only referenced weakly, such that GC can 60 | /// remove items independently from the cache manager clearing 61 | /// them. Due to GC, there is no guarantee Clear is called. 
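///
/// A hedged sketch of a participating resource (the type itself is
/// hypothetical; `Cached` and `Cache.receive` are defined in this
/// module):
///
///   type DemoRsc() =
///       let mutable tc = 0
///       member __.Touch() = tc <- tc + 1 // bump usage on each access
///       interface Cached with
///           member __.Usage with get() = tc
///           member __.Clear() = () // release cached data here
///
/// Register it against the quota: `Cache.receive (rsc :> Cached) 1200UL`.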
62 | type Manager = 63 | val mutable private szMax : SizeEst 64 | val mutable private szCur : SizeEst 65 | val mutable private ixHd : int 66 | val private frames : Frame[] 67 | val private rngSrc : System.Random 68 | val private pdecay : int 69 | val mutable private bgtask : bool 70 | new(framect,pdecay,quota) = 71 | { szMax = quota 72 | szCur = 0UL 73 | ixHd = 0 74 | frames = Array.init (max framect 2) (fun _ -> new Frame()) 75 | rngSrc = new System.Random(0) 76 | pdecay = (max pdecay 1) 77 | bgtask = false 78 | } 79 | new(quota) = new Manager(12,60,quota) 80 | 81 | /// Adjust the managed quota. 82 | member m.Resize (quota:SizeEst) : unit = 83 | lock m (fun () -> 84 | m.szMax <- quota 85 | m.ConsiderBGScrub()) 86 | 87 | /// Add object for management. When added, a size estimate must 88 | /// also be provided to count against the quota. 89 | member m.Receive (c:Cached) (sz0:SizeEst) : unit = 90 | lock m (fun () -> 91 | let f = m.frames.[m.ixHd] 92 | let sz = 80UL + sz0 // add per-item overhead 93 | let tc = c.Usage + System.Int32.MinValue // logical touch 94 | f.Add(struct(sz,tc,System.WeakReference(c))) 95 | m.szCur <- (m.szCur + sz) 96 | m.ConsiderBGScrub()) 97 | 98 | member private m.ConsiderBGScrub() : unit = 99 | if (m.bgtask || (m.szMax >= m.szCur)) then () else 100 | assert(Monitor.IsEntered(m)) 101 | m.bgtask <- true 102 | Task.Run(fun () -> m.BGScrub()) |> ignore 103 | 104 | member inline private m.NextFrameIx() = 105 | ((m.ixHd + 1) % (m.frames.Length)) 106 | 107 | member private m.BGScrub() : unit = 108 | assert(not (Monitor.IsEntered(m))) 109 | lock (m.frames) (fun () -> 110 | assert(m.bgtask) 111 | lock m (fun () -> m.ixHd <- m.NextFrameIx()) 112 | let ixScrub = m.NextFrameIx() 113 | let f = m.frames.[ixScrub] 114 | let struct(f',erased) = scrubFrame (m.rngSrc) (m.pdecay) (f) 115 | m.frames.[ixScrub] <- f' 116 | lock m (fun () -> 117 | assert(m.szCur >= erased) 118 | m.szCur <- (m.szCur - erased) 119 | m.bgtask <- false 120 | m.ConsiderBGScrub())) 121 | 122 | /// Although there are some use-cases for multiple cache managers, 123 | /// it's usually best to just use a global cache manager to match 124 | /// our global heap and OS-provided virtual memory system. 125 | let defaultManager = new Manager(80_000_000UL) 126 | 127 | /// Configure the global Stowage cache size. Default is eighty 128 | /// megabytes. Sizes aren't exact, but are used to estimate when 129 | /// an overflow occurs to drive background expiration of data. 130 | /// 131 | /// Cache size should be larger than the normal working set or 132 | /// we'll suffer cache thrashing. It shouldn't be larger than 133 | /// RAM because Stowage serves as a virtual memory system. 134 | let inline resize sz = defaultManager.Resize sz 135 | 136 | /// Manage a cached resource. 137 | /// 138 | /// Cached objects are held by weak reference, expire only 139 | /// upon cache overflow, and cleared by background thread. 140 | /// The `Clear` method is only called if the object has not 141 | /// been garbage collected, hence is not guaranteed. 142 | /// 143 | /// Size estimates should include rough overheads. They don't 144 | /// need to be exact, only sufficient that we're not overshooting 145 | /// our quotas by too much. Better to err towards high estimates. 
146 | let inline receive c sz = defaultManager.Receive c sz
147 |
148 |
--------------------------------------------------------------------------------
/src/Stowage/Data/Codec.fs:
--------------------------------------------------------------------------------
1 | namespace Stowage
2 | open Data.ByteString
3 |
4 | /// Estimated Serialization Size (in bytes).
5 | type SizeEst = uint64
6 |
7 | /// Abstract encoder-decoder type for data in Stowage.
8 | ///
9 | /// This uses the stream-oriented read and write from Data.ByteString
10 | /// to avoid constructing too many intermediate buffers. It decodes
11 | /// in context of a DB so we can usefully decode reference values into
12 | /// VRefs, BRefs, or LVRefs.
13 | ///
14 | /// Stowage data structures frequently support a "compaction" step
15 | /// that rewrites large, stable fragments of a value as VRefs. This
16 | /// is accessible through the Stowage codec, and compaction returns
17 | /// an estimated write size to aid heuristics.
18 | ///
19 | /// In effect, compaction substitutes secure hash references for the
20 | /// components of a large value, so they can be loaded, cached, and
21 | /// efficiently serialized. The returned size estimate then drives
22 | /// heuristic decisions about when to compact further.
23 | ///
24 | /// Compaction should ideally be idempotent, but this isn't enforced.
25 | type Codec<'T> =
26 | abstract member Write : 'T -> ByteDst -> unit
27 | abstract member Read : Stowage -> ByteSrc -> 'T
28 | abstract member Compact : Stowage -> 'T -> struct('T * SizeEst)
29 |
30 | module Codec =
31 |
32 | let inline write (c:Codec<'T>) (v:'T) (dst:ByteDst) : unit = c.Write v dst
33 |
34 | let inline read (c:Codec<'T>) (db:Stowage) (src:ByteSrc) : 'T = c.Read db src
35 |
36 | let inline compactSz (c:Codec<'T>) (db:Stowage) (v:'T) : struct('T * SizeEst) = c.Compact db v
37 |
38 | let inline compact (c:Codec<'T>) (db:Stowage) (v:'T) : 'T =
39 | let struct(v',_) = compactSz c db v
40 | v'
41 |
42 | let inline writeBytes (c:Codec<'T>) (v:'T) : ByteString =
43 | ByteStream.write (write c v)
44 |
45 | /// Read full bytestring as value, or raise ByteStream.ReadError
46 | let inline readBytes (c:Codec<'T>) (db:Stowage) (b:ByteString) : 'T =
47 | ByteStream.read (read c db) b
48 |
49 | /// Read full bytestring as value, or return None.
50 | let inline tryReadBytes (c:Codec<'T>) (db:Stowage) (b:ByteString) : 'T option =
51 | try readBytes c db b |> Some
52 | with
53 | | ByteStream.ReadError -> None
54 |
55 | /// Stow a value.
56 | ///
57 | /// Note: You'll have a reference to the resulting RscHash, so
58 | /// you'll need to use db.Decref later, or wrap into VRef. See
59 | /// VRef.stow for a more convenient .Net reference type.
60 | let inline stow (c:Codec<'T>) (db:Stowage) (v:'T) : RscHash =
61 | let result = db.Stow (writeBytes c v)
62 | System.GC.KeepAlive v // prevent GC of VRefs in v during write
63 | result
64 |
65 | /// Load a stowed value from RscHash.
66 | let inline load (c:Codec<'T>) (db:Stowage) (h:RscHash) : 'T =
67 | readBytes c db (db.Load h)
68 |
69 | /// Encode 'Val via 'Rep.
70 | ///
71 | /// This is very useful for building codec combinators. It should be
72 | /// the case that get and set compose to identity, and the translation
73 | /// should ideally be inexpensive in each direction.
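///
/// A hedged example (the `Age` wrapper is hypothetical; this assumes
/// EncVarNat.codec : Codec<uint64>, as used elsewhere in this library):
///
///   type Age = Age of uint64
///   let cAge : Codec<Age> =
///       Codec.view (EncVarNat.codec) (fun n -> Age n) (fun (Age n) -> n)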
74 | let view (cRep : Codec<'Rep>) (get : 'Rep -> 'Val) (set : 'Val -> 'Rep) =
75 | { new Codec<'Val> with
76 | member __.Write v dst = cRep.Write (set v) dst
77 | member __.Read db src = get (cRep.Read db src)
78 | member __.Compact db v =
79 | let struct(rep, szRep) = cRep.Compact db (set v)
80 | struct(get rep, szRep)
81 | }
82 |
83 | /// Boxed Codec, to unify various codec types.
84 | let inline boxed (cV : Codec<'V>) : Codec<obj> =
85 | view cV (box<'V>) (unbox<'V>)
86 |
87 | /// Invalid Codec - raises `invalidOp` for every operation.
88 | ///
89 | /// This is intended as a temporary placeholder for construction of
90 | /// recursively structured codecs (for example, the Trie codec).
91 | let invalid : Codec<'Val> =
92 | { new Codec<'Val> with
93 | member __.Write _ _ = invalidOp "invalid Codec cannot write"
94 | member __.Read _ _ = invalidOp "invalid Codec cannot read"
95 | member __.Compact _ _ = invalidOp "invalid Codec cannot compact"
96 | }
97 |
98 |
--------------------------------------------------------------------------------
/src/Stowage/Data/DurableCache.fs:
--------------------------------------------------------------------------------
1 | namespace Stowage
2 | open Data.ByteString
3 |
4 | /// Durable Resource Cache
5 | ///
6 | /// The motive with DCache is to support large-scale memoization
7 | /// for incremental compilation or monadic views. The cached data
8 | /// is held on disk (rather than in memory), and is accessed via
9 | /// ByteString keys. If the cache has reached its quota limits,
10 | /// we'll gradually erase entries using heuristic techniques.
11 | module DCache =
12 |
13 | /// Keys must be ByteStrings. Typically, these represent names or
14 | /// computations (with version information or secure hashes).
15 | type Key = ByteString
16 |
17 | // For simplicity, I rewrite the keys using a secure hash.
18 | //
19 | // This ensures data has uniform depth and distribution, which
20 | // in turn simplifies the exponential decay implementation. It
21 | // also guards against timing attacks on a shared cache.
22 | let inline private mangleKey k = RscHash.hash k |> BS.drop 4
23 |
24 | // Each element will remember its given "memory" size estimate,
25 | // which may be different from encoded size. Clients should
26 | // use CVRef or other reference type explicitly for larger values.
27 | type private E<'V> = (struct(SizeEst * 'V))
28 |
29 | // How should we represent the cache?
30 | //
31 | // I'm leaning towards an LSM Trie, to support efficient updates
32 | // and lazy deletions. A normal trie would also be acceptable.
33 | //
34 | // Delayed parse might be useful - we could use an intermediate
35 | // MCache for stored data. But it might be better to make this
36 | // explicit in our value types, e.g. using a lazy value encoder.
37 |
38 |
39 | // Our basic storage representation - a sized tree. We need the
40 | // sizes to help drive GC for quota management.
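//
// For example (hedged; `wordVersionHash` is illustrative), a client
// might key cached type analysis by a word's deep behavioral version
// (cf. WordVersion), so stale entries are simply never queried again:
//
//   let key = BS.append (BS.fromString "type:") wordVersionHash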
41 | type private StowageRep<'V> = 42 | { data : LSMTrie> 43 | size : uint64 // how much data (total of SizeEst) 44 | count : uint64 // how many keys 45 | } 46 | 47 | let private cRep cV = 48 | let cT = LSMTrie.codec (EncPair.codec' (EncVarNat.codec) cV) 49 | { new Codec> with 50 | member cR.Write r dst = 51 | EncVarNat.write (r.count) dst 52 | EncVarNat.write (r.size) dst 53 | Codec.write cT (r.data) dst 54 | member cR.Read db src = 55 | let count = EncVarNat.read src 56 | let size = EncVarNat.read src 57 | let data = Codec.read cT db src 58 | { count = count; size = size; data = data } 59 | member cR.Compact db r = 60 | let struct(data',szData) = Codec.compactSz cT db (r.data) 61 | let szEst = EncVarNat.size (r.count) 62 | + EncVarNat.size (r.size) 63 | + szData 64 | let r' = { count = (r.count); size = (r.size); data = data' } 65 | struct(r', szEst) 66 | } 67 | 68 | 69 | // thoughts: should I favor an LSM trie or a plain trie? A plain 70 | // trie has advantages of more precise size estimates, but for a 71 | // cache the update performance and working sets from LSM may be 72 | // more valuable. 73 | // 74 | // If I don't secure-hash keys, an IntMap based hashmap may be wise. 75 | // 76 | // I don't want to use a DB TVar directly because I don't want to 77 | // sync every update with other durable DB updates. Instead, I might 78 | // want to write after some threshold is buffered in memory, or after 79 | // several seconds delay. Or some other heuristic. Loss is acceptable. 80 | // 81 | // An interesting point: if Codec changes, I can presumably handle 82 | // this by simply erasing the keys that raised ByteStream.ReadError. 83 | // So cache is highly resilient to changes in type or codec. Similar 84 | // for missing a hash resource. 85 | 86 | -------------------------------------------------------------------------------- /src/Stowage/Data/FingerTree.fs: -------------------------------------------------------------------------------- 1 | namespace Stowage 2 | 3 | /// A Finger Tree in Stowage 4 | /// 5 | /// The Finger Tree is the 'swiss army knife' of data structures. It 6 | /// supports efficient O(1) operations at each edge. Monoidal summaries 7 | /// can support efficient random access, priority, etc. 8 | /// 9 | /// This finger tree assumes relatively cheap computation for measures, 10 | /// and chooses to recompute rather than cache for local data. 11 | /// 12 | /// TODO: This module is still far from complete. 13 | module FingerTree = 14 | 15 | /// Branches for the 2-3 tree structure. 16 | type B<'V> = 17 | | B2 of 'V * 'V 18 | | B3 of 'V * 'V * 'V 19 | 20 | /// A node is the main element we compact for Stowage. 21 | [] 22 | type Node<'V, 'M> = 23 | val M : 'M 24 | val B : CVRef> 25 | new(m,b) = { M = m; B = b } 26 | interface IMeasured<'M> with 27 | member node.Measure = node.M 28 | 29 | /// Digits at each edge of a finger tree. 
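///
/// Digits hold one to four elements, which is what enables O(1) edge
/// operations: most insertions merely grow a digit, and only a full
/// D4 overflows into the deeper spine. A hedged sketch of the
/// digit-growing step (helper is illustrative, not module API):
///
///   let consDigit (v:'V) (d:D<'V>) : D<'V> option =
///       match d with
///       | D1 a -> Some (D2 (v,a))
///       | D2 (a,b) -> Some (D3 (v,a,b))
///       | D3 (a,b,c) -> Some (D4 (v,a,b,c))
///       | D4 _ -> None // overflow: push a Node into the spine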
30 | type D<'V> = 31 | | D1 of 'V 32 | | D2 of 'V * 'V 33 | | D3 of 'V * 'V * 'V 34 | | D4 of 'V * 'V * 'V * 'V 35 | 36 | let private measureD (p:'M -> 'M -> 'M) (m:'V -> 'M) (d:D<'V>) : 'M = 37 | match d with 38 | | D1 (v1) -> m v1 39 | | D2 (v1,v2) -> p (m v1) (m v2) 40 | | D3 (v1,v2,v3) -> p (m v1) (p (m v2) (m v3)) 41 | | D4 (v1,v2,v3,v4) -> p (m v1) (p (m v2) (p (m v3) (m v4))) 42 | 43 | let inline private measure (v:'V) : 'M = (v :> IMeasured<'M>).Measure 44 | 45 | type Tree<'V, 'M when 'V :> IMeasured<'M> and 'M :> Monoid<'M> and 'M:(new:unit -> 'M)> = 46 | | Empty 47 | | Single of 'V 48 | | Many of D<'V> * 'M * Tree,'M> * D<'V> 49 | // note: measure in Many is cache only for central FingerTree. 50 | member t.Monoid with get() = MonoidSource<'M>.Instance 51 | interface IMeasured<'M> with 52 | member t.Measure : 'M = 53 | match t with 54 | | Empty -> t.Monoid.Zero 55 | | Single v -> measure v 56 | | Many (pre,mt,t,suf) -> 57 | let p = t.Monoid.Plus 58 | let m d = measureD p measure d 59 | p (m pre) (p mt (m suf)) 60 | 61 | 62 | -------------------------------------------------------------------------------- /src/Stowage/Data/LVRef.fs: -------------------------------------------------------------------------------- 1 | namespace Stowage 2 | open Data.ByteString 3 | 4 | /// Value References with cache-driven 'latency' upon stow and load. 5 | /// 6 | /// When first stowing data, we treat the cache as a buffer, delaying 7 | /// serialization until space is required. When loading data, we will 8 | /// cache data in the reference and hold it in memory until space is 9 | /// required again. 10 | /// 11 | /// Note: LVRef does not attempt sharing of cached resources. That is, 12 | /// there is no equivalent to an ephemeron table. 13 | type LVRef<'V> = VRef<'V> 14 | 15 | module LVRef = 16 | 17 | // I assume some overheads when parsing a value, but it isn't 18 | // clear the size of a value in .Net memory. For now, just using 19 | // an assumption of 50% compression and some base overheads. 20 | let inline private memSize (sz:SizeEst) : SizeEst = 21 | 200UL + (sz <<< 1) 22 | 23 | type private R<'V> = 24 | inherit VRef<'V> 25 | val mutable lvref : Lazy> 26 | val mutable cache : (struct('V * SizeEst)) option 27 | val mutable tc : int 28 | member inline r.Touch() = r.tc <- (r.tc + 1) 29 | member private r.Load() = lock r (fun () -> 30 | match r.cache with 31 | | Some vsz -> r.Touch(); vsz // benign race to load data 32 | | None -> 33 | let vsz = r.lvref.Value.Deref() 34 | r.cache <- Some vsz 35 | let struct(_,sz) = vsz 36 | Cache.receive (r :> Cached) (memSize sz) 37 | vsz 38 | ) // end lock 39 | override r.Addr with get() = r.lvref.Value.Addr 40 | override r.Deref() = 41 | match r.cache with 42 | | Some v -> r.Touch(); v 43 | | None -> r.Load() 44 | interface Cached with 45 | member r.Usage with get() = r.tc 46 | member r.Clear() = 47 | r.lvref.Force() |> ignore> // move data to Stowage 48 | lock r (fun () -> r.cache <- None) // clear data from cache 49 | new(lvref,cache) = 50 | { inherit VRef<'V>() 51 | lvref = lvref 52 | cache = cache 53 | tc = 0 54 | } 55 | 56 | /// Wrap an existing VRef, adding a cache layer. 57 | let wrap (vref:VRef<'V>) : LVRef<'V> = 58 | let lvref = lazy vref 59 | lvref.Force() |> ignore> 60 | (new R<'V>(lvref,None)) :> LVRef<'V> 61 | 62 | /// Non-buffered, immediate stowage, with caching on load. 
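///
/// A hedged contrast with the lazy `stow` defined below (`cV`, `db`,
/// `v`, and `szEst` are illustrative placeholders):
///
///   let r1 = LVRef.stow' cV db v // serialize and stow immediately
///   let r2 = LVRef.stow cV db v szEst // defer until cache pressure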
 62 |     /// Non-buffered, immediate stowage, with caching on load.
 63 |     let inline stow' (cV:Codec<'V>) (db:Stowage) (v:'V) : LVRef<'V> =
 64 |         wrap (VRef.stow cV db v)
 65 | 
 66 |     /// Stow a value lazily, when there is memory pressure in cache or
 67 |     /// when the RscHash address is first required. Until stowage, the
 68 |     /// value will dereference using the given value and size estimate.
 69 |     let stow (c:Codec<'V>) (db:Stowage) (v:'V) (sz:SizeEst) : LVRef<'V> =
 70 |         let ref = new R<'V>(lazy (VRef.stow c db v), Some (struct(v,sz)))
 71 |         Cache.receive (ref :> Cached) (memSize sz)
 72 |         ref :> LVRef<'V>
 73 | 
 74 |     /// Non-caching Load.
 75 |     ///
 76 |     /// When applied to a wrapped LVRef, this will use the cached
 77 |     /// data if available, or otherwise load without caching. If
 78 |     /// applied to other VRef types, it simply loads normally.
 79 |     let load' (ref:LVRef<'V>) : 'V =
 80 |         match ref with
 81 |         | :? R<'V> as r ->
 82 |             match r.cache with
 83 |             | Some (struct(v,_)) -> v // use cached value
 84 |             | None -> VRef.load (r.lvref.Value) // load without caching
 85 |         | _ -> VRef.load ref // not an LVRef, just load directly
 86 | 
 87 |     /// Load a value, caching it briefly for future lookups. (Default Deref().)
 88 |     ///
 89 |     /// If the value was already cached, or has never been fully stowed,
 90 |     /// this will reuse the cached value and extend the lifespan of the
 91 |     /// cache. Otherwise it will load, parse, and cache the value.
 92 |     let inline load (ref:LVRef<'V>) : 'V = VRef.load ref
 93 | 
 94 | /// Compared to EncVRef, EncLVRef will perform LVRef.wrap upon read.
 95 | module EncLVRef =
 96 |     let size = EncVRef.size
 97 |     let inline write (ref:LVRef<_>) (dst:ByteDst) : unit =
 98 |         EncVRef.write ref dst
 99 |     let inline read (cV:Codec<'V>) (db:Stowage) (src:ByteSrc) : LVRef<'V> =
100 |         LVRef.wrap (EncVRef.read cV db src)
101 |     let codec (cV:Codec<'V>) =
102 |         { new Codec<LVRef<'V>> with
103 |             member __.Write ref dst = write ref dst
104 |             member __.Read db src = read cV db src
105 |             member __.Compact _ ref = struct(ref, size)
106 |         }
107 | 
108 | 
--------------------------------------------------------------------------------
/src/Stowage/Data/Measured.fs:
--------------------------------------------------------------------------------
 1 | namespace Stowage
 2 | 
 3 | /// IMeasured indicates a value can be summarized or measured by
 4 | /// another value of type M. In general, the summary should be
 5 | /// much smaller than the value being summarized!
 6 | type IMeasured<'M> =
 7 |     abstract member Measure : 'M
 8 | 
--------------------------------------------------------------------------------
/src/Stowage/Data/MemoryCache.fs:
--------------------------------------------------------------------------------
 1 | namespace Stowage
 2 | 
 3 | open System.Collections.Generic
 4 | 
 5 | module MCache =
 6 | 
 7 | 
 8 |     /// A Memory Cache is essentially an unreliable Dictionary. This
 9 |     /// is unreliable because elements in the table may be eventually
10 |     /// deleted by a Stowage.Cache manager in the background. This is
11 |     /// useful for resource lookups, when resources can be reloaded
12 |     /// or regenerated if not present.
13 |     ///
14 |     /// Note: Use of a Lazy<'V> type is appropriate in some cases,
15 |     /// to enable use of `tryAdd` without race conditions on load.
16 | type C<'K,'V when 'K : equality> = 17 | val internal M : Cache.Manager 18 | val internal D : Dictionary<'K,E<'K,'V>> 19 | new(cm:Cache.Manager, eq:IEqualityComparer<'K>) = 20 | { D = new Dictionary<'K,E<'K,'V>>(eq) 21 | M = cm 22 | } 23 | new() = 24 | let cm = Cache.defaultManager 25 | let eq = EqualityComparer<'K>.Default 26 | new C<'K,'V>(cm,eq) 27 | // cached element type 28 | and internal E<'K,'V when 'K : equality> = 29 | val K : 'K 30 | val V : 'V 31 | val C : C<'K,'V> 32 | val mutable TC : int 33 | new(k,v,c) = { K = k; V = v; C = c; TC = 0 } 34 | member e.Touch() = e.TC <- (e.TC + 1) 35 | interface Cached with 36 | member e.Usage with get() = e.TC 37 | member e.Clear() = lock (e.C.D) (fun () -> 38 | e.C.D.Remove(e.K) |> ignore) 39 | 40 | 41 | /// Attempt to load data from the cache. Thread-safe. 42 | /// 43 | /// There is no guarantee the key is present, even if recently 44 | /// added, due to background management of the cache. 45 | let tryFind (k:'K) (c:C<'K,'V>) : 'V option = 46 | lock (c.D) (fun () -> 47 | match c.D.TryGetValue(k) with 48 | | true,e -> e.Touch(); Some (e.V) 49 | | _ -> None) 50 | 51 | /// Add and return data if key is new, otherwise return existing 52 | /// data. Atomic. Thread-safe. Consider use of Lazy<'V> type to 53 | /// delay value-load operations. 54 | let tryAdd (k:'K) (v:'V) (sz:SizeEst) (c:C<'K,'V>) : 'V = 55 | lock (c.D) (fun () -> 56 | match c.D.TryGetValue(k) with 57 | | true,e -> e.Touch(); e.V 58 | | _ -> 59 | let e = new E<'K,'V>(k,v,c) 60 | c.D.Add(k,e) 61 | c.M.Receive (e :> Cached) sz 62 | e.V) 63 | 64 | /// Add data, replacing existing object in cache. Thread-safe. 65 | let add (k:'K) (v:'V) (sz:SizeEst) (c:C<'K,'V>) : unit = 66 | let e = new E<'K,'V>(k,v,c) 67 | lock (c.D) (fun () -> c.D.Add(k,e)) 68 | c.M.Receive (e :> Cached) sz 69 | 70 | /// Remove specified key data from cache. Thread-safe. 71 | let remove (k:'K) (c:C<'K,'V>) : unit = 72 | lock (c.D) (fun () -> 73 | c.D.Remove(k) |> ignore) 74 | 75 | /// Remove all keys from cache. Thread-safe. 76 | let clear (c:C<'K,'V>) : unit = 77 | lock (c.D) (fun () -> c.D.Clear()) 78 | 79 | 80 | 81 | type MCache<'K,'V when 'K : equality> = MCache.C<'K,'V> 82 | 83 | 84 | -------------------------------------------------------------------------------- /src/Stowage/Data/Monoid.fs: -------------------------------------------------------------------------------- 1 | 2 | namespace Stowage 3 | 4 | type Monoid<'M> = 5 | abstract member Zero : 'M 6 | abstract member Plus : 'M -> 'M -> 'M 7 | 8 | /// Monoid singletons on demand. 9 | type MonoidSource<'M when 'M :> Monoid<'M> and 'M:(new: unit -> 'M)> private () = 10 | static let zero : 'M = (new 'M() :> Monoid<'M>).Zero 11 | static member Instance = (zero :> Monoid<'M>) 12 | 13 | -------------------------------------------------------------------------------- /src/Stowage/Data/README.md: -------------------------------------------------------------------------------- 1 | # Stowage.Data 2 | 3 | This module aims to provide useful data structures above Stowage. 
It also includes Stowage concepts: 4 | 5 | * `Stowage` - remote value storage by secure hash 6 | * `RscHash` - concrete secure hash function 7 | * `Codec` - interpret binary data as values 8 | * `DB` - durable software transactional memory 9 | * `VRef` - remote value reference 10 | * `LVRef` - VRef with caching, delayed write 11 | * `CVRef` - LVRef but uses memory for small values 12 | * `IntMap` - sparse associative array, indexed by uint64 13 | * `Trie` - tree with binary keys, prefix sharing 14 | * `LSMTrie` - trie with write buffering 15 | 16 | TODO: 17 | 18 | * finger-tree sequences for vectors, deques 19 | * SeqHash or 20 | 21 | I'm also interested in developing the SeqHash structure, which is roughly a history-independent finger-tree sequence. But that's low priority. I don't fully grok the details yet. 22 | 23 | -------------------------------------------------------------------------------- /src/Stowage/Data/RscHash.fs: -------------------------------------------------------------------------------- 1 | namespace Stowage 2 | open Data.ByteString 3 | open Konscious.Security.Cryptography 4 | 5 | /// A RscHash is simply a secure hash of fixed size encoded using 6 | /// an unusual base32 alphabet: bcdfghjklmnpqrstBCDFGHJKLMNPQRST. 7 | /// 8 | /// This string is computed from the Blake2B 320-bit secure hash 9 | /// of a binary, and is used as a name or capability to read a 10 | /// binary resource. 11 | /// 12 | /// Note on 10/27/2017: changed to 320 bits. 13 | type RscHash = ByteString 14 | 15 | module RscHash = 16 | /// the base32 alphabet used for Stowage hash references. 17 | let alphabet = "bcdfghjklmnpqrstBCDFGHJKLMNPQRST" 18 | 19 | /// number of bits encoded in hash 20 | let hashBitLen : int = 320 21 | 22 | let inline private hdiv n = 23 | assert (0 = (hashBitLen % n)) 24 | (hashBitLen / n) 25 | 26 | /// number of base32 ASCII characters in hash 27 | let size : int = hdiv 5 28 | 29 | /// expected length of hash in bytes 30 | let private hashByteLen : int = hdiv 8 31 | 32 | // alphabet encoded as bytes array 33 | let private alphabyte : byte[] = 34 | let s = System.Text.Encoding.UTF8.GetBytes(alphabet) 35 | assert (32 = s.Length) 36 | s 37 | 38 | // table lookup for presence of data 39 | let private alphabool : bool[] = 40 | let memb x = Array.exists (int >> ((=) x)) alphabyte 41 | Array.init 256 memb 42 | 43 | // test whether an element is valid within a UTF8 or ASCII hash. 44 | let isHashByte (b : byte) : bool = alphabool.[int b] 45 | 46 | // encode forty bits from src to dst. 47 | let inline private b32e40 (dst : byte[]) (src : byte[]) off = 48 | let dst_off = (off * 8) 49 | let src_off = (off * 5) 50 | let inline r ix = src.[src_off + ix] 51 | let inline w ix v = dst.[dst_off + ix] <- alphabyte.[int v] 52 | // read forty bits of data 53 | let i4 = r 4 54 | let i3 = r 3 55 | let i2 = r 2 56 | let i1 = r 1 57 | let i0 = r 0 58 | // encode data into eight bytes 59 | do w 7 (((i4 &&& 0x1Fuy) )) 60 | do w 6 (((i4 &&& 0xE0uy) >>> 5) ||| 61 | ((i3 &&& 0x03uy) <<< 3)) 62 | do w 5 (((i3 &&& 0x7Cuy) >>> 2)) 63 | do w 4 (((i3 &&& 0x80uy) >>> 7) ||| 64 | ((i2 &&& 0x0Fuy) <<< 1)) 65 | do w 3 (((i2 &&& 0xF0uy) >>> 4) ||| 66 | ((i1 &&& 0x01uy) <<< 4)) 67 | do w 2 (((i1 &&& 0x3Euy) >>> 1)) 68 | do w 1 (((i1 &&& 0xC0uy) >>> 6) ||| 69 | ((i0 &&& 0x07uy) <<< 2)) 70 | do w 0 (((i0 &&& 0xF8uy) >>> 3)) 71 | 72 | // perform a base32 encoding of the Blake2 hash. 
73 | let private b32enc (src : byte[]) : byte[] = 74 | assert ((40 = src.Length) && (64 = size)) 75 | let dst = Array.zeroCreate size 76 | do b32e40 dst src 7 77 | do b32e40 dst src 6 78 | do b32e40 dst src 5 79 | do b32e40 dst src 4 80 | do b32e40 dst src 3 81 | do b32e40 dst src 2 82 | do b32e40 dst src 1 83 | do b32e40 dst src 0 84 | dst 85 | 86 | /// basic bytestring hash 87 | let hash (s : ByteString) : ByteString = 88 | use alg = new HMACBlake2B(hashBitLen) 89 | let bytes = alg.ComputeHash(s.UnsafeArray, s.Offset, s.Length) 90 | BS.unsafeCreateA (b32enc bytes) 91 | 92 | /// Fold over RscHash dependencies represented within a value. 93 | /// 94 | /// Find substrings that look like hashes - appropriate size and 95 | /// character set, separated by non-hash characters. Useful for 96 | /// conservative GC of resources. 97 | let rec foldHashDeps (fn : 's -> RscHash -> 's) (s:'s) (v:ByteString) : 's = 98 | if (v.Length < size) then s else 99 | let hv' = BS.dropWhile (not << isHashByte) v 100 | let struct(h,v') = BS.span isHashByte hv' 101 | let s' = if (size = h.Length) then (fn s h) else s 102 | foldHashDeps fn s' v' 103 | 104 | /// Iterate through RscHash dependencies in a value. 105 | /// Recognizes same RscHash substrings as foldHashDeps. 106 | let rec iterHashDeps (fn : RscHash -> unit) (v:ByteString) : unit = 107 | if (v.Length < size) then () else 108 | let hv' = BS.dropWhile (not << isHashByte) v 109 | let struct(h,v') = BS.span isHashByte hv' 110 | if (size = h.Length) 111 | then fn h 112 | iterHashDeps fn v' 113 | 114 | // for Seq.unfold, which is more efficient than seq {}. 115 | let rec private stepHashDeps (v:ByteString) : (RscHash * ByteString) option = 116 | if (v.Length < size) then None else 117 | let hv' = BS.dropWhile (not << isHashByte) v 118 | let struct(h,v') = BS.span isHashByte hv' 119 | if (size = BS.length h) then Some(h,v') else 120 | stepHashDeps v' 121 | 122 | let seqHashDeps (v:ByteString) : seq = 123 | Seq.unfold stepHashDeps v 124 | 125 | /// Test whether a ByteString matches format of RscHash. 126 | let isValidHash (h:ByteString) : bool = 127 | (size = BS.length h) && (BS.forall isHashByte h) 128 | 129 | 130 | -------------------------------------------------------------------------------- /src/Stowage/Data/Seq.fs: -------------------------------------------------------------------------------- 1 | namespace Stowage 2 | open Data.ByteString 3 | 4 | /// A finger-tree based sequence, modeled above Stowage. 5 | /// 6 | /// Finger-trees are an interesting persistent data structure that offer 7 | /// O(lg(min(K,N-K))) access to the Kth of N elements. This reduces to 8 | /// O(1) access to the first or last few elements. Similar logarithmic 9 | /// properties apply to splitting or concatenation of the finger-trees. 10 | /// 11 | /// Consequently, finger-trees can be used for: queues, stacks, dequeues, 12 | /// lists or vectors, ropes, and many things that can be built above a 13 | /// sequence. 14 | /// 15 | /// Finger trees do have weaknesses: weak memory locality, and history 16 | /// dependent structure. They're useful despite these limitations, but in 17 | /// some cases a more specialized structure might be better. 
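
// A speculative sketch of the surface this module might eventually
// export, per the comment above (nothing here is implemented yet; the
// names and signatures are hypothetical):
//
//   module Seq =
//     val empty   : Tree<'V,'M>                                // O(1)
//     val cons    : 'V -> Tree<'V,'M> -> Tree<'V,'M>           // amortized O(1)
//     val snoc    : Tree<'V,'M> -> 'V -> Tree<'V,'M>           // amortized O(1)
//     val append  : Tree<'V,'M> -> Tree<'V,'M> -> Tree<'V,'M>  // O(lg(min(m,n)))
//     val splitAt : int -> Tree<'V,'M> -> Tree<'V,'M> * Tree<'V,'M> // O(lg(min(k,n-k)))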
18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /src/Stowage/Data/Stowage.Data.fsproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | netstandard2.0 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/Stowage/Data/Stowage.fs: -------------------------------------------------------------------------------- 1 | namespace Stowage 2 | open Data.ByteString 3 | 4 | /// Abstract Remote Storage for Binaries referenced by RscHash. 5 | /// 6 | /// Use of secure hashes is a convenient way to reference binary data. 7 | /// They are immutable and acyclic by construction, cacheable, secure, 8 | /// provider-independent, self-authenticating, implicitly shared, 9 | /// automatically named, decentralized, uniformly sized, and smaller 10 | /// than many full URLs or file paths. 11 | /// 12 | /// The idea for Stowage is to build data structures above binaries, 13 | /// in order to represent larger-than-memory data and distributed, 14 | /// purely functional computations. Our stowage layer doubles as a 15 | /// purely functional variant of "virtual memory", since we can move 16 | /// data we won't soon need into higher-latency storage - an external 17 | /// database or filesystem or network. 18 | /// 19 | /// Stowage resources may ultimately be GC'd like normal values, and 20 | /// we can easily use conservative GC for references between binaries. 21 | /// Stowage is usually one aspect of a durable database that determines 22 | /// a durable "root set" for GC. Unlike most databases, Stowage makes 23 | /// it feasible to model entire databases as first-class values within 24 | /// another database - convenient for modeling versioned systems. 25 | /// 26 | type Stowage = 27 | 28 | /// The Stow operation should add a value to the Stowage database 29 | /// and return its RscHash, such that a subsequent Load can access 30 | /// the data. Additionally, it must atomically Incref the RscHash 31 | /// to prevent concurrent GC. 32 | /// 33 | /// Stowage systems should support values of up to 64MB. Most items 34 | /// should be much smaller, at most a few hundred kilobytes. If the 35 | /// item is too large, an appropriate exception should be raised. 36 | /// 37 | /// NOTE: resources containing sensitive data should include a salt, 38 | /// e.g. an 80-bit random string for entropy. Otherwise, an attacker 39 | /// can construct millions of likely hashes and test whether each is 40 | /// present within the system. 41 | abstract member Stow : ByteString -> RscHash 42 | 43 | /// The Load operation should access data from Stowage. If this 44 | /// data cannot be located, a MissingRsc exception must be raised. 45 | /// 46 | /// There is no access control for Stowage, but the RscHash serves 47 | /// as a secure bearer token and read capability. Consequently, it 48 | /// is important that implementations don't expose the full RscHash 49 | /// through timing attacks. (Exposing the first half is acceptable.) 50 | abstract member Load : RscHash -> ByteString 51 | 52 | /// RscHash references to binaries can be understood as a form of 53 | /// unmanaged resource from perspective of our .Net runtime. But 54 | /// a simple reference counting interface can guard hashes in .Net 55 | /// memory from a premature GC. Upon stowage, Incref is implicit. 
56 | /// Usually, decref will be performed by .Net finalizer (see VRef). 57 | abstract member Decref : RscHash -> unit 58 | abstract member Incref : RscHash -> unit 59 | 60 | /// Exception on Load failure. 61 | exception MissingRsc of Stowage * RscHash 62 | 63 | // TODO: Develop a useful set of Stowage combinators. (Low Priority.) 64 | // layered, cached, mirrored, distributed hashtables... 65 | 66 | module Stowage = 67 | 68 | /// Fake Stowage. Drops everything you give it! 69 | /// 70 | /// Use cases: testing, default 'null' object, or computing an 71 | /// ETAG based on logical compaction of a large data structure. 72 | let fake : Stowage = 73 | { new Stowage with 74 | member __.Stow s = RscHash.hash s 75 | member db.Load h = raise (MissingRsc (db,h)) 76 | member __.Decref _ = () 77 | member __.Incref _ = () 78 | } 79 | 80 | // when tracking deps, don't want to implicitly hold a huge array 81 | // of origin data in memory, so we'll trimBytes first. 82 | let private depcons lst rsc = (BS.trimBytes rsc) :: lst 83 | 84 | // step through dependencies. for use with Seq.unfold 85 | let rec private streamStep (db:Stowage) (struct(hist,rs)) = 86 | match rs with 87 | | (rsc::remrs) -> 88 | if Set.contains rsc hist then streamStep db (struct(hist,remrs)) else 89 | let hist' = Set.add rsc hist 90 | let data = 91 | try Some (db.Load rsc) 92 | with 93 | | MissingRsc _ -> None 94 | let rs' = 95 | match data with 96 | | Some bytes -> RscHash.foldHashDeps depcons remrs bytes 97 | | None -> remrs 98 | Some((rsc,data),struct(hist',rs')) 99 | | [] -> None 100 | 101 | /// Stream all Stowage data from a given set of roots. This is 102 | /// mostly intended for import/export operations. Missing entries 103 | /// are possible due to Stowage state or false positives (we use 104 | /// a conservative algorithm to recognize references). In those 105 | /// cases, we'll report `None` in the data field. 106 | let streamDeps (db:Stowage) (roots:RscHash list) : seq = 107 | Seq.unfold (streamStep db) (struct(Set.empty,roots)) 108 | 109 | 110 | -------------------------------------------------------------------------------- /src/Stowage/Data/VDiff.fs: -------------------------------------------------------------------------------- 1 | namespace Stowage 2 | 3 | /// Value differences for key-indexed values. 4 | type VDiff<'V> = 5 | | InL of 'V // value in left 6 | | InR of 'V // value in right 7 | | InB of 'V * 'V // two values with equality failure 8 | 9 | -------------------------------------------------------------------------------- /src/Stowage/Data/VRef.fs: -------------------------------------------------------------------------------- 1 | namespace Stowage 2 | open Data.ByteString 3 | 4 | /// A VRef is an abstract reference to a value whose binary 5 | /// representation is held in Stowage, addressed by secure 6 | /// hash. The abstract nature is to separate caching, GC, 7 | /// lazy serialization, and similar features. To simplify 8 | /// caching, Deref also returns a serialized size estimate. 9 | /// 10 | /// Comparison of VRefs is based on type and secure hash, so 11 | /// we assume a stable codec per type (at least within context 12 | /// of the comparisons). The ToString method simply returns the 13 | /// "{secureHash}". 
14 | [] 15 | type VRef<'V>() = 16 | abstract member Addr : RscHash with get 17 | abstract member Deref : unit -> struct('V * SizeEst) 18 | 19 | override r.ToString() = sprintf "{%s}" (BS.toString r.Addr) 20 | override r.GetHashCode() = ByteString.Hash32 (r.Addr) |> int 21 | override x.Equals yobj = 22 | match yobj with 23 | | :? VRef<'V> as y -> System.Object.ReferenceEquals(x,y) || (x.Addr = y.Addr) 24 | | _ -> false 25 | interface System.IComparable with 26 | member x.CompareTo yobj = 27 | match yobj with 28 | | :? VRef<'V> as y -> 29 | if System.Object.ReferenceEquals(x,y) then 0 else 30 | ByteString.Compare (x.Addr) (y.Addr) 31 | | _ -> invalidArg "yobj" "comparing values of different types" 32 | 33 | module VRef = 34 | 35 | /// Create VRef by packaging Codec, Stowage DB, and RscHash. 36 | /// Assumes caller is passing ownership, so for GC we will 37 | /// call Stowage Decref on the hash when done with the VRef. 38 | /// This is the "simplest" VRef, without any caching. 39 | let wrap' (c:Codec<'V>) (db:Stowage) (h0:RscHash) : VRef<'V> = 40 | assert(BS.length h0 = RscHash.size) // partial validation 41 | let h = BS.trimBytes h0 42 | { new VRef<'V>() with 43 | override __.Addr with get() = h 44 | override ref.Deref() = 45 | let bytes = db.Load h 46 | let szest = uint64 (BS.length bytes) 47 | let value = Codec.readBytes c db bytes 48 | System.GC.KeepAlive(ref) 49 | struct(value,szest) 50 | override __.Finalize() = db.Decref h 51 | } 52 | 53 | /// As wrap', but will first Incref the RscHash to resist premature 54 | /// garbage collection of the referenced data. 55 | let inline wrap (c:Codec<'V>) (db:Stowage) (h:RscHash) : VRef<'V> = 56 | db.Incref h 57 | wrap' c db h 58 | 59 | /// Create VRef by eagerly Stowing a value. 60 | let inline stow (c:Codec<'V>) (db:Stowage) (v:'V) : VRef<'V> = 61 | wrap' c db (Codec.stow c db v) 62 | 63 | /// load the VRef's data from Stowage (ignoring the SizeEst). 
64 | let inline load (ref:VRef<'V>) : 'V = 65 | let struct(value,_) = ref.Deref() 66 | value 67 | 68 | module EncRscHash = 69 | let cPrefix = byte '{' 70 | let cSuffix = byte '}' 71 | let size = uint64 (2 + RscHash.size) 72 | let write (h:RscHash) (dst:ByteDst) = 73 | assert(h.Length = RscHash.size) 74 | ByteStream.writeByte cPrefix dst 75 | ByteStream.writeBytes h dst 76 | ByteStream.writeByte cSuffix dst 77 | let read (src:ByteSrc) : RscHash = 78 | let bPrefix = ByteStream.readByte src 79 | if (bPrefix <> cPrefix) then raise ByteStream.ReadError 80 | let h = ByteStream.readBytes (RscHash.size) src 81 | let bSuffix = ByteStream.readByte src 82 | if (bSuffix <> cSuffix) then raise ByteStream.ReadError 83 | h 84 | 85 | 86 | module EncVRef = 87 | let size = EncRscHash.size 88 | let inline write (ref:VRef<_>) (dst:ByteDst) : unit = 89 | EncRscHash.write (ref.Addr) dst 90 | let inline read (cV:Codec<'V>) (db:Stowage) (src:ByteSrc) : VRef<'V> = 91 | let h = EncRscHash.read src 92 | VRef.wrap cV db h 93 | let codec (cV:Codec<'V>) = 94 | { new Codec> with 95 | member __.Write ref dst = write ref dst 96 | member __.Read db src = read cV db src 97 | member __.Compact _ ref = struct(ref, size) 98 | } 99 | 100 | -------------------------------------------------------------------------------- /src/Stowage/RCTable.fs: -------------------------------------------------------------------------------- 1 | 2 | namespace Stowage 3 | 4 | open System 5 | open System.Security 6 | open System.Runtime.InteropServices 7 | 8 | /// Reference Tracking 9 | /// 10 | /// The stowage system tracks secure hash references from .Net memory 11 | /// using a table with reference counts. Resources referenced by this 12 | /// table should not be GC'd from stowage even if they lack persistent 13 | /// roots. The decref will usually be performed via .Net finalizers. 14 | [< SecuritySafeCriticalAttribute >] 15 | module internal RCTable = 16 | // The current implementation is a hierarchical hashtable with 17 | // 64-bit entries where keys use 58 bits and reference counts 18 | // are 6 bits. 19 | // 20 | // The 'hierarchical' aspect solves the problem of saturating 21 | // a small 6-bit refct. If we surpass the max refct, we'll add 22 | // the item to a child table representing a higher 'digit'. We 23 | // can borrow from this digit upon decref. So a few layers of 24 | // table essentially give us extra reference count bits. 25 | // 26 | // This design is thus best for cases where most items have small 27 | // reference counts, which is a reasonable assumption for most 28 | // use cases. 29 | // 30 | // Meanwhile, the 58-bit IDs provide reasonable resistance to 31 | // hash collisions. We'll still likely have a few collisions in 32 | // a billion references. But for Stowage, we shouldn't have a 33 | // billion refs in memory (since that would cost a hundred gigs) 34 | // and a few collisions isn't going to hurt much. 
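    // Worked example (illustrative, derived from the code below): with
    // rcBits = 6 we get rcMax = 63 and Digit = 56. Increfing an id whose
    // local count is already 63 pushes one Digit into the child table
    // and resets the local count to (63 - 56) + 1 = 8. When a local
    // count of 1 is decrefed and the child table still holds the id, a
    // Digit (56) is borrowed back instead of deleting the entry. The
    // 7-count buffer keeps incref/decref churn near the saturation
    // point from thrashing the child table.
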
35 | [] 36 | type Elem = 37 | val v : uint64 38 | static member rcBits : int = 6 39 | static member idBits : int = (64 - Elem.rcBits) 40 | static member rcMask : uint64 = (1UL <<< Elem.rcBits) - 1UL 41 | static member rcMax : uint16 = uint16 Elem.rcMask 42 | static member idMask : uint64 = (1UL <<< Elem.idBits) - 1UL 43 | new(ev : uint64) = { v = ev } 44 | new(id : uint64, rc : uint16) = 45 | assert(Elem.rcMask >= (uint64 rc)) 46 | Elem((id <<< Elem.rcBits) ||| (uint64 rc)) 47 | member inline e.rc with get() : uint16 = uint16 (e.v &&& Elem.rcMask) 48 | member inline e.id with get() : uint64 = (e.v >>> Elem.rcBits) 49 | static member size : int = 8 50 | 51 | let inline elemOff (ix:nativeint) : nativeint = ((nativeint Elem.size) * ix) 52 | let inline getElemAt (p:nativeint) (ix:nativeint) : Elem = 53 | // wat? where is ReadUInt64? 54 | let i64 = Marshal.ReadInt64(p + (elemOff ix)) 55 | Elem(uint64 i64) 56 | let inline setElemAt (p:nativeint) (ix:nativeint) (e:Elem) = 57 | Marshal.WriteInt64(p + (elemOff ix), int64 e.v) 58 | let inline clearElemAt p ix = setElemAt p ix (Elem(0UL)) 59 | 60 | let allocData (sz:int) : nativeint = 61 | assert(sz >= 4) 62 | let ct = (1n <<< sz) 63 | let p = Marshal.AllocHGlobal (elemOff ct) 64 | // initialize the memory 65 | let rec loop ix = 66 | clearElemAt p ix 67 | if(0n <> ix) then loop (ix - 1n) 68 | loop (ct - 1n) 69 | p 70 | 71 | // Our table is represented in unmanaged memory in order to guarantee 72 | // it may scale proportional to address space. Geometric growth, powers 73 | // of two sizes, never empty. At most 2/3 fill. The initial size is 74 | // a few kilobytes, enough to track a few hundred items. 75 | // 76 | // This table assumes single-threaded access, and should be locked for 77 | // use from any multi-threaded context. 78 | [] 79 | type Table = 80 | val mutable private Data : nativeint 81 | val mutable private Size : int 82 | val mutable private Fill : nativeint 83 | val mutable private Next : Table // next RC digit or null 84 | 85 | // "digit" size is less than rcMax to provide a small buffer 86 | // between incref and decref touching the `Next` table. 87 | static member private Digit : uint16 = 88 | let buffer = 7us 89 | assert(Elem.rcMax > (2us * buffer)) 90 | (Elem.rcMax - buffer) 91 | 92 | private new(sz:int) = 93 | { Data = allocData sz 94 | Size = sz 95 | Fill = 0n 96 | Next = null 97 | } 98 | new() = new Table(10) 99 | 100 | override tbl.Finalize() = 101 | Marshal.FreeHGlobal(tbl.Data) 102 | tbl.Data <- 0n 103 | 104 | // test for finalization for safer shutdown 105 | member inline private tbl.Finalized() : bool = 106 | (0n = tbl.Data) 107 | 108 | // find returns struct(index * refct). 109 | // 110 | // This assumes the tbl.Fill < tbl.Size and hence we have some 111 | // zero elements. If the identifier isn't part of our table, we 112 | // return the appropriate location (with refct 0) 113 | member private tbl.Find (idFull:uint64) : struct(nativeint * uint16) = 114 | let id = (idFull &&& Elem.idMask) 115 | let mask = ((1n <<< tbl.Size) - 1n) 116 | let rec loop ix = 117 | let e = getElemAt (tbl.Data) ix 118 | if ((e.id = id) || (0us = e.rc)) then struct(ix,e.rc) else 119 | loop ((1n + ix) &&& mask) 120 | loop ((nativeint id) &&& mask) 121 | 122 | /// Test whether ID is present within table. 
123 | member tbl.Contains (id:uint64) : bool = 124 | if(tbl.Finalized()) then false else 125 | let struct(_,rc) = tbl.Find id 126 | (0us <> rc) 127 | 128 | // get the index, ignore the refct 129 | member inline private tbl.IndexOf id = 130 | let struct(ix,_) = tbl.Find id 131 | ix 132 | 133 | static member private MaxSize : int = 134 | (8 * (System.IntPtr.Size - 1)) 135 | 136 | // grow table; geometric growth 137 | member private tbl.Grow () : unit = 138 | if (tbl.Finalized()) 139 | then invalidOp "incref after finalize" 140 | let old_size = tbl.Size 141 | let old_data = tbl.Data 142 | if (old_size >= Table.MaxSize) 143 | then raise (System.OutOfMemoryException()) 144 | let new_size = (old_size + 1) 145 | let new_data = allocData new_size 146 | tbl.Data <- new_data 147 | tbl.Size <- new_size 148 | let rec loop ix = 149 | let e = getElemAt old_data ix 150 | if (0us <> e.rc) 151 | then setElemAt (tbl.Data) (tbl.IndexOf (e.id)) e 152 | if (0n = ix) then () else loop (ix - 1n) 153 | loop ((1n <<< old_size) - 1n) 154 | Marshal.FreeHGlobal(old_data) 155 | 156 | member inline private tbl.Reserve() : unit = 157 | let overfilled = ((tbl.Fill * 3n) >>> (1 + tbl.Size)) <> 0n 158 | if overfilled then tbl.Grow() 159 | 160 | /// Add ID to the table, or incref existing ID 161 | member tbl.Incref (id:uint64) : unit = 162 | tbl.Reserve() 163 | let struct(ix,rc) = tbl.Find id 164 | if (0us = rc) then 165 | setElemAt (tbl.Data) ix (Elem(id, 1us)) 166 | tbl.Fill <- (tbl.Fill + 1n) 167 | else if(Elem.rcMax = rc) then 168 | if (null = tbl.Next) 169 | then tbl.Next <- new Table(7) 170 | tbl.Next.Incref id 171 | setElemAt (tbl.Data) ix (Elem(id,(Elem.rcMax - Table.Digit) + 1us)) 172 | else 173 | setElemAt (tbl.Data) ix (Elem(id, rc + 1us)) 174 | 175 | member tbl.Decref (id:uint64) : unit = 176 | if(tbl.Finalized()) then () else 177 | let struct(ix,rc) = tbl.Find id 178 | if (1us = rc) then 179 | let delete = (null = tbl.Next) || not (tbl.Next.Contains id) 180 | if delete then tbl.Delete ix else 181 | // borrow Table.Digit from next table 182 | tbl.Next.Decref id 183 | setElemAt (tbl.Data) ix (Elem(id, Table.Digit)) 184 | else if(0us = rc) then invalidOp "refct already zero!" 185 | else setElemAt (tbl.Data) ix (Elem(id, rc - 1us)) 186 | 187 | // clear the specified index, then walk the hashtable to shift 188 | // potential linear-collision items into appropriate locations 189 | member private tbl.Delete (ixDel:nativeint) : unit = 190 | clearElemAt (tbl.Data) ixDel 191 | tbl.Fill <- (tbl.Fill - 1n) 192 | let mask = ((1n <<< tbl.Size) - 1n) 193 | let rec loop ix = 194 | let e = getElemAt (tbl.Data) ix 195 | if (0us = e.rc) then () else 196 | let ix' = tbl.IndexOf (e.id) 197 | if (ix <> ix') 198 | then clearElemAt (tbl.Data) ix 199 | setElemAt (tbl.Data) ix' e 200 | loop ((1n + ix) &&& mask) 201 | loop ((1n + ixDel) &&& mask) 202 | 203 | 204 | -------------------------------------------------------------------------------- /src/Stowage/README.md: -------------------------------------------------------------------------------- 1 | # Stowage 2 | 3 | This package provides an LMDB-backed implementation for the abstract `Stowage` and `DB` types from the `Stowage.Data` package. This enables representations of durable and larger-than-memory data types with garbage collection, and also for transactional named variables. This is the first and primary backend for Stowage, though we might eventually pursue cloud-based implementations. 
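
To get a feel for the abstract `Stowage` interface this package implements, here is a minimal sketch (illustrative only; `Stowage.fake` is the test stub from `Stowage.Data`, and `BS.fromString` is assumed to exist in `Data.ByteString`):

    open Stowage
    open Data.ByteString

    let db : Stowage = Stowage.fake           // drops all writes; fine for tests
    let h = db.Stow (BS.fromString "hello")   // returns the RscHash for the binary
    assert (RscHash.isValidHash h)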
 4 | 
 5 | 
--------------------------------------------------------------------------------
/src/Stowage/Stowage.fsproj:
--------------------------------------------------------------------------------
 1 |  
 2 | 
 3 | 
 4 | netstandard2.0
 5 | 
 6 | 
 7 | 
 8 | 
 9 | 
10 | 
11 | 
12 | 
13 | 
14 | 
15 | 
16 | 
17 | 
18 | 
--------------------------------------------------------------------------------
/src/Stowage/Test/.gitignore:
--------------------------------------------------------------------------------
1 | testDB
2 | 
--------------------------------------------------------------------------------
/src/Stowage/Test/Program.fs:
--------------------------------------------------------------------------------
1 | module Program = let [<EntryPoint>] main _ = 0
--------------------------------------------------------------------------------
/src/Stowage/Test/Test.fsproj:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | 
 4 | netcoreapp2.0
 5 | 
 6 | false
 7 | 
 8 | 
 9 | 
10 | 
11 | 
12 | 
13 | 
14 | 
15 | 
16 | 
17 | 
18 | 
19 | 
20 | 
21 | 
22 | 
23 | 
24 | 
25 | 
--------------------------------------------------------------------------------
/src/Wikilon/Database.fs:
--------------------------------------------------------------------------------
 1 | namespace Wikilon
 2 | open Awelon
 3 | open Stowage
 4 | open Data.ByteString
 5 | 
 6 | // Wikilon will host multiple dictionaries and multiple users.
 7 | //
 8 | // Active dictionaries will define a set of bots via `app-*`. But we'll
 9 | // also have inactive dictionaries such as checkpoints and histories, or
10 | // perhaps based on quotas and payments.
11 | //
12 | // We could perhaps model Wikilon itself using the same fractal system,
13 | // with a set of bots operating on a partially persistent environment. It
14 | // might also be useful to have a `master` dictionary.
15 | //
16 | // In any case, it should be easy to build projections over variables,
17 | // and to wait on variables in a projection for COMET-style real-time
18 | // updates.
19 | //
20 | //module WikiState = 
21 | 
22 | 
23 | 
24 | 
25 | 
26 | 
27 | 
--------------------------------------------------------------------------------
/src/Wikilon/History.fs:
--------------------------------------------------------------------------------
 1 | 
 2 | namespace Wikilon
 3 | open Stowage
 4 | 
 5 | /// A time-stamp. Alias to .Net System Ticks. This is mostly for
 6 | /// internal use within applications.
 7 | type TimeStamp = int64
 8 | module TimeStamp =
 9 |     let now () : TimeStamp = System.DateTime.UtcNow.Ticks
10 |     let toDateTime (tm:TimeStamp) : System.DateTime = System.DateTime(tm,System.DateTimeKind.Utc)
11 |     let fromDateTime (dt:System.DateTime) : TimeStamp = dt.ToUniversalTime().Ticks
12 | 
13 | // A snapshot-based history model.
14 | module Snapshot =
15 | 
16 |     /// A history is modeled as a list of time-stamped snapshots with
17 |     /// the latest snapshots near the head. Exponential decay is used
18 |     /// to limit the list length, erasing intermediate snapshots to
19 |     /// keep entries fairly well distributed over time. The longer our
20 |     /// history, the more snapshots we'll preserve. Length can be set
21 |     /// via the codec, to erase entries upon compaction.
22 |     type H<'V> = (struct(TimeStamp * 'V)) list
23 | 
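    // Illustrative sketch, not part of the original file: a three-entry
    // history, newest first; timestamps are arbitrary tick values.
    let private exampleH : H<int> =
        [ struct(300L, 3); struct(200L, 2); struct(100L, 1) ]
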
24 |     /// Add an entry to the history. Adds at the best location. Under
25 |     /// normal circumstances, it's assumed you'll add timestamps
26 |     /// near the head (i.e. for more recent entries), so this would
27 |     /// be O(1).
28 |     let rec add (tm:TimeStamp) (v:'V) (h:H<'V>) : H<'V> =
29 |         match h with
30 |         | (struct(tmHd,vHd)::h') when (tmHd > tm) ->
31 |             struct(tmHd,vHd) :: (add tm v h')
32 |         | _ -> (struct(tm,v)::h)
33 | 
34 |     /// Find the historical entry most relevant to a given timestamp.
35 |     /// May return None, if no entry exists for the given time.
36 |     let rec tryFind (tm:TimeStamp) (h:H<'V>) : 'V option =
37 |         match h with
38 |         | (struct(tmHd,vHd)::h') ->
39 |             if (tm >= tmHd) then Some vHd else
40 |             tryFind tm h'
41 |         | [] -> None
42 | 
43 |     let inline private tmDiff tm tm' = abs (tm - tm')
44 | 
45 |     // find the least difference of timestamps within a history
46 |     let rec private leastTimeDiff tm ltd hst =
47 |         match hst with
48 |         | (struct(tm',_)::hst') ->
49 |             let ltd' = min ltd (tmDiff tm tm')
50 |             leastTimeDiff tm' ltd' hst'
51 |         | [] -> ltd
52 | 
53 |     // remove the first entry matching a timestamp difference
54 |     let rec private delTimeDiff tm ltd hst =
55 |         match hst with
56 |         | (struct(tm',v)::hst') ->
57 |             if ltd = (tmDiff tm tm') then hst' else
58 |             struct(tm',v)::(delTimeDiff tm' ltd hst')
59 |         | [] -> []
60 | 
61 |     // remove one entry from history based on least time difference.
62 |     // Does not remove the head entry of the chunk.
63 |     let private decayChunk hst =
64 |         match hst with
65 |         | (struct(tm0,v0)::hst') ->
66 |             let ltd0 = System.Int64.MaxValue
67 |             let ltd = leastTimeDiff tm0 ltd0 hst'
68 |             struct(tm0,v0)::(delTimeDiff tm0 ltd hst')
69 |         | [] -> []
70 | 
71 |     // exact-sized split list (may failwith "insufficient data").
72 |     // I assume `n` is relatively small, so I can use the stack.
73 |     let rec private splitListAt n lst =
74 |         if (0 = n) then struct([],lst) else
75 |         match lst with
76 |         | (hd::lst') ->
77 |             let struct(tl,rem) = splitListAt (n-1) lst'
78 |             struct(hd::tl,rem)
79 |         | [] -> failwith "insufficient data"
80 | 
81 |     // take fair-sized chunks of size at least k. Fair-sized means
82 |     // the smallest chunk and largest chunk differ by at most one
83 |     // element in size. When our list is large compared to k, the
84 |     // size of our chunks will swiftly approach k (from above).
85 |     let private fairChunks (k:int) (lst0:'V list) : ('V list) seq =
86 |         assert(k > 0)
87 |         let n = List.length lst0
88 |         if (0 = n) then Seq.empty else
89 |         let ct = n / k // ct chunks of at least size k
90 |         if (ct < 2) then Seq.singleton lst0 else
91 |         let szMin = (n/ct) // minimum size for each chunk
92 |         assert(szMin >= k) // our contract assumption
93 |         let rem = (n%ct) // first rem chunks have sz = szMin+1
94 |         let step (struct(idx,lst)) =
95 |             if (idx = ct) then (assert(List.isEmpty lst); None) else
96 |             let sz = szMin + if (idx < rem) then 1 else 0
97 |             let struct(chunk,lst') = splitListAt sz lst
98 |             Some(chunk, struct(idx+1,lst'))
99 |         Seq.unfold step (struct(0,lst0))
100 | 
101 |     /// Decay a history by removing about 1/K of the snapshots, for K
102 |     /// at least 3. Based on an exponential decay model with fairness.
103 |     let decay (k:int) (hst:H<'V>) : H<'V> =
104 |         assert(k >= 3)
105 |         hst |> fairChunks k |> Seq.map decayChunk |> List.concat
106 | 
107 |     // incremental decay until quota is reached (eliminating ~1/K per step)
108 |     let rec private decayToQuota (q:int) (k:int) (h:H<'V>) : H<'V> =
109 |         if (q >= (List.length h)) then h else
110 |         decayToQuota q k (decay k h)
111 | 
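    // Illustrative: `decay 3` drops roughly a third of the snapshots in
    // one pass, always removing an entry whose timestamp is closest to a
    // neighbor's; `decayToQuota 100 9 h` would shrink h to about 100
    // entries, erasing roughly 1/9 of them per pass.
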
112 |     /// Codec with a lossy, exponential-decay inspired compaction model.
113 |     ///
114 |     /// With this codec, you choose an approximate number of snapshots
115 |     /// to retain. Upon compaction, snapshots will be incrementally
116 |     /// erased until the quota is met, using a fair exponential decay
117 |     /// algorithm that leaves snapshots of the distant past spread
118 |     /// further apart in time than snapshots of recent changes. This
119 |     /// ensures deep history is robust against large numbers of "recent"
120 |     /// updates, while keeping size roughly constant (modulo changes in
121 |     /// the size of individual snapshots).
122 |     let codec (entryQuota:int) (cV:Codec<'V>) : Codec<H<'V>> =
123 |         let cTV = EncPair.codec' (EncVarInt.codec) cV
124 |         { new Codec<H<'V>> with
125 |             member __.Write h dst = EncArray.write cTV (List.toArray h) dst
126 |             member __.Read db src = List.ofArray (EncArray.read cTV db src)
127 |             member __.Compact db h0 =
128 |                 let hQ = decayToQuota entryQuota 9 h0
129 |                 let struct(hQA',sz) = EncArray.compact' cTV db (List.toArray hQ)
130 |                 struct(List.ofArray hQA', sz)
131 |         }
132 | 
133 |     // TODO: snapshot histories are useful for dictionaries, but an alternative
134 |     // is an event-based update history. In that case, we would want to merge
135 |     // updates during the decay step - i.e. a monoidal update-event type. This
136 |     // might be worth developing for some other parts of Wikilon. Eventually.
137 | 
138 | 
139 | 
140 | 
141 | 
142 | 
--------------------------------------------------------------------------------
/src/Wikilon/README.md:
--------------------------------------------------------------------------------
 1 | # Wikilon Library
 2 | 
 3 | This Wikilon library is responsible for providing the web-service interface.
 4 | 
 5 | Desiderata:
 6 | 
 7 | * Defining, Reviewing, and Updating Words
 8 | * Dictionary import, export, versioning.
 9 | * Multi-Word Views and Edit Sessions
10 | * Simple REPL and Spreadsheet views
11 | * Multiple dictionaries
12 | 
13 | 
--------------------------------------------------------------------------------
/src/Wikilon/User.fs:
--------------------------------------------------------------------------------
 1 | namespace Wikilon
 2 | open Data.ByteString
 3 | open Stowage
 4 | 
 5 | // Wikilon's "User model".
 6 | //
 7 | // I'm not familiar with conventional user models. My idea for Wikilon
 8 | // is to model a user as something like a game avatar, with an inventory
 9 | // and equipment and location. In this case, the 'inventory' can include
10 | // authorities and clipboards, 'equipment' can support preferences, macros,
11 | // and views, and 'location' is something like a navigation history and
12 | // session information.
13 | //
14 | // The concrete representation of a user might be an Awelon dictionary,
15 | // to support easy export, import, and debugging. Alternatively, we could
16 | // use a fragment of a dictionary.
17 | //
18 | // I'm interested in supporting remote authentication, e.g. from Google
19 | // or Facebook. But I could support simple passwords, too, e.g. using a
20 | // salt and encoded password pair.
21 | //
22 | // This is relatively low priority, however. Early on, I can start with
23 | // just admin vs guest.
24 | 
25 | (*
26 | module User =
27 | 
28 |   /// 
29 | 
30 |   let private getEntropy (n : int) : ByteString =
31 |       let mem = Array.zeroCreate n
32 |       do RandomNumberGenerator.Create().GetBytes(mem)
33 |       BS.unsafeCreateA mem
34 | 
35 |   let createSalt () : ByteString =
36 |       getEntropy 64 |> RscHash.hash |> BS.take 24
37 | 
38 | 
39 |   /// Instead of storing plain-text passwords, we'll mangle things.
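  // One plausible completion of the stub below (speculative, not the
  // author's design): combine salt and password, hash, and keep a
  // prefix, mirroring createSalt above. Assumes a BS.append exists
  // in Data.ByteString.
  //
  //   let manglePassword salt pass =
  //       RscHash.hash (BS.append salt pass) |> BS.take 24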
40 | 41 | let manglePassword salt pass = 42 | *) 43 | -------------------------------------------------------------------------------- /src/Wikilon/WS.fs: -------------------------------------------------------------------------------- 1 | namespace Wikilon 2 | open Stowage 3 | open Data.ByteString 4 | open Suave 5 | 6 | // The main goal right now is to get something useful running ASAP. 7 | 8 | module WS = 9 | 10 | type Params = 11 | { db : Stowage.DB 12 | admin : ByteString 13 | // might add logging, etc. 14 | } 15 | 16 | 17 | 18 | let mkApp (p:Params) = 19 | Successful.OK ("Hello World") 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/Wikilon/Wikilon.fsproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | netstandard2.0 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /wikilon.nginx-config: -------------------------------------------------------------------------------- 1 | # This is a NGINX reverse-proxy configuration for Wikilon. 2 | # At the moment, it's in an experimental status/condition. 3 | # 4 | # 5 | server { 6 | listen 443; 7 | listen [::]:443; 8 | 9 | #server_name example.com; 10 | 11 | ssl_certificate /etc/nginx/cert.crt; 12 | ssl_certificate_key /etc/nginx/cert.key; 13 | 14 | ssl on; 15 | ssl_session_cache builtin:1000 shared:SSL:10m; 16 | ssl_protocols TLSv1 TLSv1.1 TLSv1.2; 17 | ssl_ciphers HIGH:!aNULL:!eNULL:!EXPORT:!CAMELLIA:!DES:!MD5:!PSK:!RC4; 18 | ssl_prefer_server_ciphers on; 19 | 20 | #access_log /var/log/nginx/access.log; 21 | 22 | location / { 23 | # default Wikilon listen port is localhost:3000 24 | proxy_pass http://localhost:3000; 25 | #proxy_buffering off; 26 | 27 | proxy_set_header Host $host; 28 | proxy_set_header X-Real-IP $remote_addr; 29 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 30 | proxy_set_header X-Forwarded-Proto $scheme; 31 | 32 | #default proxy_redirect should be okay. 33 | #Wikilon shouldn't be using non-relative URIs anyway. 34 | } 35 | 36 | # might need special handlers for websockets? 37 | # But I'll just use AJAX for now. 38 | } 39 | 40 | 41 | 42 | server { 43 | # generic redirect http to https 44 | listen 80; 45 | listen [::]:80; 46 | return 301 https://$host$request_uri; 47 | } 48 | 49 | 50 | --------------------------------------------------------------------------------