├── .gitattributes
├── .gitignore
├── LICENSE
├── README.md
├── app.fsproj
├── docs
│   ├── AwelonFML.md
│   ├── AwelonKPN.md
│   ├── AwelonLang.md
│   ├── Brainstorm.md
│   ├── Generics.md
│   ├── HTXM.md
│   ├── ImmutableAwelon.md
│   ├── Indexing.md
│   ├── KPN_Effects.md
│   ├── Perf.md
│   ├── Runtime.md
│   ├── SLRP-Article.md
│   ├── SLRP.md
│   └── Todo.md
├── hs
│   ├── Deprecated.md
│   ├── Setup.hs
│   ├── app
│   │   └── Main.hs
│   ├── src
│   │   ├── Awelon.hs
│   │   ├── Awelon
│   │   │   ├── CX.hs
│   │   │   ├── Dict.hs
│   │   │   ├── Dict
│   │   │   │   ├── Format.hs
│   │   │   │   └── Rename.hs
│   │   │   ├── Hash.hs
│   │   │   └── Syntax.hs
│   │   ├── Wikilon.hs
│   │   └── Wikilon
│   │       ├── CBT.hs
│   │       ├── CX.hs
│   │       ├── DB.hs
│   │       ├── KVM.hs
│   │       └── RT.hs
│   ├── stack.yaml
│   ├── test
│   │   └── Spec.hs
│   └── wikilon.cabal
├── src
│   ├── Awelon
│   │   ├── Awelon.fsproj
│   │   ├── DictIndex.fs
│   │   ├── DictSearch.fs
│   │   ├── Dictionary.fs
│   │   ├── Interpret.fs
│   │   ├── LocalVar.fs
│   │   ├── Parse.fs
│   │   ├── ParseExt.fs
│   │   ├── Test
│   │   │   ├── Program.fs
│   │   │   ├── Test.fsproj
│   │   │   └── Tests.fs
│   │   ├── TypeCheck.fs
│   │   └── WordVersion.fs
│   ├── Data.ByteString
│   │   ├── ByteString.fs
│   │   ├── Data.ByteString.fsproj
│   │   ├── Stream.fs
│   │   ├── Tests
│   │   │   ├── Program.fs
│   │   │   ├── Tests.fs
│   │   │   └── Tests.fsproj
│   │   └── Tree.fs
│   ├── Main.fs
│   ├── Stowage
│   │   ├── Data
│   │   │   ├── CByteString.fs
│   │   │   ├── CVRef.fs
│   │   │   ├── Cache.fs
│   │   │   ├── Codec.fs
│   │   │   ├── CommonEncoders.fs
│   │   │   ├── DB.fs
│   │   │   ├── DurableCache.fs
│   │   │   ├── FingerTree.fs
│   │   │   ├── IntMap.fs
│   │   │   ├── LSMTrie.fs
│   │   │   ├── LVRef.fs
│   │   │   ├── Measured.fs
│   │   │   ├── MemoryCache.fs
│   │   │   ├── Monoid.fs
│   │   │   ├── README.md
│   │   │   ├── RscHash.fs
│   │   │   ├── Seq.fs
│   │   │   ├── Stowage.Data.fsproj
│   │   │   ├── Stowage.fs
│   │   │   ├── Trie.fs
│   │   │   ├── VDiff.fs
│   │   │   └── VRef.fs
│   │   ├── LMDB.fs
│   │   ├── LMDBFFI.fs
│   │   ├── RCTable.fs
│   │   ├── README.md
│   │   ├── Stowage.fsproj
│   │   └── Test
│   │       ├── .gitignore
│   │       ├── Program.fs
│   │       ├── Test.fsproj
│   │       └── Tests.fs
│   └── Wikilon
│       ├── Database.fs
│       ├── History.fs
│       ├── README.md
│       ├── User.fs
│       ├── WS.fs
│       └── Wikilon.fsproj
└── wikilon.nginx-config
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.fs text eol=lf
2 | *.fsproj text eol=lf
3 |
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | .stack-work/
3 | .paket/paket.exe
4 | packages/
5 | bin/
6 | obj/
7 | *.userprefs
8 | wiki/
9 |
10 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright David Barbour (c) 2017
2 |
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are met:
7 |
8 | * Redistributions of source code must retain the above copyright
9 | notice, this list of conditions and the following disclaimer.
10 |
11 | * Redistributions in binary form must reproduce the above
12 | copyright notice, this list of conditions and the following
13 | disclaimer in the documentation and/or other materials provided
14 | with the distribution.
15 |
16 | * Neither the name of David Barbour nor the names of other
17 | contributors may be used to endorse or promote products derived
18 | from this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Wikilon
2 | =======
3 |
4 | Wikilon is a wiki-inspired software platform and development environment for the Awelon project and the Awelon programming language.
5 |
6 | [Awelon programming language](docs/AwelonLang.md) has a simplistic, Forth-like syntax, a simple semantics, and a purely functional evaluation model based on confluent rewriting of concatenative combinators. This simple syntax can be leveraged for projectional editing, and views may be preserved over evaluation.
7 |
8 | Awelon computations cannot reference external data. Instead, Awelon codebases may serve as simple databases, documents, filesystems, or spreadsheets, depending on how they're used and updated. Awelon explores non-conventional [application models](docs/ApplicationModel.md).
9 |
10 | Wikilon presents the Awelon language through a web service, with wiki inspirations.
11 |
12 | ## Installation
13 |
14 | Wikilon is implemented using F# on CLR. The code generation and JIT capabilities of CLR or JVM are convenient for implementing a lightweight compiler for user created code. CodeDOM and support for tail calls have me favoring CLR.
15 |
16 | Dependencies:
17 |
18 | * [LMDB](http://www.lmdb.tech/doc/)
19 | * sudo apt-get install liblmdb-dev
20 | * [.Net core](https://www.microsoft.com/net/core#linuxubuntu) tools
21 | * instructions on linked website
22 |
23 | Assuming the dependencies and a good Internet connection, you may use `dotnet restore` to download required .Net packages. Use `dotnet run -- -help` to view command line options. *Aside:* I'm favoring .Net core over mono largely for the streamlined `dotnet` CLI tooling. This does incur a few unfortunate opportunity costs; for example, WebSharper is not yet available for .Net core.
24 |
25 | ## Components
26 |
27 | Primary project components:
28 |
29 | * `src/Wikilon` - the web application and UI definitions
30 | * `src/Awelon` - Awelon language and runtime model
31 | * `src/Stowage/Data` - non-relational database model
32 | * `src/Stowage` - LMDB-based implementation of database
33 | * `src/Data.ByteString` - bytestring values, builders, parsers
34 |
35 | This toplevel directory also has `app.fsproj` referencing `src/Main.fs`, which simply processes command line configuration options and runs a web server.
36 |
37 | ## Configuration
38 |
39 | Configuration of Wikilon is performed online, generally through a browser. Initial configuration requires the `-admin` flag to provide an ephemeral password for a root administrative account. But the admin may assign administrative authorities to other accounts.
40 |
41 |
--------------------------------------------------------------------------------
/app.fsproj:
--------------------------------------------------------------------------------
1 | <Project Sdk="Microsoft.NET.Sdk">
2 | 
3 |   <PropertyGroup>
4 |     <OutputType>Exe</OutputType>
5 |     <TargetFramework>netcoreapp2.0</TargetFramework>
6 |   </PropertyGroup>
7 | 
8 |   <ItemGroup>
9 |     <Compile Include="src/Main.fs" />
10 |   </ItemGroup>
11 | 
12 |   <!-- remaining item groups not recovered -->
13 | 
14 | </Project>
--------------------------------------------------------------------------------
/docs/AwelonFML.md:
--------------------------------------------------------------------------------
1 |
2 | # Awelon with First-Class Modules
3 |
4 | Awelon is currently a very simplistic language with four primitive combinators. This gives the language a very dynamic feel. Many programs can be statically typed, but it requires a lot of non-local knowledge. I'd like to consider a variation of Awelon with some nicer static reasoning properties:
5 |
6 | * unify values and modules, per [Rossberg's 1ML](https://people.mpi-sws.org/~rossberg/1ml/1ml.pdf)
7 | * dependent types so we can express rich theories
8 | * advanced effects model, such as [Frank's Do Be Do Be Do](https://arxiv.org/pdf/1611.09259.pdf)
9 |
10 | If we start with a richer language, we can go a lot further with Awelon. For example, it becomes feasible to use Awelon as a systems programming language (without relying too much on accelerators).
11 |
12 | Awelon's current language has some nice properties: point-free, concatenative, deterministic, simple syntax, local rewriting. Those latter three properties - deterministic, simple syntax, local rewriting - are the essential properties for Awelon's goals of unifying PX and UX. The simple syntax enables us to project code into a graphical interface, determinism ensures that shared code does not change in meaning, and local rewriting allows projection of the results. Point-free and concatenative are convenient for streaming a program, but are not essential.
13 |
14 | So the goal with this Awelon variant is to preserve those essential properties, while also enabling types and effects.
15 |
16 | ## Modules?
17 |
18 | The basic module interface is described by:
19 |
20 | * a collection of associated types
21 | * a collection of operations on them
22 |
23 | Types and operations are identified symbolically. Description of the types and interfaces might be a result of integrating other interfaces - a type also has a module interface. We can instantiate an interface, or partially instantiate one.
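For intuition, here is a toy Haskell sketch of a module as a first-class value - an abstract associated type packaged with operations on it - with all names invented for illustration:

    {-# LANGUAGE ExistentialQuantification #-}

    -- a first-class "queue module": the representation type q is abstract
    data Queue a = forall q. Queue
      q                     -- empty queue (an associated value)
      (a -> q -> q)         -- push
      (q -> Maybe (a, q))   -- pop

    -- a list-backed instance of the interface
    listQueue :: Queue a
    listQueue = Queue [] (\x xs -> xs ++ [x]) pop where
      pop []     = Nothing
      pop (x:xs) = Just (x, xs)

    -- clients may only use the packaged operations on the hidden type
    demo :: [Int]
    demo = case listQueue of
      Queue q0 push pop ->
        let go s = maybe [] (\(x, s') -> x : go s') (pop s)
        in go (push 2 (push 1 q0))   -- [1,2]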
24 |
25 | ## Module Rewriting?
26 |
27 | In the original Awelon, all values are functions. Functions are convenient because the only way to observe a function is to apply it, and the only way to construct a function is to bind an argument. Thus, we only need combinators `a` and `b` to model observations and constructions. Besides these, we had extra combinators for data plumbing.
28 |
29 | With modules, what kinds of observations and constructors do we require?
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 | For constructors, we might:
38 | 
39 | * parameterize a module
40 | * abstract a module
41 | * instantiate a module with a given type
42 | 
43 |
44 |
45 |
46 |
47 |
48 |
49 | In the original Awelon, we have primitive combinators to observe and construct functions, in addition to combinators for basic data plumbing. If all values are modules, then we'll instead want combinators for observing and constructing modules. What does it mean to rewrite them?
54 |
55 |
56 |
57 |
58 | There are other approaches, such as use of pattern-match rules. In that case, we might follow 'order' of patterns to achieve determinism. However, pattern matching is a little awkward in general: we need to know a lot about representations or views of the arguments. I think with modules, I'd prefer to follow Awelon's original example: simple confluent combinators.
59 |
60 | With functions, we observe a function by applying it, or we may construct a function by binding an argument. These correspond to Awelon's `a` and `b` primitive combinators, respectively. Besides these, we can copy or drop a function.
61 |
62 | With modules, observations and constructors are much richer, so we'll need more combinators:
63 |
64 |
65 |
66 |
67 | * to use one module to parameterize another
68 | * to use one module to instantiate another
69 | * to compose modules, unifying some shared types by name
70 | * to abstract a module, forgetting some details
72 |
73 |
74 |
75 | ## First Class Modules
76 |
77 | Rossberg's 1ML proposes to unify ML's module and core languages, such that modules become first-class values and functors become ordinary functions. I'd like to apply similar ideas in a concatenative setting.
78 |
79 |
80 | ## Effects Model
81 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
/docs/AwelonKPN.md:
--------------------------------------------------------------------------------
1 |
2 | # Kahn Process Networks in Awelon
3 |
4 | [Awelon language](AwelonLang.md) is purely functional. Although purely functional code permits a lot of simple fork-join parallelism, it presents a challenge for distributed parallel computation, modeling of parallel pipelines and so on. Mostly, the issue regards routing of messages. A purely functional representation of routing will be relatively deterministic and centralized, so it's difficult to leverage arrival-order non-determinism (race conditions) to improve parallelism.
5 |
6 | To resolve this, my current intention is to borrow inspiration from a deterministic concurrency model - especially one such as [Kahn Process Networks (KPNs)](https://en.wikipedia.org/wiki/Kahn_process_networks) that does not require external effects to model channels or single-assignment variables.
7 |
8 | KPNs are also very useful as a potential [effects model](KPN_Effects.md) and process or application model, essentially capable of representing monads and comonads with multiple IO ports. KPNs support reactive or real-time systems by adding a simple time model, e.g. the ability to deliver `Tick + a` messages on every channel instead of just `a` messages. Pushback can also be modeled explicitly by adding acknowledgement feedback messages to the downstream processes.
9 |
10 | ## Single Assignment Placeholders
11 |
12 | We can conceptually model single assignment variables using pseudo-words:
13 |
14 | [A] /put_K => (single assignment)
15 | /get_K => [A] (after assignment)
16 |
17 | Awelon doesn't support single assignment variables directly, but an optimizer or interpreter could presumably introduce placeholders to improve parallelism. In particular, `/get_K` may behave as a value word to be delivered through a computation in parallel with the computation that produces the final value `[A]`.
18 |
19 | Using this technique, we can shift some computations (specifically those with a known output type) into a separate "parallel region" such that the parallel region will ultimately have an identity behavior.
20 |
21 | [Expr](par)(t2) => [/get_1 /get_2] (locally)
22 | Expr /put_2 /put_1 (parallel region)
23 |
24 | We'll create fresh variables as needed, i.e. every `/put_K` will use a distinct `K`. If computation terminates successfully, all of these put and get pseudo-words should be eliminated. However, if we halt due to error or quota, we may need to extract these placeholders. This can be achieved via a simple value extraction algorithm:
25 |
26 |     T(X,E) - extract variable X from E
27 |       such that X T(X,E) == E
28 |     T(X,E) when E does not have X => d E
29 |     T(X,X) => (empty program)
30 |     T(X,[E]) => [T(X,E)] b
31 |     T(X,F G)
32 |       when only F has X => T(X,F) G
33 |       when only G has X => [F] a T(X,G)
34 |       when both have X  => c [T(X,F)] a T(X,G)
35 |
36 | Essentially, we can prefix incomplete parallel computations onto our programs then rewrite them back to equivalent inline programs.
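To make the algorithm concrete, here is a minimal Haskell sketch over a toy term representation; the `Term` type and helper names are assumptions for illustration, not part of any runtime:

    data Term = Prim Char         -- primitive combinators a, b, c, d
              | Word String       -- words, including /get_K placeholders
              | Block Prog        -- first-class function [E]
    type Prog = [Term]

    hasVar :: String -> Prog -> Bool
    hasVar x = any f where
      f (Word w)  = w == x
      f (Block p) = hasVar x p
      f _         = False

    -- extract x e computes T(X,E):  Word x : extract x e  behaves as  e
    extract :: String -> Prog -> Prog
    extract x e | not (hasVar x e) = Prim 'd' : e      -- X unused: drop it
    extract x [Word w] | w == x    = []                -- T(X,X) => (empty)
    extract x [Block p] = [Block (extract x p), Prim 'b']
    extract x (f:g)
      | hasVar x [f] && hasVar x g                     -- both halves use X
          = Prim 'c' : Block (extract x [f]) : Prim 'a' : extract x g
      | hasVar x [f] = extract x [f] ++ g              -- only F uses X
      | otherwise    = Block [f] : Prim 'a' : extract x g  -- only G uses X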
37 |
38 | ## Stable Frame Routing
39 |
40 | To model routing of messages, one option is to process available messages in large "frames", touching every channel even if no message is available for that frame. Doing so naively is inefficient but does have potential advantages for batching and especially for routing of *placeholders* because we don't need to know anything about the potential messages before we route them.
41 |
42 | An interesting possibility is to leverage knowledge about stable routes such that we only need to route placeholders once for a frame then deliver multiple times by reusing the existing routes. Essentially, routes become `/put_K` and `/get_K` pairs reusable across multiple update frames without explicitly performing the data plumbing each time.
43 |
44 | ## Identity Frames and Fixpoints
45 |
46 | A KPN won't update further when no messages are added. Essentially, we have a clear fixpoint condition - something easy to check dynamically, and potentially to verify statically. Evaluation of KPNs will always proceed to this fixpoint. Knowing this condition, we can optimize by avoiding routing to a subnetwork when there are no messages. More importantly, we can support "race conditions" within the KPN so long as we clearly determine which subnets might update.
47 |
48 | However, we might need to pay attention to representation of pending messages to ensure this fixpoint behavior. For example, we might need to know we're appending lists.
49 |
50 | If we route stable frames in a fixpoint loop, with a clear halting condition, we'll have the essence of KPNs.
51 |
52 | ## Acceleration
53 |
54 | Ideally, acceleration of KPNs in Awelon shouldn't rely too much on a specialized KPN data structure. We could do that, but I'd prefer something more generalized based on lightweight static analysis involving stable routes and identity frames.
55 |
56 |
57 |
--------------------------------------------------------------------------------
/docs/Brainstorm.md:
--------------------------------------------------------------------------------
1 |
2 | # Ideas of Dubious Utility
3 |
4 | Just a scratch pad for ideas I'm contemplating but not committed to in context of Awelon.
5 |
6 | ## Automatic Definitions
7 |
8 | Awelon currently provides automatic definitions for:
9 |
10 | * natural numbers `[1-9][0-9]*`
11 | * embedded texts
12 |
13 | I find it tempting to introduce more general support for automatic definitions, e.g. such that we could derive `cddadr`. I also like the idea of automatic definition for lists/tables based on common word structure. At the moment, we must explicitly manage any lists. But we could feasibly support `foo-*` words that automatically compose a list of words such as `foo-1` and `foo-42`.
14 |
15 | Effectively, these features would require either some form of macro definitions or some built-in schema. Macro definitions would be more flexible, but also rather more difficult to index for reactive computations (e.g. to automatically add `foo-36` to the appropriate `foo-*` list). I'd prefer to avoid the resulting complexities in my implementations and tooling.
16 |
17 | For the moment, we can shove this issue to external software agents or interactive editing tools, which could automatically define words as-needed based on rules represented within the dictionary. Similarly, we could automatically import or maintain definitions from trusted external sources.
18 |
19 | ## Exchange and Apply
20 |
21 | Right now I have:
22 |
23 |     [B][A]a == A[B] (apply)
24 |
25 | But I could split the exchange aspect as follows:
26 |
27 | [B][A]a == B[A] (apply')
28 | [B][A]e == [A][B] (exchange)
29 |
30 | This would result in greater separation of responsibilities. It simplifies the inline function to just `[] a d`. But the benefits ultimately seem limited. OTOH, the current 'apply' has a nice symmetry with bind, regarding the second argument:
31 |
32 | [B][A]a == A[B] apply to all but second element
33 | [B][A]b == [[B]A] apply to just the second element
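
Checking the inline claim above with the split combinators:

    [A] [] a d
        == A [] d      (apply')
        == A           (drop)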
34 |
35 |
36 |
37 |
38 |
--------------------------------------------------------------------------------
/docs/Generics.md:
--------------------------------------------------------------------------------
1 | # Generic Programming in Awelon
2 |
3 | Generic programming is about writing programs in terms of requirements, the abstraction and deferral of non-essential decisions such as data types, sorting algorithms, dependencies, configuration, and extensions. Ideally, this should be achieved without significant overheads for syntax or performance.
4 |
5 | Awelon directly supports parametric polymorphism, which is a useful but limited form of generic programming. Advanced forms of generic programming will require explicit modeling and suitable projections. Unfortunately, popular approaches like multi-methods or type-classes rely on a global registry or type system, and are awkward in Awelon.
6 |
7 | I have some vague ideas involving a monadic constraint system.
8 |
9 | Constraint systems are close in nature to generic programming: a constraint can express a requirement, or further refine one, while the search space serves as a registry of possible and partial solutions. With soft constraints, we can also model preferences. The monadic API would provide a framework for constructing our model, registering the requirements and options.
10 |
11 | A preliminary API:
12 |
13 |     type CX v a  -- monadic
14 |     type Var v a -- single assignment variables
15 | 
16 |     -- model construction
17 |     alloc : CX v (Var v a)
18 |     read  : Var v a -> CX v a
19 |     write : Var v a -> a -> CX v ()
20 | 
21 |     -- constraint system
22 |     choice : List (CX v a) -> CX v a
23 |     fork   : CX v () -> CX v ()
24 |     weight : Nat -> CX v ()
25 |
26 |
27 |
28 |
29 |
30 | The monadic API can be convenient for abstracting and composing our constraints. Unfortunately, it is somewhat awkward to 'defer' a choice based on downstream requirements in the monadic API. This means we must either front-load our requirements (which is awkward, but doable idiomatically) or find an alternative to true deferral (such as iterative search).
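To make the API concrete, here is a hypothetical Haskell rendering as a deep embedding; a solver would interpret this tree, and the constructor names simply mirror the API above:

    {-# LANGUAGE GADTs #-}

    newtype Var v a = Var Int        -- single-assignment variable (by id)

    data CX v a where
      Pure   :: a -> CX v a
      Bind   :: CX v b -> (b -> CX v a) -> CX v a
      Alloc  :: CX v (Var v a)
      Read   :: Var v a -> CX v a
      Write  :: Var v a -> a -> CX v ()
      Choice :: [CX v a] -> CX v a   -- alternatives; the search space
      Fork   :: CX v () -> CX v ()   -- deferred obligation; must succeed
      Weight :: Int -> CX v ()       -- soft preference for this branch

    instance Functor (CX v) where fmap f m = Bind m (Pure . f)
    instance Applicative (CX v) where
      pure    = Pure
      f <*> x = Bind f (\g -> fmap g x)
    instance Monad (CX v) where (>>=) = Bind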
31 |
32 |
33 |
34 |
35 | In this design, we'll have a monad that supports 'search' via alternatives and failure, and also supports single-assignment variables. A variable may only be assigned once, but the assignment may be separated from variable declaration. The idea, then, is to express multiple alternative assignments as a search space, and to represent ad-hoc constraints by search failures. Importantly, variable assignments are interdependent. We'll consider only groups of assignments.
36 |
37 | To this we might add a 'fork' operation that allows us to defer reads, decisions, requirements, and assignments. There is no return value from 'fork'. However, every fork must halt successfully for our constraint system to succeed.
38 |
39 | A reasonable question is how to model a shared collection, such as a list of plugins. My idea is this: we model our plugins as a linked list of single-assignment variables to which we might append. Each appended element is potentially the last one, so we represent this using a local alternative. This pattern is inefficient if implemented naively, but we could optimize it with a dedicated API extension.
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 | Ideas:
60 |
61 | * single-assignment variables combined with a search model
62 | * answer-set programming - variables are not independent
63 | * attaching tactical solutions to a variable (or set thereof)
64 | * option to 'fail'
65 |
66 | How would we support extensions, too? I suppose it could be encoded by building a channel using constraint variables and prioritized selection. Intriguingly, this constraint system could enable a high level of parallel computation insofar as different parts are reading or writing.
67 |
68 | type GX e a -- monadic, with error type 'e'
69 | try : GX e a -> GX e' (Either e a)
70 | fail : e -> GX e a
71 | return : a -> GX e a
72 |
73 | This design is sufficient to give us the ability to try alternatives and recover from failures.
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 | The monad provides an API to read and write an ad-hoc registry while computing a value, and also some APIs for failure and fallback, e.g. an exception-safe `try` that rolls back writes upon error. This would be sufficient to model lightweight forms of search, for example. This monad can perform ad-hoc computations, and would return the final result and registry.
83 |
84 |
85 |
86 | Further, the monad may support failure and fallback behavior, like a try-catch, allowing for program search, and perhaps a "scoring" mechanism to express soft preferences. This would allow us to construct collections and options while computing a value, and to support ad-hoc alternatives and partial failures.
94 |
95 |
96 |
97 |
98 |
99 |
100 | A viable idea for generic programming is to develop a monadic API for a lightweight constraint programming model, with inspiration from declarative constraint systems. We would simultaneously declare constraint variables and add options to them, while also computing a value. The monad would allow us to construct a registry and a program at the same time. Within the registry, we could declare various rules, e.g. how to build a resource given some other resources, and which new rules and requirements then become available.
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 | In many cases, such as when simulating typeclasses or multi-methods, we must model a registry of instances. To fit Awelon's overall design vision (for caching, control, etc.), this must not be a global registry - reject a global `mm-registry` for multi-methods. Indeed, it's best if the registry is local to each program definition.
126 |
127 | It is convenient to represent registration of resources monadically, mixed together with specification of the program. Intriguingly, we could generalize monadic mechanisms to support dependency-injection or configuration. With support for alternatives and program search, it would also be convenient for rapid prototyping.
128 |
129 | This is a direction I'd very much like to pursue for Awelon, long term. We may need to build upon projections to make it usable.
130 |
131 |
--------------------------------------------------------------------------------
/docs/HTXM.md:
--------------------------------------------------------------------------------
1 | Hierarchical transaction machines (HTXMs) are an interesting idea I've been developing recently. HTXMs accomplish my goals of live, resilient, declarative, extensible software systems. Further, HTXMs accomplish these goals in a way that's easy to integrate with existing services and devices, thus avoiding the major weakness of RDP.
2 |
3 | First, let's ignore that 'hierarchical' adjective. I'll get back to it!
4 |
5 | A transaction machine (TXM) is essentially: a transaction, repeated indefinitely. We assume our system has a set of TXMs.
6 |
7 | TXMs intrinsically support extension and liveness: We can model our set of TXMs as a transaction variable. This allows us to modify our system behavior atomically, adding new machines and removing others. If necessary, we can simultaneously transition our state resources.
8 |
9 | TXMs intrinsically have nice resilience properties: transactions are fail-safe, they can degrade gracefully by fallback behavior or halting individual machines, and the system will recover swiftly after a problem is corrected. Further, they can easily support a user interface with consistent views and transactional intervention, allowing for administration or debugging.
10 |
11 | TXMs intrinsically are idempotent: having more than one copy of a TXM won't change the system's behavior. This allows us to reason simply about whether or not we have a behavior. A list of TXMs is also commutative, but that's not special for concurrency models.
12 |
13 | TXMs intrinsically support reactive, concurrent coordination behavior: If a deterministic transaction does not modify state, then repeating it under the same conditions won't modify state either. So we can wait for conditions to change. We can leverage this, e.g. using voluntary failure to make TXMs wait for arbitrary conditions.
14 |
15 | For example, a TXM implementing a stream processor in each step will drain an input queue, maintain some state, and push to an output queue. If input is empty or output is at capacity, it can abort (voluntarily fail) to force a wait. Haskellers might already be familiar with this technique, via the software transactional memory package and use of TQueue or TChan.
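A minimal sketch of this stream-processor pattern, using Haskell's `stm` package (the process names are illustrative; a bounded `TBQueue` would additionally model output at capacity):

    import Control.Concurrent.STM
    import Control.Monad (forever)

    -- a TXM is just a transaction, repeated indefinitely; 'retry' models
    -- voluntary failure, so a step blocks until a read variable changes
    txm :: STM () -> IO ()
    txm step = forever (atomically step)

    -- a trivial stream processor: drain one input, push one output
    copier :: TQueue a -> TQueue a -> IO ()
    copier inQ outQ = txm $ do
      x <- readTQueue inQ     -- retries (waits) while the input is empty
      writeTQueue outQ x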
16 |
17 | Beyond stream processing, it's easy for TXMs to model reactive constraint systems (maintaining cells according to some goal), blackboard systems (read blackboard, opportunistically contribute), publish-subscribe. Object-oriented code is not difficult to model within the transaction. To more precisely support larger-scale reactive systems, we could include logical timestamps with streaming values. Multiple models can be easily mixed.
18 |
19 | External systems, such as a network or display, can effectively be integrated as shared memory, or perhaps by writing to a system task queue. The main limitation is that we cannot directly model synchronous wait. To indirectly model synchronous wait, we can add a request to a system queue including a reply-to variable, modify some state so we know to wait on that channel, commit, then wait for a reply in a subsequent transaction.
20 |
21 | These are all wonderful properties, and achieve several of my goals. However, an essential feature is missing: support for divide-and-conquer tactics.
22 |
23 | Imagine we have a dynamic set of input queues to handle, with over a hundred elements. In this case, I would like the option to spawn one TXM per input queue. Detached spawning would be trivial to implement. Unfortunately, it also escapes user control, hurting liveness and resilience.
24 |
25 | The idea of attached spawn leads me to Hierarchical TXMs (HTXMs).
26 |
27 | In each step, our machines can fork subordinate behaviors, but these behaviors only remain active until just before the next step. A failed transaction cannot fork. A successful transaction, unless it results in a stable state, will immediately begin the next step. Thus, only successful, stable-state transactions will result in active subordinates.
28 |
29 | To recognize successful, stable-state transactions is not difficult. For a singular transaction, only a read-write behavior on a variable can result in unstable behaviors. For example, if we read a queue, take one element, and write the modified queue back, then when we repeat our transaction we'll have different observed conditions. If we only read a variable, or blindly write a variable, we're stable. Further, for data types that support efficient equivalence checks, we can stabilize writes that are equivalent to the current value.
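A sketch of that stability check, assuming we record a per-variable access summary while running each step (the types are hypothetical):

    import qualified Data.Map as M

    -- what a completed step did to each variable it touched
    data Acc = ReadOnly
             | BlindWrite
             | ReadWrite Bool  -- True iff it wrote back an equivalent value

    -- a successful step is stable (repeating it changes nothing) iff every
    -- read-write variable was written back an equivalent value
    stableStep :: M.Map var Acc -> Bool
    stableStep = all ok . M.elems where
      ok (ReadWrite same) = same
      ok _                = True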
30 |
31 | Programmers of HTXM systems will want to control stability. Fortunately, there are many patterns they can use to filter noise, cache stable conditions, partition data into relatively stable volumes, etc.. The desire for stability certainly isn't unique to HTXMs.
32 |
33 | I haven't implemented HTXMs yet. But I believe this model has much to offer, and I'll continue exploring it for now.
34 |
--------------------------------------------------------------------------------
/docs/ImmutableAwelon.md:
--------------------------------------------------------------------------------
1 | # Immutable Awelon
2 |
3 | Paul Chiusano's Unison Web project takes an interesting approach to modeling a codebase: instead of mutable definitions (e.g. via files in a filesystem), a program only ever references external code via self-authenticating secure hash. This gives us *immutable* definitions at any given development step. Conveniently, this simplifies indexing, static typing, cached compilation, acceleration, etc.. The advantage is we can assign global meaning to any program.
4 |
5 | Awelon as it currently exists uses an immutable *dictionary*, within which definitions are mutable. Consequently, it is the pairing of dictionary-program that is immutable. But there is an important difference: the meaning of programs can change indirectly due to updates within the dictionary. This gives us ad-hoc spreadsheet-like characteristics, but it also complicates caching and sharing of code.
6 |
7 | With immutable code, our "dictionary" is replaced by a "development environment" within which we maintain a bi-directional relationship between secure hashes and local nicknames. References between code always use secure hashes. We may render and edit code using the nicknames in place of secure hashes. To simulate file-like edits, we can provide means to select and to edit multiple locations concurrently.
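A minimal sketch of that naming layer, with strings standing in for secure hashes:

    import qualified Data.Map as M

    type Hash = String   -- stand-in for a secure hash
    type Nick = String

    -- bi-directional association between secure hashes and local nicknames
    data Names = Names
      { byNick :: M.Map Nick Hash
      , byHash :: M.Map Hash Nick
      }

    -- assign or replace a nickname for a hash
    nickname :: Nick -> Hash -> Names -> Names
    nickname n h (Names ns hs) = Names (M.insert n h ns) (M.insert h n hs)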
8 |
9 | That's the idea.
10 |
11 | Whether it's a "good" idea is a separate question.
12 |
13 | The advantage is that we simplify the sharing of code in a global codebase, and we simplify caching because the evaluation, type, etc. may be associated directly with the secure hash. The disadvantage is that maintaining and distributing data becomes relatively difficult. The codebase is no longer the "living sea of data" I desire for Awelon [application models](ApplicationModel.md). The spreadsheet-like characteristic of Awelon dictionaries is convenient for many of Awelon's use cases.
14 |
15 | Further, it's unclear how to support convenient embedding of data if we must use huge secure hashes for data constructors. I suppose we could [add first-class modules to Awelon](AwelonFML.md) with local symbols to serve the same role, in which case we have first-class dictionary values.
16 |
17 |
--------------------------------------------------------------------------------
/docs/Indexing.md:
--------------------------------------------------------------------------------
1 | # Indexing Dictionaries
2 |
3 | I need to index dictionaries for effective use.
4 |
5 | Which indices are most useful?
6 |
7 | * word (or resource) → version
8 | * version → evaluated definition
9 | * indirectly? version → evaluated version + version → definition
10 | * version → link optimized or compiled versions
11 | * version → inferred type
12 | * Reverse Lookup: symbol → client words
13 | * symbols include annotations
14 | * symbols include secure-hash resources
15 | * find obviously erroneous (undefined, cyclic, etc.) words
16 | * potentially find full-text fragments
17 |
18 | A challenge for indexing is that Wikilon will have many dictionaries, both for multiple projects/users and for histories and hierarchical structure. I assume that dictionaries will be 99% similar, but the differences might be private to a project or user. So we need to both share indices and ensure secure separation thereof. Association of definitions with deep versions based on secure hash should help, since we can securely index from a version to many kinds of computed results (like evaluation or type). This may also be treated similarly to memoization!
19 |
20 | For an interactive development environment, I also want the ability to find type-appropriate words within a dictionary. Maintaining a reverse lookup index for type information is non-trivial because reverse-lookup is always specific to each dictionary. But presumably we could just ask developers or software agents to augment the dictionary with some metadata that can be used in the reverse lookup. E.g. for `foo` we could have `foo-meta-kw` that includes ad-hoc key-words or tags to swiftly discover the word `foo`.
21 |
22 | So, I think the primary indices to maintain are for deep-version and a simple reverse-lookup. Fortunately, these two indices can be maintained incrementally. A relevant concern is that a version update for a 'deep' word (e.g. the initial state in a command pattern) could result in long, cascading chains of version invalidations with high cost. And if we have a lot of updates, we shouldn't recompute all versions after each update.
23 |
24 | This could be ameliorated by making the deep-version index lazier in nature (so we only invalidate once for multiple updates), or otherwise batching our updates.
25 |
26 | In most cases, we can assume that the more a word is used, the more stable its definition will be. So eager versioning could also work. Or perhaps we should treat version IDs as a memory cached index, recomputed as needed, rather than a fully durable index.
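To make the two primary indices concrete, a rough Haskell sketch - assuming `hash` from `Awelon.Hash` in this repository, a naive notion of dependencies (defined words mentioned in a definition), and acyclic definitions; memoizing `version` is what would make maintenance incremental:

    import qualified Data.ByteString.Char8 as BS
    import qualified Data.Map as M
    import qualified Data.Set as S
    import Awelon.Hash (hash)

    type W    = BS.ByteString
    type Dict = M.Map W BS.ByteString    -- word -> definition source

    -- naive dependency extraction: defined words mentioned in a definition
    deps :: Dict -> W -> [W]
    deps d w = filter (`M.member` d) (BS.words def)
      where def = M.findWithDefault BS.empty w d

    -- deep version: hash a definition together with its deps' versions
    version :: Dict -> W -> BS.ByteString
    version d w = hash (BS.unwords (def : map (version d) (deps d w)))
      where def = M.findWithDefault BS.empty w d

    -- reverse lookup: symbol -> client words whose definitions mention it
    reverseLookup :: Dict -> M.Map W (S.Set W)
    reverseLookup d = M.fromListWith S.union
      [ (sym, S.singleton w) | (w, def) <- M.toList d, sym <- BS.words def ]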
27 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/docs/KPN_Effects.md:
--------------------------------------------------------------------------------
1 | # KPNs as an Effects Model
2 |
3 | I believe [Kahn Process Networks (KPNs)](https://en.wikipedia.org/wiki/Kahn_process_networks) or variants would offer a better foundation for purely functional effects models and applications compared to monads. KPNs offer a convenient basis for pipeline style concurrency, working with multiple concurrent input sources, and distributed computation.
4 |
5 | Desiderata:
6 |
7 | * Confluent. KPNs have a deterministic result no matter the order of scheduling.
8 | * Concurrent. Order of inputs or outputs on separate channels is not relevant.
9 | * Monotonic. No rollbacks or rework. Every output produced is final.
10 | * Composable. We can compose entire networks externally.
11 | * First Class. KPNs are immutable values that can be passed around.
12 | * Reactive. We can merge and interleave asynchronous input sources.
13 | * Dynamic. Network structure may change incrementally based on input.
14 |
15 | The first several features are implicit for KPNs.
16 |
17 | KPNs as first-class values is implicit in how I plan to represent them, using accelerated evaluation of a DSL-like network description. Explicit copy in Awelon helps: I can use linear update between copies. By passing KPNs around, they can serve as a sort of functional 'object' with asynchronous update and result.
18 |
19 | But reactive and dynamic behavior requires some attention.
20 |
21 | ## Reactive
22 |
23 | Reactivity is not a built-in feature of KPNs, but can be supported indirectly by adding a simple time model to our KPNs. The simplest option, perhaps, is to modify every message type from `a` to `(Tick + a)` such that we can express waiting on a message until time advances on a pipeline.
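As a toy illustration in Haskell, the time model makes merging two asynchronous sources deterministic (list-based streams for simplicity; `merge` is an invented name):

    data Msg a = Tick | Msg a deriving Show

    isTick :: Msg a -> Bool
    isTick Tick = True
    isTick _    = False

    -- deterministic merge of two tick-aligned streams: within each
    -- logical step, emit everything from the left channel, then the right
    merge :: [Msg a] -> [Msg a] -> [Msg a]
    merge xs ys =
      let (xstep, xs') = break isTick xs
          (ystep, ys') = break isTick ys
      in case (xs', ys') of
           (Tick:xr, Tick:yr) -> xstep ++ ystep ++ (Tick : merge xr yr)
           _                  -> xstep ++ ystep  -- a stream ended; last partial step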
24 |
25 | ## Pushback
26 |
27 | Pushback is when sending a message causes a process to wait, in order to limit the number of pending messages. KPNs don't usually have pushback, but it's a useful constraint for limiting memory usage. It's easiest to model pushback via explicit acknowledgement channels, rather than adding it to the model directly. That is, after sending we await acknowledgement - but we can start with a few 'acks' already in the queue to represent bounded buffer channels.
28 |
29 | ## Dynamic
30 |
31 | Dynamic structure is a big challenge for KPNs, and is a potential weakness compared to monadic programming. How should we handle this?
32 |
33 | My best idea so far:
34 |
35 | * a process network has a set of labeled open input and output ports
36 | * a stateful process may 'become' a process network with the same ports
37 | * special support for composable processes (like an `fmap` process).
38 |
39 | With this much, a single process could rewrite itself into multiple smaller processes. Some smaller processes, such as a sequence of two `fmap` processes, could collapse into a single process. Others may be garbage collected when awaiting 'dead' channels (assuming we'll wait forever for an input).
40 |
41 | Failed ideas: first class channels are problematic due to the issue of naming channels, unique names when composing networks, need for linear types, etc.. Explicit delegation is too inefficient for pipelining.
42 |
43 | ## Effects Models
44 |
45 | Modeling effects means providing outputs that result in some feedback. We could do this by convention with a small set of channels labeled based on the types of effects or expected independent sources of input. We can model message buses or routing within a KPN as needed to manage arbitrary effects.
46 |
47 | IO involving independent channels can feasibly be decentralized by sharding a process network, assuming an appropriate context.
48 |
49 |
50 |
--------------------------------------------------------------------------------
/docs/Perf.md:
--------------------------------------------------------------------------------
1 | # Performance Monitoring
2 |
3 | To avoid blind optimization, I'll try to develop a useful set of benchmarks with local metrics.
4 |
5 | Benchmarks:
6 | * **bench.repeat10M** a trivial `0 [4 +] 10000000 repeat` benchmark.
7 |
8 | * benchmark ideas
9 | * implement μKanren relational language.
10 | * text processing - regex, parsers, backtracking, etc..
11 | * Pov-Ray style scene computing programs
12 | * vector and matrix processing, machine learning
13 | * big data processing, stowage tests
14 | * look into computer benchmarks game
15 |
16 | Interpreters:
17 | * **old aoi** - From the `awelon` project.
18 | * Runs a simplifier by default.
19 | * Times include linking.
20 | * **alpha runABC Haskell**
21 | * No simplifier or accelerators.
22 | * Doesn't validate copyable/droppable.
23 | * Time excludes linking.
24 | * **alpha runABC Wikilon**
25 | * No simplifier or accelerators.
26 | * No shared objects.
27 | * Time excludes linking.
28 | * **runABC 32 bit Wikilon**
29 | * memory relative offsets
30 | * smaller integer types
31 |
32 |
33 |
34 | ## Repeat 10M
35 |
36 | * old aoi interpreter: 213 seconds
37 | * alpha runABC Haskell: 413 seconds
38 | * alpha runABC Wikilon: 19.0 seconds
39 | * runABC 32 bit Wikilon: 19.7 seconds
40 |
41 | This is a promising start. A 10x improvement over my old `aoi` interpreter even before optimization work or JIT. This also demonstrates that 32-bit vs. 64-bit doesn't make a significant difference in CPU performance, while effectively doubling the working context space.
42 |
43 | However, objectively this is not *good* performance. 20 seconds for 10M loops is ~2 microseconds per loop. I hope to achieve *at least* another 10x improvement here in the medium term, via fixpoint accelerators. Long term, a ~100x improvement is desirable via JIT.
44 |
45 | Unfortunately, with my recent (2016 November) change to Awelon's definition, these results won't hold. I'll need to re-implement both the interpreter and the compiler. But it may be feasible to create a specialized runtime just for super-fast interpretation without all the linking.
46 |
47 | A theoretical limit is closer to a 1000x improvement.
48 |
49 |
50 |
51 |
--------------------------------------------------------------------------------
/docs/SLRP.md:
--------------------------------------------------------------------------------
1 |
2 | # Stream Language Rewrite Processing (SLRP)
3 |
4 | See the [SLRP-Article.md](SLRP-Article.md).
5 |
6 | The essential idea for SLRP is that we have fixed-memory processors rewrite an input stream in multiple passes. This has many benefits for pipelining, partitioning, and parallelism. In the article, I outline an initial SLRP machine code based on a concatenative combinatory logic.
7 |
8 | Unfortunately, data plumbing (dip, dup, swap, loop, etc.) is too expensive in our initial example language. I can hack a solution with virtual memory. However, if this were adequately solved in pure SLRP, SLRP could become a very nice, practical foundation for computing.
9 |
10 | ## Data Plumbing
11 |
12 | The initial SLRP machine code has several binary operators. However, these operators only "see" one operand within our stream. To overcome this limit, we're using expensive divide-and-conquer tactics to ferry `xyz` buffers of data. This can be mitigated by large buffers, such that `xyz` may involve tens of kilobytes. However, even if we mitigate this, it still requires our processor's full attention, and occurs too frequently for common operations.
13 |
14 | To solve this problem, we need processors that support multiple input streams. This way, an operator could observe two streams and either merge them element-wise (zip, merge-sort, etc.) or at least reorder large volumes efficiently (e.g. all of X followed by all of Y, no ferrying small buffers).
15 |
16 | However, divided streams introduce semantic and mechanical challenges.
17 |
18 | Where does that second stream come from? How does it interact with abstraction?
19 |
20 | ## Brainstorming
21 |
22 | No solid solutions yet, just some ideas.
23 |
24 | ## Multiple Channel SLRP
25 |
26 | With a multi-channel SLRP, processors have a fixed number K of input and output streams. This fixed number would be determined by our machine-code and hardware, and is program-independent. The processor must rewrite these several streams collectively.
27 |
28 | To support pipeline processing with bounded buffers, these channels must be synchronous. If the channels were asynchronous, then bounded buffers would fill at variable rates. This would result in pushback, and downstream processors would become "stuck" without input on every channel. It is certainly feasible to represent 'whitespace' NOPs, though. It is also feasible to model channels that are synchronous at different 'rates'. For example, two bytes of channel B for every byte in channel A.
29 |
30 | In mechanical terms, a multi-channel SLRP is equivalent to a single-stream SLRP with a word-size proportional to the number of bytes transferred in a synchronous time-step. Thus, a "multi-channel" behavior must be part of our semantics, all about how we interpret and leverage these extra bytes.
31 |
32 | *Note:* For convenient programming, I would favor a semantics where all valid programs can be represented as one stream, assuming the other streams are initially just whitespace. We should also be able to extract the interleaved stream back into a single stream.
33 |
34 | ## Bi-directional SLRP
35 |
36 | The idea with bi-directional SLRP is to have input-output channels flowing in opposite directions.
37 |
38 | A difficulty with this is that, unless we're willing to accept a non-deterministic model, we need to clearly understand where each "rendezvous" between channels should occur. (And for non-determinism, I'd rather just use effects for this role.)
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 | Awkwardly, to support pipeline processing, these channels must be synchronous at the byte level. With bounded buffers between processors, the buffers would otherwise fill at variable rates, and downstream processors may become stuck, unable to observe input on at least one of the K channels. That is, every output on one channel must be accompanied by outputs on the other channels.
64 | 
65 | It is feasible to have some channels with larger "word sizes" than others, e.g. so channel B has 3 bytes for every 1 byte in channel A. However, without loss of generality, we could simply divide B into 3 channels that are byte-for-byte synchronous with A.
66 | 
67 | Can this work? Ignoring pipelining, we can guarantee our processor has inputs on every port. Thus, we could develop a set of rewrite rules that always make progress. Unfortunately, this no longer works once we pipeline our processors together.
80 |
81 |
82 |
83 |
84 |
85 | ## Concrete Channels?
86 |
87 |
88 | Suppose processors are wired to several unbounded input streams, and we rewrite them collectively into several unbounded output streams. Processors may pushback on their unbounded input sources, and are not guaranteed to provide output on any given output channel in a timely fashion. A consequence of 'pushback' is that we cannot usually expand a single input stream without bound.
89 | 
90 | Mechanically, this seems feasible.
95 |
96 | Semantically, this is a challenge. That is, assuming we intend to maintain nice features such as pipeline processing, easy partitioning, confluence up to effects, virtual memory, etc..
97 |
98 | What is our mechanical foundation upon which we may construct a semantics?
99 |
100 | * Processors are physically wired to K input streams.
101 | * Processors are wired to K' output streams. K = K'?
102 | * Operations and operands are encoded in the streams.
103 | * Stream states: terminated, waiting, data available.
104 | * We can only see the "header" for larger operands.
105 |
106 | If we assume multiple rewrites are required to complete a computation, and that pipeline processing might be useful, then our number of output channels must match the number of input channels, and routing must be simple and linear.
107 |
108 | If we want confluent computation, then we must not observe the "waiting" condition for any input stream. If we want local rewrites, then we also shouldn't be able to observe the "terminated" condition. Instead, we should have a simple approach to determine how much data a given operation is waiting on. It is feasible to achieve limited parallelism if rewrites on multiple inputs are not conflicting.
109 |
110 | Our implementation must look at a small buffer of data available on each stream and simply *know* what to do next.
111 |
112 |
113 |
114 |
115 |
--------------------------------------------------------------------------------
/docs/Todo.md:
--------------------------------------------------------------------------------
1 | # Recent
2 |
3 | Performance profiling. Comparisons in Perf.md.
4 |
5 | # TODO
6 |
7 | My primary efforts at the moment should be:
8 |
9 | * simplify and accelerate ABC code!
10 | * recognize existing accelerators
11 | * develop MORE accelerators
12 | * fast fixpoint inline (fix.i)
13 | * maybe as an attribute?
14 | * faster conditional behavior.
15 | * `if` and `if_` behaviors.
16 | * common EQ/NEQ comparisons.
17 | * sum to boolean conversion.
18 | * some deep stack manipulations
19 | * develop specialized accelerators:
20 | * deep structural manipulations
21 | * list processing accelerators
22 | * consider support for 'compact' texts
23 |
24 | * compact, fast bytecode
25 | * optionally compact during parse
26 | * high performance quotations
27 | * directly contain binaries, texts, blocks.
28 | * explicit reserve for structured data
29 | * design with eye toward shared objects.
30 |
31 | * persistence and stowage
32 | * potential for shared, stable object models
33 | * use for web service storage!
34 |
35 | * [parallelism](Parallelism.md)!
36 | * get `{&fork}` and `{&join}` working
37 |
38 | * shared bytecode objects
39 | * avoid full copies for large bytecode.
40 | * for now, maybe separate from stowage.
41 |
42 | * profiling and debugging of AO code
43 | * simplified stack traces
44 | * other profiling options
45 |
46 | * get web service active again!
47 | * develop useful application models
48 | * develop some console style apps
49 |
50 | * command line utilities
51 | * favor via web service, not filesystem
52 | * support queries, claw, computations
53 | * support import/export to AO files
54 | * with or without history
55 | * FUSE adapter (maybe)
56 |
57 | * runtime performance
58 | * compact bytecode
59 | * shared memory (loopy code, binaries, texts)
60 | * simplification and optimizations
61 | * partial evaluation and applications
62 | * dead code and data elim - trash, etc..
63 |
64 | * extraction of data
65 | * texts, images/video, sound/music
66 | * binaries
67 | * cacheable results, please.
68 |
69 | ## Lower Priority (for now)
70 |
71 | * faster basic bytecode!
72 | * typechecked, preallocated variants.
73 | * explicit memory reservations.
74 | * ability to run a block unchecked.
75 | * explicit type tests. Basic Block Value?
76 |
77 | * programmable completion for linkAO
78 | * maybe add default linkAO file via env.
79 | * really want to write `linkAO -w foo[TAB]`.
80 | * but it's low priority
81 |
82 | * claw to AO conversion (and vice versa)
83 | * simplify command line testing, please.
84 |
85 | * dictionary applications
86 | * model of edit and debug sessions
87 | * multi-media interactive fictions
88 | * multi-media multi user dungeons
89 | * quests
90 |
91 | * persistence and reflection
92 | * dictionaries as runtime values
93 | * Wikilon state as runtime values
94 | * eliminate dependency on VCache
95 |
96 | * extraction of executables or libraries
97 | * extract to Rust, C, JavaScript
98 | * perhaps via reflection of dictionaries
99 |
100 | * background computations
101 | * for types, errors, cycles, undefined words
102 | * continuous zero-button testing, live tests
103 | * quickly find type errors and failed tests
104 |   * background computations and eventual consistency
105 | * maintaining lists of words with properties
106 | * caching, recomputation, wide reuse of cache
107 | * preferably: over history and between dicts
108 | * leverage structure sharing and stowage
109 | * index to find words with a suitable type
110 | * index for fast fuzzy find, tab completion
111 | * index for full text search
112 |
113 | * debuggable runtimes
114 | * replays in debug view: animations, etc.
115 | * automatic debugging and error isolation
116 |
117 | * heavier focus on Claw views and dictionary apps
118 | * visual claw, stateful widgets in source
119 | * named variables and closures (?)
120 | * I haven't found any variation of this I like
121 | * maybe to fix a local `x y z` variable environment
122 |
123 | * Tunable claw (not critical)
124 | * bytecode views defined within dictionaries?
125 |
126 | * Neat features
127 | * render environments, animate evaluations
128 | * color-coded namespaces
129 | * bolder or larger words for expensive words
130 |
131 | * less ad-hoc web application views
132 | * application logic into AO dictionaries
133 | * subscriptions model: long polling, web-sockets
134 | * shared, updateable views as the default
135 | * favor semantic spans/divs, forms, and CSS
136 | * try for generic designs (MFlow, etc.)
137 | * use ETags correctly (e.g. If-None-Match)
138 |
139 | * model metadata per dictionary
140 | * edit sessions
141 | * bug trackers
142 | * stats and logs
143 |   * genealogy: fork, merge, branch
144 | * users and activities, collision prevention
145 | * recent changes
146 |
147 | * generic web resources
148 | * support for image resources (PNG, SVG, favicon)
149 | * applications for edit sessions, bug trackers
150 | * support to upload/download binaries and texts
151 | * model REPLs, forums, wikis, spreadsheets, etc.
152 | * box and wires or VRML systems above visual claw
153 | * support for video-audio resources (GIF, HTML5 video, etc.)
154 | * turtle graphics, iPython notebooks, xiki
155 |
156 | * extraction of applications
157 | * Optimized ABC
158 | * JavaScript + DOM
159 | * Unikernels or Docker apps
160 |
161 | * user model
162 | * names and logins
163 | * themes, configuration
164 | * quotas, etc.
165 | * presence (tracking activity via dictionary)
166 | * support for 'secured' dictionary apps (games, etc.)
167 |
168 | Note that I've decided to model almost everything as dictionary apps, including stuff like edit sessions, bug trackers, and users. The idea, or goal, is to keep all of this information uniformly available for import, export, history, views of the dictionary, etc..
169 |
170 | The main challenge with this concept is security. Security *within* an AO dictionary isn't really a feasible thing. What we can probably do, however, is control interactions (merges, dataflows, etc.) between dictionaries, and perhaps also constrain merging of value sealers. An external agent would make decisions given a set of dictionaries.
171 |
--------------------------------------------------------------------------------
/hs/Deprecated.md:
--------------------------------------------------------------------------------
1 | Development of Wikilon has been started and aborted in Haskell several times now, largely due to the performance concerns surrounding dynamic code. Currently I'm migrating to F# on the CLR.
2 |
3 |
4 |
--------------------------------------------------------------------------------
/hs/Setup.hs:
--------------------------------------------------------------------------------
1 | import Distribution.Simple
2 | main = defaultMain
3 |
--------------------------------------------------------------------------------
/hs/app/Main.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE ViewPatterns, PatternGuards, OverloadedStrings #-}
2 |
3 | module Main (main) where
4 |
5 | import Control.Monad
6 | import Control.Monad.Loops (anyM)
7 | import Control.Exception
8 | import qualified Data.ByteString as BS
9 | import qualified Data.ByteString.UTF8 as U8
10 | import qualified System.IO as Sys
11 | import qualified System.Exit as Sys
12 | import qualified System.Environment as Env
13 | import qualified Data.List as L
14 | import qualified System.EasyFile as FS
15 | import qualified System.Entropy as Sys
16 | import qualified Network.Wai.Handler.Warp as WS
17 | import qualified Network.Wai.Handler.WarpTLS as WS
18 | import qualified Network.Wai as Wai
19 | import Awelon.Hash (hash)
20 | import qualified Wikilon.DB as DB
21 | import qualified Wikilon as Wiki
22 |
23 | helpMsg :: String
24 | helpMsg =
25 | "Expected Usage:\n\
26 | \\n\
27 | \ wikilon [-pPORT] [-dbMB] [-admin] \n\
28 | \ -pPORT listen for HTTP requests on given port (default 3000) \n\
29 | \ -dbMB configure maximum database file size (default 4T) \n\
30 | \ -admin print admin password (valid until process restart)\n\
31 | \\n\
32 | \ Environment variables:\n\
33 | \ WIKILON_HOME: directory for persistent data.\n\
34 | \ defaults to OS dependent user app directory.\n\
35 | \\n\
36 | \ For TLS and HTTPS:\n\
37 | \ add 'wiki.key' and 'wiki.crt' files to WIKILON_HOME\n\
38 | \ if TLS is enabled, insecure connections are denied\n\
39 | \\n\
40 | \The wikilon web server is primarily accessed and configured through a\n\
41 | \normal browser. Documentation online. Initial configuration requires\n\
42 | \an -admin password, but persistent administrators may be configured.\n\
43 | \"
44 |
45 | -- should I allow users to *provide* the admin password? For now, I
46 | -- think that's probably unwise. Better to just treat admin as the
47 | -- bootstrap account, discourage unnecessary use in scripts etc.
48 |
49 | -- TLS files
50 | crt, key :: FS.FilePath
51 | crt = "wiki.crt"
52 | key = "wiki.key"
53 |
54 | data Args = Args
55 | { a_port :: Int
56 | , a_dbsz :: Int
57 | , a_admin :: Bool
58 | , a_help :: Bool
59 | , a_bad :: [String]
60 | }
61 | defaultArgs :: Args
62 | defaultArgs = Args
63 | { a_port = 3000
64 | , a_dbsz = 4 * 1000 * 1000
65 | , a_admin = False
66 | , a_help = False
67 | , a_bad = []
68 | }
69 |
70 | readSizeMul :: String -> Maybe (Int -> Int)
71 | readSizeMul "" = Just id
72 | readSizeMul "M" = Just id
73 | readSizeMul "G" = Just $ (* (1000))
74 | readSizeMul "T" = Just $ (* (1000 * 1000))
75 | readSizeMul _ = Nothing
76 |
77 | readSize :: String -> Maybe Int
78 | readSize s = case reads s of
79 | [(a,m)] | (a > 0) ->
80 | readSizeMul m >>= \ f ->
81 | return (f a)
82 | _ -> Nothing
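-- e.g. readSize "4" == Just 4, readSize "32G" == Just 32000, and
-- readSize "4T" == Just 4000000; sizes are counted in megabytes.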
83 |
84 | readPort :: String -> Maybe Int
85 | readPort s = case reads s of
86 | [(a,"")] | (a > 0) -> Just a
87 | _ -> Nothing
88 |
89 | procArgs :: [String] -> Args
90 | procArgs = L.foldr (flip pa) defaultArgs where
91 | pa a ('-': 'p' : (readPort -> Just p)) = a { a_port = p }
92 | pa a ('-': 'd' : 'b': (readSize -> Just mb)) = a { a_dbsz = mb }
93 | pa a "-admin" = a { a_admin = True }
94 | pa a "-?" = a { a_help = True }
95 | pa a s = let bad' = s : a_bad a in a { a_bad = bad' }
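-- e.g. procArgs ["-p8080", "-db32G", "-admin"] sets a_port = 8080,
-- a_dbsz = 32000 (MB), and a_admin = True.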
96 |
97 | useWikilonHome :: IO ()
98 | useWikilonHome = do
99 | e <- Env.getEnvironment
100 | home <- case L.lookup "WIKILON_HOME" e of
101 | Just h -> return h
102 | Nothing -> FS.getAppUserDataDirectory "wikilon"
103 | FS.createDirectoryIfMissing True home
104 | FS.setCurrentDirectory home
105 |
106 | -- create fresh admin password, as needed
107 | mkAdminPass :: IO BS.ByteString
108 | mkAdminPass = BS.take 24 . hash <$> Sys.getEntropy 48
109 |
110 | putErrLn :: String -> IO ()
111 | putErrLn = Sys.hPutStrLn Sys.stderr
112 |
113 | -- run Warp using TLS if available, otherwise insecure. If only one of the key/crt files exists, we still attempt TLS so that a partial configuration fails loudly instead of silently serving plain HTTP.
114 | runWarp :: Int -> Wai.Application -> IO ()
115 | runWarp port app = do
116 | bUseTLS <- anyM FS.doesFileExist [crt, key]
117 | let tlsOpt = WS.tlsSettings crt key
118 | let warpOpt = WS.setPort port
119 | $ WS.setTimeout 120 -- allow longer heartbeats
120 | $ WS.defaultSettings
121 | if bUseTLS then WS.runTLS tlsOpt warpOpt app
122 | else WS.runSettings warpOpt app
123 |
124 | haltOnError :: SomeException -> IO a
125 | haltOnError e = do
126 | putErrLn "Wikilon halted with exception:"
127 | putErrLn (show e)
128 | putErrLn "Aborting."
129 | Sys.exitFailure
130 |
131 | -- | This essentially just passes the buck to Wikilon.
132 | main :: IO ()
133 | main = body `catch` haltOnError where
134 | getArgs = procArgs <$> Env.getArgs
135 | body =
136 | getArgs >>= \ args ->
137 | if a_help args then Sys.putStrLn helpMsg else
138 | let badArgs = not (L.null (a_bad args)) in
139 | if badArgs then failWithBadArgs args else do
140 | admin <- if not (a_admin args) then return Nothing else do
141 | pass <- mkAdminPass
142 | Sys.putStrLn ("admin:" ++ U8.toString pass)
143 | return (Just pass)
144 | runServer args admin
145 | runServer args admin = do
146 | useWikilonHome
147 | let wikiOpts = Wiki.setAdmin admin
148 | $ Wiki.defaultOpts
149 | db <- DB.open "db" (a_dbsz args)
150 | app <- Wiki.mkWaiApp wikiOpts db
151 | runWarp (a_port args) app
152 | failWithBadArgs args = do
153 | putErrLn $ "Unrecognized arguments (try `-?` for help): "
154 | putErrLn $ show (a_bad args)
155 | Sys.exitFailure
156 |
157 |
158 |
159 |
160 |
--------------------------------------------------------------------------------
/hs/src/Awelon.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE NoImplicitPrelude #-}
2 | -- | Awelon Language
3 | --
4 | -- Awelon language has a very simple syntax and semantics. The syntax
5 | -- is essentially a stream of words (with special forms for texts and
6 | -- annotations) and first-class subprograms. The semantics is based on
7 | -- rewriting with only four primitive combinators.
8 | --
9 | -- The syntax is intended for use with projectional editing. Purely
10 | -- textual projections might introduce decimal numbers, lists, DSLs,
11 | -- local variables and lambdas and let expressions, and so on. But
12 | -- we can feasibly build graphical programming environments above the
13 | -- simple syntax.
14 | --
15 | -- The semantics is based on confluent rewriting, i.e. it's purely
16 | -- functional and deterministic but may also work with incomplete
17 | -- programs, and any subprogram may be evaluated. This works with
18 | -- the syntactic projections, too, since the output may be subject
19 | -- to the same view models as the input.
20 | --
21 | -- Awelon has only four primitive rewrite rules, one primitive data
22 | -- element (the block of code):
23 | --
24 | -- [B][A]a == A[B] (apply)
25 | -- [B][A]b == [[B]A] (bind)
26 | -- [A]c == [A][A] (copy)
27 | -- [A]d == (drop)
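--
-- Derived combinators follow from these rules alone. For example
-- (an illustration, easily checked by hand), a stack swap can be
-- defined as swap = [] b a, since:
--
--   [B][A] [] b a == [B][[A]] a == [A][B]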
28 | --
29 | -- While these semantics are simple, they're far from bare metal and
30 | -- some extra work is required to achieve performance. Arithmetic and
31 | -- number models must be recognized by the runtime and accelerated to
32 | -- perform efficient number crunching. Linear algebra too, ideally.
33 | --
34 | -- So the job of a runtime or compiler becomes software acceleration
35 | -- for a variety of widely useful models, in addition to more basic
36 | -- optimizations.
37 | --
38 | -- Awelon is complicated further by annotations, parenthetical words
39 | -- that extend the runtime but do not affect formal program semantics.
40 | -- These extensions may affect performance (e.g. adding parallelism,
41 | -- memoizing computations, optimizing representations) or cause an
42 | -- invalid program to fail fast (e.g. simple type assertions).
43 | --
44 | -- Wikilon aims to support Awelon with reasonable performance.
45 | --
46 | module Awelon () where
47 |
48 |
49 |
50 |
51 |
--------------------------------------------------------------------------------
/hs/src/Awelon/CX.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE TypeFamilies, FlexibleContexts, ExistentialQuantification #-}
2 |
3 | -- | Abstract Runtime Context for Awelon
4 | --
5 | -- Although Awelon is purely functional, it does leverage annotations
6 | -- and accelerators to support performance or debugging. Rather than
7 | -- a concrete implementation in IO, Awelon.CX represents the required
8 | -- evaluation contexts with an abstract typeclass-based API admitting
9 | -- some purely functional implementations.
10 | --
11 | module Awelon.CX
12 | ( Par(..), Async(..), race'
13 | , Lazy(..), Trace(..)
14 | , Stowage(..), load
15 | , Cache(..)
16 | , CX(..)
17 | , ByteString
18 | , module Awelon.Hash
19 | ) where
20 |
21 | import Control.Applicative
22 | import Data.ByteString.Lazy (ByteString)
23 | import Awelon.Hash
24 |
25 | -- | Fork-join parallelism via composable futures.
26 | class (Monad (F m), Monad m) => Par m where
27 | data F m :: * -> * -- ^ a future value
28 | fork :: m a -> m (F m a) -- ^ parallel future
29 | join :: F m a -> m a -- ^ wait for result
30 |
31 | -- Might need some means to extend monadic operations.
32 | -- Something like the following:
33 | --
34 | -- andThen :: F m a -> (a -> m b) -> m (F m b) -- ^ extend parallel task
35 | -- might be able to provide a default impl with fork and join
36 | --
37 | -- Or perhaps:
38 | --
39 | -- liftF :: m a -> F m a
40 | --
41 | -- Not really sure what's best here, nor how much expressiveness
42 | -- we need.
43 |
44 | -- | Concurrency via arrival-order indeterminism.
45 | --
46 | -- The `race` operation enables clients to process results based on
47 | -- the order in which they become available. This introduces a form
48 | -- of non-determinism, which is generally impure. But indeterminism
49 | -- can be leveraged to accelerate confluent computations. So this is
50 | -- a feature to use, albeit with some care.
51 | class (Par m) => Async m where
52 | race :: F m a -> F m b -> F m (Either a b)
53 |
54 | -- | Data-preserving race, for linearity.
55 | race' :: Async m => F m a -> F m b -> F m (Either (a, F m b) (F m a, b))
56 | race' fa fb = k <$> race fa fb where
57 | k (Left a) = Left (a,fb)
58 | k (Right b) = Right (fa,b)
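
-- For example (a sketch, not part of this API): collect two results
-- in arrival order, retaining the leftover future from race'.
--
--   inArrivalOrder :: (Async m) => F m a -> F m a -> m [a]
--   inArrivalOrder fa fb = join (race' fa fb) >>= \ r -> case r of
--       Left (a, fb') -> (\ b -> [a, b]) <$> join fb'
--       Right (fa', b) -> (\ a -> [b, a]) <$> join fa'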
59 |
60 | -- | Note on Acceleration of Kahn Process Networks
61 | --
62 | -- Use of Async peek and wait can support KPN accelerators via the
63 | -- arrival-order merge for outputs from child processes. This can
64 | -- reduce processing latencies, and improve utilization of our CPUs.
65 | -- Further, except where a message's destination depends on content,
66 | -- it is feasible to route future message values, such that routing
67 | -- is performed ahead of processing. (With a little filtering, we
68 | -- may even route some messages to the wrong destination.)
69 | --
70 | -- It is feasible to perform better by explicitly routing with shared
71 | -- memory, for example by use of `ivar :: m (a -> m (), F m a)` for
72 | -- single-assignment variables, to model channels with push behavior.
73 | -- However, ivars align poorly to Awelon's semantics, serializability,
74 | -- memoization, persistence, local reasoning. And they're also bad for
75 | -- physical distribution, requiring shared memory and distributed GC.
76 | --
77 | -- Fortunately, explicit routing is unlikely to be our bottleneck. So
78 | -- the clean but inexpressive `Async` class is favored at this time.
79 |
80 | -- | Lazy (Call by Need) Evaluation
81 | --
82 | -- A lazy computation will be performed only once, and only when some
83 | -- demand exists, with the result being memoized. This could be useful
84 | -- for some evaluation modes, JIT, lazy linking and evaluation of the
85 | -- dependencies, CSE optimization, etc.. Some accelerators might also
86 | -- use laziness.
87 | class Lazy m where
88 | lazy :: m a -> m a -- ^ wrap computation for memoization
89 |
90 | -- | Trace Message Debugging
91 | --
92 | -- Debug messages are a simple, convenient technique for debugging.
93 | -- Awelon may support debug messages via (trace) annotations, and
94 | -- thus generate an additional sequence of messages.
95 | class Trace m where
96 | trace :: ByteString -> m ()
97 |
98 | -- | Data Stowage
99 | --
100 | -- Awelon heavily utilizes a notion of 'secure hash resources', which
101 | -- are essentially just binary resources that are globally identified
102 | -- by secure hash, specifically Awelon.Hash.
103 | --
104 | -- This API supports asynchronous loads, useful if stowage is backed
105 | -- by a networked service. However, failure is not handled directly.
106 | -- If any resource cannot be loaded, that should be represented as an
107 | -- exception at the monad layer.
108 | --
109 | -- Resources may generally be garbage collected. It's up to the monad
110 | -- to provide sensible GC that mitigates common sources of failure,
111 | -- e.g. to avoid GC of newly stowed resources except at transaction
112 | -- or checkpoint boundaries (provided through a persistence API).
113 | --
114 | class (Monad m) => Stowage m where
115 | stow :: ByteString -> m Hash
116 | load_async :: Hash -> m (m ByteString)
117 |
118 | load :: (Stowage m) => Hash -> m ByteString
119 | load h = load_async h >>= id
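
-- For example (sketch): issue a batch of loads up front, then await
-- the results, letting a networked backend service them in parallel.
--
--   loadAll :: (Stowage m) => [Hash] -> m [ByteString]
--   loadAll hs = mapM load_async hs >>= sequence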
120 |
121 |
122 |
123 | -- | Cached Computations
124 | --
125 | -- Caching plays an essential role in Awelon systems, the basis for
126 | -- incremental computing. Caching may be implicit for toplevel words,
127 | -- or explicit for compositional views of persistent data structures.
128 | --
129 | -- The latter requires the compositional property:
130 | --
131 | -- ∃F.∀X,+,Y. V(X + Y) == F(V(X), '+', V(Y))
132 | --
133 | -- That is, the view on a composite is a function of the views on the
134 | -- individual components. With this, we can easily cache V(X) and V(Y)
135 | -- so when we update Y and compute V(X+Y') we can reuse V(X). If this
136 | -- caching is performed deeply, computing V(Y') might then reuse some
137 | -- structure internally.
138 | --
139 | -- I favor a monotonic cache, which avoids the challenges surrounding
140 | -- cache invalidation. The key must uniquely identify the computation,
141 | -- including the dependencies (e.g. by including a secure hash). The
142 | -- runtime may heuristically delete cache entries to recover space.
143 | --
144 | -- To maximize reuse, cache keys should also be precise, such that
145 | -- most irrelevant changes to a codebase don't affect the key. But
146 | -- there is a heuristic tradeoff between precision and performance
147 | -- of the key computation.
148 | --
149 | class Cache m where
150 | cacheGet :: ByteString -> m (Maybe ByteString) -- ^ cacheGet key
151 | cachePut :: ByteString -> ByteString -> m () -- ^ cachePut key val
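
-- For example (a sketch; memoView is my own illustration): memoize an
-- expensive function on a binary, keying the cache by a stable name
-- for the function plus the secure hash of the input. Such keys never
-- need invalidation, only heuristic expiration.
--
--   memoView :: (Monad m, Cache m) => ByteString -> (ByteString -> m ByteString) -> ByteString -> m ByteString
--   memoView name f s =
--       let k = name <> fromStrict (hashL s) in -- fromStrict via Data.ByteString.Lazy
--       cacheGet k >>= \ found -> case found of
--           Just v -> return v
--           Nothing -> f s >>= \ v -> cachePut k v >> return v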
152 |
153 | -- | An Awelon context simply aggregates the aforementioned features.
154 | class ( Monad m, Monad (F m)
155 | , Par m, Async m, Lazy m
156 | , Stowage m, Cache m, Trace m
157 | ) => CX m
158 |
159 | -- Am I missing anything?
160 | --
161 | -- It isn't clear whether I should handle quotas here. Nor how.
162 | --
163 | -- The Haskell runtime cannot easily track unique references, so I
164 | -- won't support in-place list manipulations. That said, I could
165 | -- leverage Data.Sequence or persistent-vector in that role.
166 |
167 |
168 |
--------------------------------------------------------------------------------
/hs/src/Awelon/Dict.hs:
--------------------------------------------------------------------------------
1 |
2 | -- | Awelon Dictionary Format
3 | --
4 | -- Awelon language specifies a simple, DVCS-inspired dictionary
5 | -- format that is designed for scalability and structure sharing
6 | -- between versions and developers. It looks roughly like this:
7 | --
8 | -- secureHashOfPatch1
9 | -- secureHashOfPatch2
10 | -- @word1 def1
11 | -- @word2 def2
12 | -- @@ns secureHashOfSubDict
13 | -- @word3 def3
14 | --
15 | -- Awelon uses secure hashes to identify binary resources instead of
16 | -- filenames or URLs. The resources identified by the initial hashes
17 | -- are logically inlined. The hash after `@@ns` supports hierarchical
18 | -- containment of one dictionary within another. At the top level,
19 | -- a dictionary may be precisely versioned by a root secure hash. The
20 | -- last definition of any symbol wins.
21 | --
22 | -- Thus we have a deeply immutable, persistent data structure that
23 | -- can be updated using flexible update models. But this flexibility
24 | -- does create some challenges for indexing. We cannot assume sorted
25 | -- words, for example.
26 | --
27 | -- And as for scale, Awelon's application model unifies the database
28 | -- and the codebase. Thus, we must think about dictionaries the size
29 | -- of popular forums or Wikipedia, at least in the long term. Scale
30 | -- creates its own challenges, since we cannot expect to keep a full
31 | -- codebase in memory, nor even an index on the codebase.
32 | --
33 | -- Ideally, I will want several indices:
34 | --
35 | -- * quickly find definition for a word
36 | -- * reverse lookup, find uses of a word
37 | -- * find words matching a suffix or prefix
38 | -- * fuzzy find, spellcheck: words by similarity
39 | -- * full text search
40 | -- * cached evaluated definitions, types, optimized link defs, etc.
41 | -- * word to a deep-version behavior hash? (a vanderhoof versum?)
42 | -- * reverse type lookup, hoogle style for code completion
43 | --
44 | -- I will also want to keep histories for dictionaries, and to
45 | -- merge updates on similar dictionaries, and support transactional
46 | -- updates, and so on. Everything will need to be incremental, so I
47 | -- can quickly account for changes or differences in code. Memoized
48 | -- computation of partial, composable indices is a promising approach,
49 | -- though it may constrain which indices can efficiently be maintained.
50 | --
51 | -- There is much to do here. It's very daunting. But I'll just add one
52 | -- feature at a time, as needed.
53 | --
54 | module Awelon.Dict
55 | (
56 | ) where
57 |
58 |
59 |
--------------------------------------------------------------------------------
/hs/src/Awelon/Dict/Format.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE BangPatterns #-}
2 | -- | This module is concerned with parsing or serializing the Awelon
3 | -- dictionary representation, which essentially consists of:
4 | --
5 | -- secureHashOfPatch1
6 | -- secureHashOfPatch2
7 | -- @word1 def1
8 | -- @word2 def2
9 | -- @@ns secureHashOfDict
10 | -- ...
11 | --
12 | -- Secure hashes in Awelon identify external resources (see Stowage
13 | -- under Awelon.CX). A dictionary starts with a sequence of secure
14 | -- hashes (one per line) that are logically included, followed by
15 | -- definitions. Each definition may specify a namespace or a word,
16 | -- indicated by prefix - `@@ns` or `@word`.
17 | --
18 | -- A namespace is defined with a secure hash (or may be empty), while
19 | -- a word should be defined with valid Awelon code. Awelon code also
20 | -- has a very simple syntactic structure (see Awelon.Syntax).
21 | --
22 | -- Order of definitions for distinct symbols is irrelevant. But if a
23 | -- word or namespace is defined more than once, the 'last' definition
24 | -- wins. This provides a simple, compositional patch and update model.
25 | -- A dictionary might be treated as an append-only update log. A full
26 | -- dictionary may be understood as a patch on the empty dictionary.
27 | --
28 | -- By convention, a word may be 'undefined' by defining it to itself,
29 | -- e.g. `@foo foo`. Cyclic definitions aren't valid Awelon, but we'll
30 | -- not call out the trivial cycle any more than we normally would an
31 | -- undefined word. A namespace may be undefined via the empty string.
32 | --
33 | -- This module only detects trivial parse errors. A lot more work must
34 | -- be done to search for cyclic definitions, type or totality errors,
35 | -- and so on.
36 | --
37 | module Awelon.Dict.Format
38 | ( Dict(..), emptyDict
39 | , decode, DictErr
40 | , encodeBB, encode
41 | , splitDict, splitLL, findLLSep
42 | ) where
43 |
44 | import Prelude hiding (Word)
45 | import qualified Data.ByteString as BS
46 | import qualified Data.ByteString.Builder as BB
47 | import qualified Data.ByteString.Lazy as LBS
48 | import qualified Data.ByteString.Lazy.UTF8 as LU8
49 | import qualified Data.List as L
50 | import Data.Int (Int64)
51 | import Data.Word (Word8)
52 | import qualified Data.Map as M
53 | import Data.Monoid
54 | import Awelon.Syntax (NS(..), Word(..), Prog(..), validWordByte)
55 | import qualified Awelon.Syntax as P
56 | import Awelon.Hash
57 |
58 | -- | A parsed dictionary binary or patch file.
59 | --
60 | -- This representation loses irrelevant information such as whitespace
61 | -- formatting, order of definitions, and definitions later overridden.
62 | --
63 | -- For the namespaces, the `Nothing` value indicates an empty string was
64 | -- specified, so we only use a Hash value where we actually have a valid
65 | -- hash string.
66 | data Dict = Dict { dictInc :: [Hash]
67 | , dictNS :: M.Map NS (Maybe Hash)
68 | , dictDefs :: M.Map Word Prog
69 | } deriving (Eq, Ord)
70 |
71 | emptyDict :: Dict
72 | emptyDict = Dict mempty mempty mempty
73 |
74 | isWS :: Word8 -> Bool
75 | isWS c = (10 == c) || (32 == c)
76 |
77 | skipWS :: LBS.ByteString -> LBS.ByteString
78 | skipWS = LBS.dropWhile isWS
79 |
80 | readHash :: LBS.ByteString -> Maybe (Hash, LBS.ByteString)
81 | readHash s =
82 | let (h, s') = LBS.span validHashByte s in
83 | if (validHashLen /= LBS.length h) then Nothing else
84 | Just (LBS.toStrict h, s')
85 |
86 | readHashes :: LBS.ByteString -> ([Hash], LBS.ByteString)
87 | readHashes = rd [] . skipWS where
88 | rd r s = case readHash s of
89 | Just (h, s') -> rd (h:r) (skipWS s')
90 | Nothing -> (L.reverse r, s)
91 |
92 | -- | Dictionary decode errors.
93 | --
94 | -- At the moment, errors are simply returned in terms of unprocessed
95 | -- entries, or any unrecognized suffix of the header. I might need to
96 | -- improve error reporting in the future, e.g. to include some line
97 | -- numbers. But this should be sufficient for simple error reports.
98 | type DictErr = (LBS.ByteString, [LBS.ByteString])
99 |
100 | -- | Decode (parse) a dictionary binary.
101 | --
102 | -- This is a lenient decoder. It parses as much as it can while
103 | -- returning both the dictionary and parse errors.
104 | decode :: LBS.ByteString -> (DictErr, Dict)
105 | decode binary = ((eInc, eEnt), Dict inc ns defs) where
106 | (hd, bdy) = splitDict binary
107 | (inc, eInc) = readHashes hd
108 | ents = fmap decodeEnt bdy
109 | eEnt = [e | Right (Right e) <- ents]
110 | ns = M.fromList [x | Right (Left x) <- ents]
111 | defs = M.fromList [x | Left x <- ents]
112 |
113 | decodeEnt :: LBS.ByteString -> Either (Word, Prog) (Either (NS, Maybe Hash) LBS.ByteString)
114 | decodeEnt s =
115 | let badEnt = Right (Right s) in
116 | case LBS.uncons s of
117 | Just (64, s') ->
118 | let (w,d) = LBS.span validWordByte s' in
119 | if (LBS.null w) then badEnt else
120 | let ns = NS (Word (LBS.toStrict w)) in
121 | case decodeNSDef d of
122 | Just h -> Right (Left (ns, h))
123 | _ -> badEnt
124 | _ ->
125 | let (w,d) = LBS.span validWordByte s in
126 | if (LBS.null w) then badEnt else
127 | let w' = Word (LBS.toStrict w) in
128 | case P.decode d of
129 | Right prog -> Left (w', prog)
130 | _ -> badEnt
131 |
132 | -- | A namespace definition is either a single hash or empty string.
133 | -- Here 'Nothing' means parse failure, while 'Just Nothing' means we
134 | -- received an empty string. Whitespace is trimmed.
135 | decodeNSDef :: LBS.ByteString -> Maybe (Maybe Hash)
136 | decodeNSDef s =
137 | let s' = skipWS s in
138 | if LBS.null s' then Just Nothing else
139 | case readHash s' of
140 | Just (h, rem) | LBS.null (skipWS rem) -> Just (Just h)
141 | _ -> Nothing
142 |
143 | -- | Our encoder will write out a dictionary binary.
144 | encode :: Dict -> LBS.ByteString
145 | encode = BB.toLazyByteString . encodeBB
146 |
147 | -- | Encode with a flexible output strategy. This uses a very
148 | -- simple and consistent space formatting: one SP after each
149 | -- word or namespace, one LF at the end.
150 | encodeBB :: Dict -> BB.Builder
151 | encodeBB (Dict inc ns defs) = bbInc <> bbNS <> bbDefs where
152 | bbInc = mconcat $ fmap bbIncElem inc
153 | bbNS = mconcat $ fmap bbNSElem (M.toList ns)
154 | bbDefs = mconcat $ fmap bbDefElem (M.toList defs)
155 | bbIncElem h = BB.byteString h <> BB.word8 10
156 | bbNSElem (ns, def) =
157 | BB.word8 64 <> BB.word8 64 <> BB.byteString (wordBytes (nsWord ns)) <>
158 | BB.word8 32 <> maybe mempty BB.byteString def <> BB.word8 10
159 | bbDefElem (w, def) =
160 | BB.word8 64 <> BB.byteString (wordBytes w) <> BB.word8 32 <>
161 | P.encodeBB def <> BB.word8 10
162 |
163 | instance Show Dict where
164 | showsPrec _ = showString . LU8.toString . encode
165 |
166 | -- | An Awelon dictionary separates logical lines with the `LF @`
167 | -- sequence. This function returns the index to the next `LF @`
168 | -- line separator (returning the index for the LF).
169 | findLLSep :: LBS.ByteString -> Maybe Int64
170 | findLLSep = f 0 where
171 | f !k s = case LBS.elemIndex 10 s of
172 | Nothing -> Nothing
173 | Just ix ->
174 | let skip = (1 + ix) in
175 | let s' = LBS.drop skip s in
176 | case LBS.uncons s' of
177 | Just (64, _) -> Just (k + ix)
178 | _ -> f (k + skip) s'
179 |
180 | -- | split the logical lines
181 | splitLL :: LBS.ByteString -> [LBS.ByteString]
182 | splitLL s = case findLLSep s of
183 | Nothing -> [s]
184 | Just ix -> (LBS.take ix s) : splitLL (LBS.drop (ix + 2) s)
185 |
186 | -- | break dictionary into header and list of definition entries.
187 | splitDict :: LBS.ByteString -> (LBS.ByteString, [LBS.ByteString])
188 | splitDict s = case LBS.uncons s of
189 | Just (64, s') -> (mempty, splitLL s')
190 | _ -> case findLLSep s of
191 | Nothing -> (s, mempty)
192 | Just ix -> (LBS.take ix s, splitLL (LBS.drop (ix + 2) s))
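
-- informal example:
--   splitDict "h1\nh2\n@w1 d1\n@@ns h\n" == ("h1\nh2", ["w1 d1", "@ns h\n"])
-- note that the `LF @` split consumes one leading '@' from each entry,
-- which is why decodeEnt sees `@ns ...` for namespaces and `word ...`
-- for ordinary definitions.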
193 |
194 | -- note: we might wish to annotate line numbers in case of a parse error.
195 |
196 | -- parse hashes
197 | -- parse entries
198 | -- map entries
199 |
200 |
201 |
--------------------------------------------------------------------------------
/hs/src/Awelon/Dict/Rename.hs:
--------------------------------------------------------------------------------
1 |
2 | -- | Renaming words is a common refactoring that is relatively easy
3 | -- to automate, assuming we can perform lookups and reverse lookups
4 | -- in context of the dictionary.
5 | data Awelon.Dict.Rename
6 | -- ( renameWord, renameWords
7 | -- , renameDict, renameDicts
8 | -- ) where
9 |
10 | -- TODO:
11 | --
12 | -- I need an appropriate reverse lookup index, to find words to
13 | -- reference a given word or namespace.
14 |
15 |
--------------------------------------------------------------------------------
/hs/src/Awelon/Hash.hs:
--------------------------------------------------------------------------------
1 | module Awelon.Hash
2 | ( Hash
3 | , hash, hashL
4 | , hashAlphabet
5 | , validHashByte, validHashLen, validHash
6 | ) where
7 |
8 | import qualified Data.ByteString.Lazy as LBS
9 | import qualified Data.ByteString as BS
10 | import qualified Data.ByteString.Internal as BS
11 | import qualified Data.ByteString.UTF8 as U8
12 | import qualified Crypto.Hash.BLAKE2.BLAKE2b as B2b
13 | import qualified Data.List as L
14 | import qualified Data.Array.Unboxed as A
15 | import Data.Word (Word8, Word32)
16 | import Data.Char (chr, ord)
17 | import Data.Bits
18 | import Data.String
19 | import Foreign.ForeignPtr (withForeignPtr)
20 | import Foreign.Ptr
21 | import Foreign.Storable
22 | import System.IO.Unsafe (unsafeDupablePerformIO)
23 | import Control.Exception (assert)
24 |
25 | -- | A Hash is just a bytestring, for now. But it should be valid.
26 | --
27 | -- Note: In context of lookups for secure hash resources, we may
28 | -- need to ensure constant-time comparisons.
29 | type Hash = BS.ByteString
30 |
31 | -- | Awelon language makes widespread use of secure hashes to name
32 | -- various resources. In particular, we use a 280-bit Blake2b hash
33 | -- and we encode this using a specialized base32 alphabet.
34 | --
35 | -- > :set -XOverloadedStrings
36 | -- > hash "test"
37 | -- "HSjFNGRnqHpFFbPhlThmqCbqkmDSHCBlJNnmDPnDtnCpKHqtNgqhRMJG"
38 | --
39 | hash :: BS.ByteString -> Hash
40 | hash = encodeHash . B2b.hash 35 mempty
41 |
42 | hashL :: LBS.ByteString -> Hash
43 | hashL = encodeHash . b2b_digest 35
44 |
45 | b2b_digest :: Int -> LBS.ByteString -> BS.ByteString
46 | b2b_digest z = B2b.finalize z
47 | . L.foldl (flip B2b.update) (B2b.initialize z)
48 | . LBS.toChunks
49 |
50 | -- | Awelon's unusual Base32 alphabet for hashes.
51 | --
52 | -- This alphabet does not conflict with human meaningful words or
53 | -- numbers, nor structural conventions that use punctuation. And it
54 | -- is unlikely to appear by accident in context of conservative GC.
55 | hashAlphabet :: String
56 | hashAlphabet = assert (32 == L.length alphabet) $ alphabet where
57 | alphabet = "bcdfghjklmnpqrstBCDFGHJKLMNPQRST"
58 |
59 | -- | The length of a valid hash string.
60 | validHashLen :: (Num a) => a
61 | validHashLen = 56
62 |
63 | hashBytesArray :: A.UArray Word8 Bool
64 | hashBytesArray = A.listArray (0,255) $ fmap accept [0..255] where
65 | accept = flip L.elem hashAlphabet . chr
66 |
67 | -- | Tests whether a byte is a valid character of the base32 hash alphabet.
68 | validHashByte :: Word8 -> Bool
69 | validHashByte = (A.!) hashBytesArray
70 |
71 | validHash :: BS.ByteString -> Bool
72 | validHash s = (validHashLen == BS.length s)
73 | && (BS.all validHashByte s)
74 |
75 | -- encoding
76 | type Word5 = Word8
77 |
78 | eW5A :: A.UArray Word5 Word8
79 | eW5A = A.listArray (0,31) $ fmap (fromIntegral . ord) hashAlphabet
80 |
81 | eW5 :: Word5 -> Word8
82 | eW5 = (A.!) eW5A
83 |
84 | encodeHash :: BS.ByteString -> BS.ByteString
85 | encodeHash (BS.PS fptr off sz) =
86 | unsafeDupablePerformIO $
87 | BS.create validHashLen $ \ dst ->
88 | withForeignPtr fptr $ \ src ->
89 | packHash (src `plusPtr` off) dst
90 |
91 | -- pack hash assuming sufficient data in source and space in destination.
92 | -- Note that this is highly specialized for validHashLen. It does not
93 | -- try for a generic hash encoding.
94 | --
95 | -- We want to output 8 bytes for every 5 bytes of input. Following normal
96 | -- conventions for base32 (RFC4648) the high order bit of the first byte
97 | -- will be the first bit in the stream.
98 | --
99 | -- We never actually 'decode' this base32 string. It's more convenient and
100 | -- debuggable to stick with the ASCII encoding after the hash is formed.
101 | packHash :: Ptr Word8 -> Ptr Word8 -> IO ()
102 | packHash s d = p280 where
103 | p280 = do { p40 0; p40 1; p40 2; p40 3; p40 4; p40 5; p40 6 }
104 | p40 n = do
105 | let r ix = peekElemOff (s `plusPtr` (5 * n)) ix
106 | let w ix v = pokeElemOff (d `plusPtr` (8 * n)) ix (eW5 v)
107 | -- read five bytes
108 | i0 <- r 0
109 | i1 <- r 1
110 | i2 <- r 2
111 | i3 <- r 3
112 | i4 <- r 4
113 | -- write eight bytes
114 | w 0 $ ((i0 .&. 0xF8) `unsafeShiftR` 3)
115 | w 1 $ ((i0 .&. 0x07) `unsafeShiftL` 2) .|.
116 | ((i1 .&. 0xC0) `unsafeShiftR` 6)
117 | w 2 $ ((i1 .&. 0x3E) `unsafeShiftR` 1)
118 | w 3 $ ((i1 .&. 0x01) `unsafeShiftL` 4) .|.
119 | ((i2 .&. 0xF0) `unsafeShiftR` 4)
120 | w 4 $ ((i2 .&. 0x0F) `unsafeShiftL` 1) .|.
121 | ((i3 .&. 0x80) `unsafeShiftR` 7)
122 | w 5 $ ((i3 .&. 0x7C) `unsafeShiftR` 2)
123 | w 6 $ ((i3 .&. 0x03) `unsafeShiftL` 3) .|.
124 | ((i4 .&. 0xE0) `unsafeShiftR` 5)
125 | w 7 $ ((i4 .&. 0x1F) )
126 |
127 |
--------------------------------------------------------------------------------
/hs/src/Wikilon.hs:
--------------------------------------------------------------------------------
1 |
2 | -- | Wikilon ultimately provides a WAI web service.
3 | module Wikilon
4 | ( mkWaiApp
5 | , WSOpts, defaultOpts
6 | , setAdmin, getAdmin
7 | ) where
8 |
9 | import Data.Monoid
10 | import Data.ByteString (ByteString)
11 | import Wikilon.DB (DB)
12 | import Network.Wai (Application)
13 | import qualified Network.Wai as Wai
14 |
15 | -- | Extra configuration for the Wikilon service.
16 | --
17 | -- Most configuration will be performed at runtime, by users or agents
18 | -- with administrative authorities. But a few features may need to be
19 | -- handled up front.
20 | data WSOpts = WSOpts
21 | { ws_admin :: Maybe ByteString
22 | }
23 |
24 | defaultOpts :: WSOpts
25 | defaultOpts = WSOpts
26 |     { ws_admin = Nothing
27 | }
28 |
29 | -- | Administration of Wikilon
30 | --
31 | -- The 'admin' account is reserved for administrative bootstrapping
32 | -- and troubleshooting. Enabled by setting a password at startup. This
33 | -- password is usable only for the process lifetime.
34 | setAdmin :: Maybe ByteString -> WSOpts -> WSOpts
35 | setAdmin v ws = ws { ws_admin = v }
36 |
37 | getAdmin :: WSOpts -> Maybe ByteString
38 | getAdmin = ws_admin
39 |
40 | -- other potential options:
41 | --
42 | -- support for web service composition (very Low priority)
43 | -- access to a higher level rep. of application
44 | -- prefix for keys in the DB
45 | -- extra prefix for URLs
46 | --
47 | -- The following might be better configured at runtime by
48 | -- administrative agents.
49 | --
50 | -- Integration of external resources:
51 | -- files, and resources in the filesystem
52 | -- distributed computation setup, authorities
53 | -- cooperative Wikilon nodes
54 | -- creation of VMs
55 | -- OpenCL clouds
56 | -- backups
57 | -- logging
58 | --
59 | -- Tab icons, default CSS, etc..
60 |
61 |
62 | -- | The Wikilon web service
63 | mkWaiApp :: WSOpts -> DB -> IO Application
64 | mkWaiApp opts db = undefined
65 |
66 |
67 |
68 |
--------------------------------------------------------------------------------
/hs/src/Wikilon/CBT.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE BangPatterns, TypeFamilies #-}
2 |
3 | -- | Crit-bit Tree (Variant)
4 | --
5 | -- This is an in-memory crit-bit tree, a variant with key-value pairs
6 | -- and that keeps the least key in the parent. This largely acts as a
7 | -- reference for development of stowage-based KVM, but it may find
8 | -- use elsewhere.
9 | --
10 | module Wikilon.CBT
11 | ( CBK(..)
12 | , CBT(..)
13 | , Node(..)
14 | , null, member, union
15 | , insert, delete, lookup
16 | , fromList, toList
17 | ) where
18 |
19 | import Prelude hiding (lookup, null)
20 | import qualified Data.ByteString as BS
21 | import qualified Data.ByteString.Unsafe as BS
22 | import qualified Data.ByteString.Lazy as LBS
23 | import qualified Data.ByteString.Lazy.Internal as LBS
24 | import qualified Data.List as L
25 | import Data.Maybe
26 | import Data.Word
27 | import Data.Bits
28 |
29 |
30 |
31 | -- | Requirements for a crit-bit key.
32 | --
33 | -- thoughts: it might be better to change this to a `getByte` and a
34 | -- `length`, or perhaps a `getWord` where a word has a finite number
35 | -- of bytes. As is, too much crit-bit logic is embedded in this class.
36 | class (Eq k) => CBK k where
37 | -- | Determine bit at specified offset within key.
38 | getBit :: Int -> k -> Bool
39 |
40 | -- | Seek offset of first difference between two keys
41 | -- that is no less than the initial offset.
42 | critBit :: Int -> k -> k -> Maybe Int
43 |
44 | instance CBK Word8 where
45 | getBit = getBitFB
46 | critBit = critBitFB
47 | {-# INLINE getBit #-}
48 | {-# INLINE critBit #-}
49 |
50 | instance CBK Word16 where
51 | getBit = getBitFB
52 | critBit = critBitFB
53 | {-# INLINE getBit #-}
54 | {-# INLINE critBit #-}
55 |
56 | instance CBK Word32 where
57 | getBit = getBitFB
58 | critBit = critBitFB
59 | {-# INLINE getBit #-}
60 | {-# INLINE critBit #-}
61 |
62 | instance CBK Word64 where
63 | getBit = getBitFB
64 | critBit = critBitFB
65 | {-# INLINE getBit #-}
66 | {-# INLINE critBit #-}
67 |
68 | getBitFB :: FiniteBits b => Int -> b -> Bool
69 | getBitFB n a = testBit a ((finiteBitSize a) - 1 - n) -- bit 0 is the MSB
70 | {-# INLINE getBitFB #-}
71 |
72 | critBitFB :: FiniteBits b => Int -> b -> b -> Maybe Int
73 | critBitFB n a b =
74 | let x = (a `xor` b) `shiftL` n in
75 | if (zeroBits == x) then Nothing else
76 | Just $! (n + (countLeadingZeros x))
77 | {-# INLINE critBitFB #-}
78 |
79 |
80 | instance CBK BS.ByteString where
81 | getBit n s =
82 | let (q,r) = n `divMod` 8 in
83 | let s' = BS.drop q s in
84 | if BS.null s' then False else
85 | getBit r (BS.unsafeHead s')
86 | {-# INLINE getBit #-}
87 |
88 | critBit = begin where
89 | -- special handling for first byte
90 | begin n a b =
91 | let (q,r) = n `divMod` 8 in
92 | let a' = BS.drop q a in
93 | let b' = BS.drop q b in
94 | let hd s = if BS.null s then 0 else BS.unsafeHead s in
95 | case critBit r (hd a') (hd b') of
96 | Just off -> Just $! (n + (off - r))
97 | Nothing -> go2 (q + 1) (BS.drop 1 a') (BS.drop 1 b')
98 |
99 | -- compare whole bytes
100 | go2 !q !a !b =
101 | if BS.null a then go1 q b else
102 | if BS.null b then go1 q a else
103 | let ca = BS.unsafeHead a in
104 | let cb = BS.unsafeHead b in
105 | if (ca == cb)
106 | then go2 (q+1) (BS.unsafeTail a) (BS.unsafeTail b)
107 | else Just $! (8 * q) + countLeadingZeros (ca `xor` cb)
108 |
109 | -- search for a non-zero byte
110 | go1 !q !s = case BS.uncons s of
111 | Just (c, s')
112 | | (0 == c) -> go1 (q + 1) s'
113 | | otherwise -> Just $! (8 * q) + countLeadingZeros c
114 | Nothing -> Nothing
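
-- e.g. critBit 0 "a" "b" == Just 6: the bytes 0x61 and 0x62 first
-- differ at bit offset 6, counting from the MSB of the first byte.
-- getBit 6 "b" is then True, placing "b" in the right subtree.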
115 |
116 | data CBT k v
117 | = Empty
118 | | Root k (Node k v)
119 | deriving (Eq, Ord)
120 |
121 | data Node k v
122 | = Leaf v
123 | | Inner {-# UNPACK #-} !Int (Node k v) k (Node k v)
124 | deriving (Eq, Ord)
125 |
126 | -- question: should I add size information to nodes?
127 | --
128 | -- I'm not sure whether O(1) sizes are useful enough when they aren't
129 | -- being used to balance trees. But they might prove more useful for
130 | -- the KVMs, e.g. to keep size information for the stowage nodes so we
131 | -- can always determine size without high-latency lookups.
132 | --
133 | -- More generally, it might be convenient to compute monoidal meta-data
134 | -- that is a result of composition. Size information would be a simple
135 | -- monoid, but it seems feasible to compute useful summary data back
136 | -- up the tree. Still, might be better to focus on the plain old data
137 | -- type first, and create the variant as another module.
138 |
139 | instance (Show k, Show v) => Show (CBT k v) where
140 | showsPrec _ m = showString "fromList " . shows (toList m)
141 |
142 | instance (CBK k) => Monoid (CBT k v) where
143 | mempty = Empty
144 | mappend = union
145 |
146 | instance Functor (CBT k) where
147 | fmap _ Empty = Empty
148 | fmap fn (Root k n) = (Root k (fmap fn n))
149 |
150 | instance Functor (Node k) where
151 | fmap fn (Leaf v) = Leaf (fn v)
152 | fmap fn (Inner cb l k r) = Inner cb (fmap fn l) k (fmap fn r)
153 |
154 | -- | Convert CBT to list.
155 | toList :: CBT k v -> [(k,v)]
156 | toList Empty = []
157 | toList (Root k n) = go [] k n where
158 | go p k (Inner _ l k' r) = go ((k',r):p) k l
159 | go p k (Leaf v) = (k,v) : more p
160 | more ((k',r):p) = go p k' r
161 | more [] = []
162 |
163 | -- | Convert list to CBT.
164 | fromList :: (CBK k) => [(k,v)] -> CBT k v
165 | fromList = L.foldl' ins mempty where
166 | ins m (k,v) = insert k v m
167 |
168 | null :: CBT k v -> Bool
169 | null Empty = True
170 | null _ = False
171 |
172 | member :: (CBK k) => k -> CBT k v -> Bool
173 | member k m = not (isNothing (lookup k m))
174 |
175 | lookup :: (CBK k) => k -> CBT k v -> Maybe v
176 | lookup k = ini where
177 | ini Empty = Nothing
178 | ini (Root lk l) = go lk l
179 | go lk (Inner cb l rk r) =
180 | if getBit cb k then go rk r
181 | else go lk l
182 | go lk (Leaf v) =
183 | if (lk == k) then Just v
184 | else Nothing
185 |
186 | -- update leftmost item
187 | updL :: (v -> v) -> Node k v -> Node k v
188 | updL fn (Leaf v) = Leaf (fn v)
189 | updL fn (Inner cb l k r) = Inner cb (updL fn l) k r
190 |
191 | -- insert value to left at given depth, using the given
192 | -- key as the least key for the given node.
193 | insL :: Int -> v -> k -> Node k v -> Node k v
194 | insL cb = insLN cb . Leaf
195 |
196 | -- insert node to left of tree with depth and least key
197 | insLN :: Int -> Node k v -> k -> Node k v -> Node k v
198 | insLN cb n' k' = ins where
199 | ins (Inner ncb l k r) | (cb > ncb) =
200 | Inner ncb (ins l) k r
201 | ins n = Inner cb n' k' n
202 |
203 | -- | insert a value for a key, replacing any prior value.
204 | insert :: CBK k => k -> v -> CBT k v -> CBT k v
205 | insert = insertWith const
206 |
207 | -- Could we insert more efficiently just looking at the crit-bits?
208 | -- note: might be better to model this as a union with a singleton.
209 | insertWith :: CBK k => (v -> v -> v) -> k -> v -> CBT k v -> CBT k v
210 | insertWith fn kIns vIns = ini where
211 | ini Empty = Root kIns (Leaf vIns)
212 | ini (Root k n) = uncurry Root (ins 0 k n)
213 |
214 | ins cb k n = case critBit cb kIns k of
215 | Just cb -> case getBit cb kIns of
216 | False -> (kIns, insL cb vIns k n) -- insert as new least node
217 | True -> (k, insR cb n) -- insert to right after cb
218 | Nothing -> (k, updL (fn vIns) n) -- update in place
219 |
220 | -- insR inserts to the right of the least key,
221 | -- just after a specified critbit.
222 | insR cb (Inner ncb l k r)
223 | | (cb > ncb) = Inner ncb (insR cb l) k r -- differentiates further down
224 | | otherwise = uncurry (Inner cb l) (ins cb k r)
225 | insR cb v0@(Leaf _) = Inner cb v0 kIns (Leaf vIns) -- right of leaf
226 |
227 | -- | delete a key. Each node tracks its least key, so deleting a
228 | -- least key promotes the least key of its right sibling.
229 | delete :: CBK k => k -> CBT k v -> CBT k v
230 | delete kDel = ini where
231 |     ini Empty = Empty
232 |     ini (Root k n) = maybe Empty (uncurry Root) (del k n)
233 |     del k (Leaf v) = if (k == kDel) then Nothing else Just (k, Leaf v)
234 |     del k (Inner cb l rk r) | getBit cb kDel = Just $ maybe (k, l) (\ (rk', r') -> (k, Inner cb l rk' r')) (del rk r) | otherwise = Just $ maybe (rk, r) (\ (k', l') -> (k', Inner cb l' rk r)) (del k l)
235 | -- | left-biased union
236 | union :: CBK k => CBT k v -> CBT k v -> CBT k v
237 | union = unionWith const
238 |
239 | -- | union
240 | --
241 | -- ideally I'll provide a more efficient option than inserting each
242 | -- element independently. But for now, I'm just making it work.
243 | unionWith :: CBK k => (v -> v -> v) -> CBT k v -> CBT k v -> CBT k v
244 | unionWith fn l = L.foldl' ins l . toList where
245 | ins m (k,v) = insertWith (flip fn) k v m
246 |
247 |
--------------------------------------------------------------------------------
/hs/src/Wikilon/CX.hs:
--------------------------------------------------------------------------------
1 | -- | Abstract Wikilon Runtime Context
2 | --
3 | -- Wikilon operates in a similar context as Awelon, but we also need
4 | -- some access to persistent data storage in form of a key-value DB.
5 | -- And it might need some access to secure entropy and other features.
6 | module Wikilon.CX
7 | ( Storage(..)
8 | , module Awelon.CX
9 | ) where
10 |
11 | import Data.ByteString.Lazy (ByteString)
12 | import Awelon.CX
13 |
14 | -- | Access to a key-value registry (usually persistent)
15 | --
16 | -- Storage operates on a simple key-value registry, representing a
17 | -- persistent store such as a filesystem or database. In Wikilon,
18 | -- this is probably Wikilon.DB.
19 | class Storage m where
20 | regGet :: ByteString -> m ByteString
21 | regPut :: ByteString -> ByteString -> m ()
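
-- An illustrative in-memory instance for testing (hypothetical; the
-- names below are not part of this codebase), backed by an IORef over
-- a Map and treating missing keys as the empty binary:
--
--   newtype MemStore a = MemStore (IORef (Map ByteString ByteString) -> IO a)
--   instance Storage MemStore where
--     regGet k = MemStore $ \ ref -> (maybe mempty id . Map.lookup k) <$> readIORef ref
--     regPut k v = MemStore $ \ ref -> modifyIORef' ref (Map.insert k v)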
22 |
23 |
24 | -- NOTE: other potential features to consider are checkpointing or
25 | -- backtracking. But I'd prefer to wait for a real requirement.
26 |
27 |
--------------------------------------------------------------------------------
/hs/src/Wikilon/KVM.hs:
--------------------------------------------------------------------------------
1 |
2 | -- | First-Class Key-Value Databases
3 | --
4 | -- Wikilon.DB offers a mutable key-value database with stowage and GC.
5 | -- This KVM models a key-value map above stowage, enabling first class
6 | -- database values to potentially be larger than memory.
7 | --
8 | -- The tree structure used for KVM is a variant of a crit-bit tree or
9 | -- trie. Differences from conventional crit-bit tree:
10 | --
11 | -- - least key is held by parent, to support full tree diffs and merges
12 | -- - each key is associated with a binary value (which may be empty)
13 | -- - keys, values, nodes may be stowed outside of volatile memory
14 | --
15 | -- Keys and values within the KVM are free to reference other stowage
16 | -- resources. But keys mustn't have any trailing null bytes.
17 | --
18 | -- Batched updates are important: it's inefficient to allocate lots of
19 | -- short-lived nodes at the stowage layers. This module will aim to make
20 | -- batching and buffering relatively simple and easy.
21 | --
22 | -- First-class database values offer a lot of benefits over conventional
23 | -- key-value databases: histories, forking, diffs, composition. Wikilon
24 | -- relies on KVM for most data indexing and processing.
25 | --
26 | module Wikilon.KVM
27 | ( Key, Val
28 |
29 | ) where
30 |
31 | import qualified Data.ByteString.Lazy as LBS
32 | import Control.Exception
33 |
34 | type Key = LBS.ByteString
35 | type Val = LBS.ByteString
36 |
37 | -- our crit-bit tree doesn't distinguish keys with trailing NULLs,
38 | -- instead treating every key as having an infinite extent of zero
39 | -- bits following the final non-zero bit.
40 | --
41 | -- Since we can't distinguish keys with trailing NULLs, we also should
42 | -- not accept them into our trees.
43 | validKey :: Key -> Bool
44 | validKey s = LBS.null s || (0 /= LBS.last s)
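
-- e.g. validKey "" and validKey "a" hold, but validKey "a\0" does not:
-- the trailing null byte could not be preserved by this tree.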
45 |
46 |
47 |
48 |
49 |
50 | -- | A simple trie with bytestring data.
51 |
52 |
--------------------------------------------------------------------------------
/hs/src/Wikilon/RT.hs:
--------------------------------------------------------------------------------
1 |
2 | -- | Wikilon runtime types.
3 | module Wikilon.RT
4 | ( RTE(..)
5 | ) where
6 |
7 | import Control.Monad.Trans.Reader
8 | import Wikilon.DB (DB)
9 | import qualified Wikilon.DB as DB
10 |
11 | data RTE = RTE
12 | { rte_db :: DB
13 | }
14 |
15 | -- Adding some form of checkpoint model to our RTE seems potentially
16 | -- useful.
17 |
18 |
19 |
--------------------------------------------------------------------------------
/hs/stack.yaml:
--------------------------------------------------------------------------------
1 | # This file was automatically generated by 'stack init'
2 | #
3 | # Some commonly used options have been documented as comments in this file.
4 | # For advanced use and comprehensive documentation of the format, please see:
5 | # http://docs.haskellstack.org/en/stable/yaml_configuration/
6 |
7 | # Resolver to choose a 'specific' stackage snapshot or a compiler version.
8 | # A snapshot resolver dictates the compiler version and the set of packages
9 | # to be used for project dependencies. For example:
10 | #
11 | # resolver: lts-3.5
12 | # resolver: nightly-2015-09-21
13 | # resolver: ghc-7.10.2
14 | # resolver: ghcjs-0.1.0_ghc-7.10.2
15 | # resolver:
16 | # name: custom-snapshot
17 | # location: "./custom-snapshot.yaml"
18 | resolver: lts-8.18
19 |
20 | # User packages to be built.
21 | # Various formats can be used as shown in the example below.
22 | #
23 | # packages:
24 | # - some-directory
25 | # - https://example.com/foo/bar/baz-0.0.2.tar.gz
26 | # - location:
27 | # git: https://github.com/commercialhaskell/stack.git
28 | # commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a
29 | # - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a
30 | # extra-dep: true
31 | # subdirs:
32 | # - auto-update
33 | # - wai
34 | #
35 | # A package marked 'extra-dep: true' will only be built if demanded by a
36 | # non-dependency (i.e. a user package), and its test suites and benchmarks
37 | # will not be run. This is useful for tweaking upstream packages.
38 | packages:
39 | - '.'
40 | # Dependency packages to be pulled from upstream that are not in the resolver
41 | # (e.g., acme-missiles-0.3)
42 | extra-deps:
43 | - containers-0.5.10.2
44 |
45 | # Override default flag values for local packages and extra-deps
46 | flags: {}
47 |
48 | # Extra package databases containing global packages
49 | extra-package-dbs: []
50 |
51 | # Control whether we use the GHC we find on the path
52 | # system-ghc: true
53 | #
54 | # Require a specific version of stack, using version ranges
55 | # require-stack-version: -any # Default
56 | # require-stack-version: ">=1.3"
57 | #
58 | # Override the architecture used by stack, especially useful on Windows
59 | # arch: i386
60 | # arch: x86_64
61 | #
62 | # Extra directories used by stack for building
63 | # extra-include-dirs: [/path/to/dir]
64 | # extra-lib-dirs: [/path/to/dir]
65 | #
66 | # Allow a newer minor version of GHC than the snapshot specifies
67 | # compiler-check: newer-minor
68 |
--------------------------------------------------------------------------------
/hs/test/Spec.hs:
--------------------------------------------------------------------------------
1 | {-# LANGUAGE OverloadedStrings #-}
2 |
3 | import Control.Monad
4 | import qualified Data.ByteString.Lazy as LBS
5 | import qualified Data.ByteString.Lazy.UTF8 as LU8
6 | import qualified System.EasyFile as FS
7 | import qualified Wikilon.DB as DB
8 | import qualified Data.List as L
9 | import Control.Exception
10 | import Debug.Trace
11 | import System.Mem
12 |
13 | -- TODO: learn and use a Haskell test framework.
14 |
15 | main :: IO ()
16 | main = withTmpDir "wikilon-test" $ do
17 | (Right db) <- DB.open "test-db" 1000
18 | testDB db
19 | return ()
20 |
21 |
22 |
23 |
24 |
25 |
26 | -- utilities to simplify the read-write labor
27 |
28 | utf8 :: Show a => a -> LU8.ByteString
29 | utf8 = LU8.fromString . show
30 |
31 | -- write a batch of key-value pairs in a single transaction
32 | writeBatch :: (Show a, Show b) => DB.DB -> [(a,b)] -> IO ()
33 | writeBatch db kvList = do
34 | tx <- DB.newTX db
35 | mapM_ (uncurry (write tx)) kvList
36 | ok <- DB.commit tx
37 | unless ok $ fail ("failed to write batch " ++ show kvList)
38 | return ()
39 |
40 | write :: (Show a, Show b) => DB.TX -> a -> b -> IO ()
41 | write tx a b = DB.writeKey tx (utf8 a) (utf8 b)
42 |
43 | tryRead :: (Read a) => LU8.ByteString -> a
44 | tryRead s = case reads (LU8.toString s) of
45 | ((a,_):_) -> a
46 | [] -> error $ "could not read: " ++ show s
47 |
48 | read :: (Show a, Read b) => DB.TX -> a -> IO b
49 | read tx = fmap tryRead . DB.readKey tx . utf8
50 |
51 | readDB :: (Show a, Read b) => DB.DB -> a -> IO b
52 | readDB db = fmap tryRead . DB.readKeyDB db . utf8
53 |
54 | testDB :: DB.DB -> IO ()
55 | testDB db = do
56 | -- NOTE: I'll be able to test the database more thoroughly
57 | -- once I have some support for large tries or similar.
58 |
59 | tx0 <- DB.newTX db
60 | DB.writeKey tx0 "red" "green"
61 | DB.writeKey tx0 "blue" "orange"
62 | DB.writeKey tx0 "yellow" "violet"
63 | DB.commit tx0
64 |
65 | r <- DB.readKeyDB db "red"
66 | tx1 <- DB.newTX db
67 | b <- DB.readKey tx1 "blue"
68 | y <- DB.readKey tx1 "yellow"
69 | DB.commit tx1
70 | rby <- DB.readKeysDB db ["red","blue","yellow"]
71 |
72 | unless ((r == "green") && (y == "violet") && (rby == [r,b,y])) $
73 | fail "unexpected results for rby"
74 |
75 | tx2 <- DB.newTX db
76 | h0 <- DB.stowRsc tx2 "hello"
77 | h1 <- DB.stowRsc tx2 "goodbye"
78 | DB.writeKey tx2 "hello" (LBS.fromStrict h1)
79 | DB.clearRsc tx2
80 | DB.commit tx2
81 |
82 | let ns = [0..100]
83 | let sel = L.take 20 . L.drop 30
84 | tx3 <- DB.newTX db
85 | hs <- mapM (DB.stowRsc tx3 . utf8) ns
86 | DB.writeKey tx3 "p" (utf8 (sel hs))
87 | DB.commit tx3
88 | DB.clearRsc tx3
89 | DB.gcDB db
90 | --DB.gcDB db
91 | -- traceIO ("hs: " ++ show hs)
92 | ns2 <- mapM (DB.loadRscDB db) hs
93 | traceIO ("ns2 : " ++ show ns2)
94 |
95 |
96 | hs' <- DB.readKeyDB db "p"
97 | unless (utf8 (sel hs) == hs') $
98 | fail ("failure to read expected hashes")
99 |
100 |
101 |
102 | withTmpDir :: FilePath -> IO a -> IO a
103 | withTmpDir subdir action = do
104 | initialPath <- FS.getCurrentDirectory
105 | tmp <- FS.getTemporaryDirectory
106 |   let myTmpDir = tmp FS.</> subdir
107 | FS.createDirectoryIfMissing False myTmpDir
108 | FS.setCurrentDirectory myTmpDir
109 | r <- try action
110 | FS.setCurrentDirectory initialPath
111 | -- FS.removeDirectoryRecursive myTmpDir
112 | reraise r
113 |
114 |
115 | reraise :: Either SomeException a -> IO a
116 | reraise = either throw return
117 |
118 |
119 |
--------------------------------------------------------------------------------
/hs/wikilon.cabal:
--------------------------------------------------------------------------------
1 | name: wikilon
2 | version: 0.1.0.0
3 | synopsis: a wiki-inspired development environment for Awelon Language
4 | -- description:
5 | homepage: https://github.com/dmbarbour/wikilon#readme
6 | license: BSD3
7 | -- license-file: LICENSE
8 | author: David Barbour
9 | maintainer: dmbarbour@gmail.com
10 | copyright: 2017 David Barbour
11 | category: Web
12 | build-type: Simple
13 | extra-source-files:
14 | cabal-version: >=1.10
15 |
16 | library
17 | hs-source-dirs: src
18 | exposed-modules:
19 | Awelon
20 | Awelon.Syntax
21 | Awelon.Hash
22 | Awelon.CX
23 | Awelon.Dict
24 | Awelon.Dict.Format
25 |
26 | Wikilon
27 | Wikilon.CX
28 | Wikilon.DB
29 | Wikilon.KVM
30 | Wikilon.CBT
31 | Wikilon.RT
32 |
33 | build-depends: base >= 4.7 && < 5
34 | -- CONTROL
35 | , transformers
36 | , monad-loops
37 | -- VOLATILE DATA
38 | , deepseq
39 | , array
40 | , containers (>= 0.5.8)
41 | , bytestring
42 | , utf8-string
43 | , blake2
44 | -- PERSISTENT DATA
45 | , lmdb
46 | , filelock
47 | , easy-file
48 | -- WEB SERVICES
49 | , servant
50 | , wai
51 | , wai-websockets
52 | , websockets
53 | , yesod-form
54 | default-language: Haskell2010
55 |
56 | executable wikilon
57 | hs-source-dirs: app
58 | main-is: Main.hs
59 | ghc-options: -threaded -rtsopts -with-rtsopts=-N
60 | build-depends: base
61 | , wikilon
62 | , bytestring
63 | , utf8-string
64 | , monad-loops
65 | , wai
66 | , warp
67 | , warp-tls
68 | , entropy
69 | , easy-file
70 | default-language: Haskell2010
71 |
72 | test-suite wikilon-test
73 | type: exitcode-stdio-1.0
74 | hs-source-dirs: test
75 | main-is: Spec.hs
76 | build-depends: base
77 | , wikilon
78 | , bytestring
79 | , utf8-string
80 | , easy-file
81 | ghc-options: -threaded -rtsopts -with-rtsopts=-N
82 | default-language: Haskell2010
83 |
84 | source-repository head
85 | type: git
86 | location: https://github.com/dmbarbour/wikilon
87 |
88 |
89 |
--------------------------------------------------------------------------------
/src/Awelon/Awelon.fsproj:
--------------------------------------------------------------------------------
1 | <Project Sdk="Microsoft.NET.Sdk">
2 |
3 |   <PropertyGroup>
4 |     <TargetFramework>netstandard2.0</TargetFramework>
5 |   </PropertyGroup>
6 |
7 | </Project>
8 |
--------------------------------------------------------------------------------
/src/Awelon/DictSearch.fs:
--------------------------------------------------------------------------------
1 | namespace Awelon
2 |
3 |
4 | // The goal for DictSearch is to model an index data structure
5 | // with the following features:
6 | //
7 | // - we can index each dictionary node independently
8 | // - we can efficiently merge indices on Dict nodes
9 | // - we can store computed values - types, versions
10 | //
11 | // Ideally, we can achieve these features using one index model.
12 | // This would allow efficient indexing across multiple versions
13 | // of a dictionary.
14 | //
15 | // Computing values is difficult because the value computed at
16 | // a node may depend on other nodes in its context.
17 | //
18 | // If we achieve these features together, we can maintain our
19 | // indices across multiple versions of a dictionary, and produce
20 | // a new index as needed.
21 | //
22 | // One viable option is to maintain a global index on secure hash
23 | // resources, mapping each resource to its partial index. Such an
24 | // index could then be shared by every dictionary version that
25 | // references the same resource.
26 | //
27 | // Besides search, I also want effective caching of Dict nodes.
28 |
29 |
--------------------------------------------------------------------------------
/src/Awelon/Interpret.fs:
--------------------------------------------------------------------------------
1 | namespace Awelon
2 | open Data.ByteString
3 | open System.Collections.Generic
4 |
5 | // Interpretation of Awelon.
6 | //
7 | // Awelon programs logically rewrite to equivalent Awelon programs.
8 | // But Awelon can be naively implemented as a stack language; we
9 | // only need some care to preserve or recover words that should not
10 | // link/rewrite at evaluation time. It seems that for every word we
11 | // might link, we should compute static link conditions.
12 | //
13 | // For performance, acceleration is essential. At this time, use of
14 | // acceleration is simplified to `[F](accel-nat-add) => [!nat-add]`.
15 | // That is, we don't need to validate that `F` correctly implements
16 | // the accelerated nat-add operation at the interpreter layer. We can
17 | // and should validate it at other layers, such as a linter or type
18 | // checker. In any case, this makes it easy to treat accelerators as
19 | // built-in functions for fast interpretation.
20 | //
21 | // Besides accelerators, other important concerns are memoization,
22 | // caching, stowage, parallelism, and debug trace outputs. Interpreter
23 | // requires a full environment model, with some implicit sharing under
24 | // the hood. Further, we might also need to track *quotas*, to limit
25 | // evaluation efforts.
26 | module Interpret =
27 |
28 | /// Linker Conditions
29 | ///
30 | /// A word will link into a computation under two conditions:
31 | ///
32 | /// - Arity: There are sufficient arguments on the stack to link.
33 | /// - Avail: A word's evaluated definition has available outputs.
34 | ///
35 | /// Arity can range 0..9, due to the (a2)..(a9) annotations. Arity
36 | /// zero implies a word is not awaiting further input. Avail has
37 | /// no upper limit, but there's simply no need to calculate it above
38 | /// the maximum arity 9.
39 | ///
40 | /// A "noun" will have arity=0, avail=1. A noun may simply be treated
41 | /// as a value for binding, etc..
42 | type LinkArity = int // how many inputs this observes on data stack
43 | type LinkAvail = int // how many outputs are immediately available
44 |
45 | ///
46 | ///
47 | ///
48 |
49 |
50 |
51 | // Stowage References
52 |
53 | type Env =
54 | val Src : Dict
55 |
56 | /// Our Interpreter will process programs as follows:
57 | ///
58 | /// - Nouns and Verbs are distinct.
59 | /// - Evaluated data is distinct from un-evaluated.
60 | ///
61 |
62 |
63 | /// Interpreter-layer values should be optimized towards runtime
64 | /// interpretation.
65 | ///
66 |
67 |
68 |
69 | /// I assume `[code](accel)` will be used within source to
70 | /// indicate accelerated implementations.
71 |
72 |
73 |
74 |
75 |
76 |
77 |
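78 | // Illustrative sketch of the linker conditions above (hypothetical,
79 | // unused helper): Arity gates linking on available stack arguments;
80 | // Avail would similarly gate on outputs of the evaluated definition.
81 | module private InterpretSketch =
82 |     let canLink (arity:Interpret.LinkArity) (stackDepth:int) : bool =
83 |         stackDepth >= arity
84 |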
--------------------------------------------------------------------------------
/src/Awelon/ParseExt.fs:
--------------------------------------------------------------------------------
1 | namespace Awelon
2 | open Data.ByteString
3 |
4 | // ParseExt is a 'default' textual projection for Awelon with all
5 | // the bells and whistles we can implement that *don't* depend on
6 | // special knowledge about a dictionary. This is intended both as
7 | // an initial scaffolding for programming Awelon, and a fallback
8 | // or 'safe mode' textual projection for debugging (if needed).
9 | //
10 | // In older versions of Awelon, I might have called this `CLAW`,
11 | // a command language for Awelon.
12 | //
13 | module ParseExt =
14 |
15 | // SUPPORTABLE NOW (based on developed techniques):
16 | // - local variables
17 | // - monadic sequences
18 | // - simple list literals
19 | // - labeled data (records, variants)
20 | // - basic numeric notations
21 | // - raw vs preprocessed literals
22 | // - lightweight qualified namespaces
23 |
24 | type NatRep = uint64
25 | type IntRep = int64
26 | type RatRep = (struct(IntRep * NatRep))
27 | type DecRep = (struct(IntRep * int16)) // 3141 -3 => 3.141
28 | type SciRep = (struct(DecRep * IntRep)) // Decimal*10^Exponent
29 |
30 | type Number =
31 | | Nat of NatRep
32 | | Int of IntRep
33 | | Rat of RatRep
34 | | Dec of DecRep
35 | | Sci of SciRep
36 |
37 |
38 | // Potential Future Extensions:
39 | //
40 | // - pattern matching (parser combinators)
41 | // - concise type descriptors
42 |
43 |
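44 | // Illustrative sketch (hypothetical helper; notations not final):
45 | // rendering the Number views above into candidate textual forms.
46 | module private ParseExtSketch =
47 |     open ParseExt
48 |     let render (n:Number) : string =
49 |         match n with
50 |         | Nat n -> string n                                    // 42
51 |         | Int i -> string i                                    // -7
52 |         | Rat r -> let struct(i,d) = r in sprintf "%d/%d" i d  // -4/3
53 |         | Dec d -> let struct(m,e) = d in sprintf "%dE%d" m e  // 3141E-3
54 |         | Sci s ->
55 |             let struct(d,x) = s
56 |             let struct(m,e) = d
57 |             sprintf "%dE%dE%d" m e x
58 |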
--------------------------------------------------------------------------------
/src/Awelon/Test/Program.fs:
--------------------------------------------------------------------------------
1 | module Program = let [<EntryPoint>] main _ = 0
2 |
--------------------------------------------------------------------------------
/src/Awelon/Test/Test.fsproj:
--------------------------------------------------------------------------------
1 | <Project Sdk="Microsoft.NET.Sdk">
2 |
3 |   <PropertyGroup>
4 |     <TargetFramework>netcoreapp2.0</TargetFramework>
5 |
6 |     <IsPackable>false</IsPackable>
7 |   </PropertyGroup>
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 | </Project>
--------------------------------------------------------------------------------
/src/Awelon/Test/Tests.fs:
--------------------------------------------------------------------------------
1 | module Tests
2 |
3 | open System
4 | open System.IO
5 | open Xunit
6 | open Stowage
7 | open Awelon
8 | open Data.ByteString
9 |
10 | // For parser tests, it's convenient to parse then print and
11 | // compare the printed string. If parse fails, we can instead
12 | // test the remainder with "?rem".
13 | let ps s =
14 | match Parser.parse (BS.fromString s) with
15 | | Parser.ParseOK p -> BS.toString (Parser.write p)
16 | | Parser.ParseFail st -> BS.toString (BS.cons (byte '?') (st.s))
17 |
18 | // Shuffle an array for various tests.
19 | let shuffle (rng:System.Random) (a : 'T[]) : unit =
20 | let rec shuffleIx ix =
21 | if (ix = a.Length) then () else
22 | let ixSwap = rng.Next(ix, Array.length a)
23 | let tmp = a.[ix]
24 | a.[ix] <- a.[ixSwap]
25 | a.[ixSwap] <- tmp
26 | shuffleIx (ix + 1)
27 | shuffleIx 0
28 |
29 | let clearTestDir path =
30 | if Directory.Exists(path)
31 | then Directory.Delete(path,true)
32 |
33 | let readNat (s:ByteString) : int =
34 | let readByte acc c = (acc * 10) + int (c - byte '0')
35 | BS.fold readByte 0 s
36 |
37 | [<Fact>]
38 | let ``basic parser tests`` () =
39 | Assert.Equal("", ps " ")
40 | Assert.Equal("1 2 3", ps " 1 2 3 ")
41 | Assert.Equal("\"hello\" world", ps " \"hello\" world ")
42 | Assert.Equal("?\"test", ps " 1 2 \"hello\" \"test")
43 | Assert.Equal("[[] [[] []]] []", ps "[[][[][]]][]")
44 | Assert.Equal("?@ ", ps "1 2 3 @ ")
45 | Assert.Equal("?→", ps "→")
46 | Assert.Equal("?", ps "[0[1[2")
47 | Assert.Equal("?]]]", ps "[]0]]]")
48 | Assert.Equal("hello/world/[1]", ps " hello/world/[ 1 ] ")
49 | Assert.Equal("a/b/c/d", ps " a/b/c/d ")
50 | Assert.Equal("? []", ps "hello/world/ []")
51 | Assert.Equal("(hello)", ps " (hello) ")
52 | Assert.Equal("?(hel ", ps " (hel ")
53 | Assert.Equal("?) ", ps " hello) ")
54 | Assert.Equal("?%bbb", ps " %bbb")
55 | let h = RscHash.hash (BS.fromString "test")
56 | let asBin = BS.toString (BS.cons (byte '%') h)
57 | let asRsc = BS.toString (BS.cons (byte '$') h)
58 | Assert.Equal(asBin, ps asBin)
59 | Assert.Equal(asRsc, ps asRsc)
60 |
61 | // TODO: test interpreters
62 |
63 | let testDefStr n =
64 | let s = if (0 = n) then "[zero]" else
65 | "[" + string (n - 1) + " succ] (nat)"
66 | BS.fromString s
67 |
68 | let inline bs n = BS.fromString (string n)
69 | let inline addN (n:int) (d:Dict) : Dict = Dict.add (bs n) (Dict.Def(testDefStr n)) d
70 | let inline remN (n:int) (d:Dict) : Dict = Dict.remove (bs n) d
71 | let inline flip fn a b = fn b a
72 |
73 | [<Fact>]
74 | let ``simple dict tests`` () =
75 | Assert.True(Dict.isEmpty (Dict.empty))
76 | Assert.False(Dict.isEmpty (Dict.empty |> addN 1))
77 | Assert.True(Dict.isEmpty (Dict.empty |> addN 1 |> remN 1))
78 | Assert.False(Dict.isEmpty (Dict.empty |> addN 1 |> addN 2 |> remN 1))
79 | let d = seq { for i = 1 to 2000 do yield i }
80 | |> Seq.fold (flip addN) Dict.empty
81 | let has n d = Dict.contains (bs n) d
82 | let d10 = Dict.dropPrefix (bs 10) d
83 | Assert.True(has 10 d)
84 | Assert.False(has 10 d10)
85 | Assert.True(has 101 d)
86 | Assert.False(has 101 d10)
87 | Assert.True(has 1023 d)
88 | Assert.False(has 1023 d10)
89 | Assert.True(has 1 d10)
90 | Assert.True(has 11 d10)
91 | Assert.Equal(2000 - 111, Seq.length (Dict.toSeq d10)) // 1..2000 - 10,100..109,1000..1099
92 |
93 | let d30 = Dict.selectPrefix (bs 30) d
94 | Assert.True(has 30 d30)
95 | Assert.True(has 300 d30)
96 | Assert.True(has 308 d30)
97 | Assert.True(has 310 d)
98 | Assert.False(has 310 d30)
99 | Assert.True(has 31 d)
100 | Assert.False(has 31 d30)
101 | Assert.True(has 200 d)
102 | Assert.False(has 200 d30)
103 | Assert.Equal(11, Seq.length (Dict.toSeq d30)) // 30,300,301,302,..309
104 |
105 | // a fixture is needed to load the database
106 | type TestDB =
107 | val s : LMDB.Storage
108 | val db : DB
109 | new () =
110 | let path = "testDB"
111 | let maxSizeMB = 1000
112 | do clearTestDir path
113 | let s = new LMDB.Storage(path,maxSizeMB)
114 | { s = s
115 | db = DB.fromStorage (s :> DB.Storage)
116 | }
117 | interface System.IDisposable with
118 | member this.Dispose() =
119 | this.db.Flush()
120 | (this.s :> System.IDisposable).Dispose()
121 |
122 | let bsPair ((a,b)) = (BS.fromString a, BS.fromString b)
123 |
124 | type DBTests =
125 | val s : LMDB.Storage
126 | val db : DB
127 | new (fixture : TestDB) =
128 | let s = fixture.s
129 | { s = fixture.s
130 | db = fixture.db
131 | }
132 | interface IClassFixture<TestDB>
133 |
134 | member inline t.DB with get() = t.db
135 | member inline t.Stowage with get() = (t.s :> Stowage)
136 | member inline t.Storage with get() = (t.s :> DB.Storage)
137 | member inline t.Flush() = DB.flushStorage (t.Storage)
138 |
139 | member t.TryLoad (h:RscHash) : ByteString option =
140 | try t.Stowage.Load h |> Some
141 | with
142 | | MissingRsc _ -> None
143 |
144 | member t.FullGC() =
145 | System.GC.Collect()
146 | let rec gcLoop ct =
147 | t.s.GC()
148 | let ct' = t.s.Stats().stow_count
149 | //printfn "GC - elements in stowage: %A" ct'
150 | if (ct' <> ct) then gcLoop ct'
151 | gcLoop 0UL
152 |
153 | member tf.CompactionTest (alen:int) (frac:int) (rng:System.Random) : unit =
154 | let sw = new System.Diagnostics.Stopwatch()
155 | let cc d = Codec.compact (Dict.node_codec) (tf.Stowage) d
156 | let compactK k d = if (0 <> (k % frac)) then d else cc d
157 | let add n d = compactK n (addN n d)
158 | let rem n d = compactK n (remN n d)
159 | let a = [| 1 .. alen |]
160 | let r = [| 1 .. (alen / 3) |]
161 | shuffle rng a
162 | shuffle rng r
163 |
164 | printfn "building a test dictionary"
165 | sw.Restart()
166 | let dRef = Dict.empty
167 | |> Array.foldBack add a
168 | |> Array.foldBack rem r
169 | |> cc
170 | |> VRef.stow (Dict.codec) (tf.Stowage)
171 | tf.Flush()
172 | sw.Stop()
173 | printfn "test dictionary built (%A ms)" (sw.Elapsed.TotalMilliseconds)
174 | let write_op_ct = Array.length a + Array.length r
175 | let write_op_usec = (1000.0 * sw.Elapsed.TotalMilliseconds) / (double write_op_ct)
176 | printfn "usec per write op: %A" write_op_usec
177 |
178 | // really doing iteration (sequential read). I might need to test
179 | // performance of random reads, separately. But this isn't primarily
180 | // a performance test.
181 | let accum s n = s + uint64 n
182 | let array_sum arr = Array.fold accum 0UL arr
183 | let sum_expected = array_sum a - array_sum r
184 |
185 | sw.Restart()
186 | let dR = VRef.load dRef
187 | let sum = Dict.toSeq dR
188 | |> Seq.map (fst >> readNat)
189 | |> Seq.fold accum 0UL
190 | sw.Stop()
191 | let read_op_ct = (Array.length a) - (Array.length r)
192 | let read_op_usec = (1000.0 * sw.Elapsed.TotalMilliseconds) / (double read_op_ct)
193 | printfn "usec per read op: %A" read_op_usec
194 | Assert.Equal(sum, sum_expected)
195 |
196 | // random-read performance test
197 | shuffle rng a
198 | sw.Restart()
199 | let dRR = VRef.load dRef
200 | let fnAccum acc k = acc + if Dict.contains (bs k) dRR then uint64 k else 0UL
201 | let rsum1 = Array.fold fnAccum 0UL a
202 | sw.Stop()
203 | let rread_op_ct = Array.length a
204 | let rread_op_usec = (1000.0 * sw.Elapsed.TotalMilliseconds) / (double rread_op_ct)
205 | printfn "usec per random read op: %A" rread_op_usec
206 | Assert.Equal(rsum1,sum_expected)
207 |
208 | [<Fact>]
209 | member tf.``test dict compaction`` () =
210 | let rng = new System.Random(1)
211 |
212 | printfn "size 70k, 10 compactions"
213 | tf.CompactionTest 70000 7000 rng
214 | printfn "size 70k, 100 compactions"
215 | tf.CompactionTest 70000 700 rng
216 | printfn "size 70k, 1 compactions"
217 | tf.CompactionTest 70000 700000 rng
218 |
219 | // following test takes about 30 seconds on my machine
220 | //printfn "size 700k, 100 compactions"
221 | //tf.CompactionTest 700000 7000 rng
222 |
223 | // TODO: test splitAtKey, etc.
224 |
225 |
226 |
227 |
228 |
229 |
--------------------------------------------------------------------------------
/src/Awelon/TypeCheck.fs:
--------------------------------------------------------------------------------
1 | namespace Awelon
2 |
3 | // Awelon language can support static typing based on arities
4 | // and annotations. However, Awelon does not specify any type
5 | // model. This is because we might wish to type some programs
6 | // using simple static types, and others using sophisticated
7 | // dependent types, and some might require dynamic analysis.
8 | //
9 | // Ultimately, I'd like for most type checking to be modeled
10 | // within the dictionary. But in the short term, this module
11 | // can provide ad-hoc static type analysis for Awelon code.
12 | //
13 | // My primary concerns for type analysis will be:
14 | //
15 | // - static analysis of arities
16 | // - accelerated data types
17 | // - simple type annotations
18 | // - dynamic types for macros
19 | //
20 |
21 |
22 |
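23 | // Illustrative sketch for the arity analysis above. The composition
24 | // rule is the standard one for stack languages; names are hypothetical.
25 | module private TypeCheckSketch =
26 |     /// (inputs consumed, outputs produced) for a program fragment
27 |     type Arity = int * int
28 |     /// sequential composition: outputs of the first fragment feed
29 |     /// the inputs of the second
30 |     let compose ((a,b):Arity) ((a',b'):Arity) : Arity =
31 |         (a + max 0 (a' - b), b' + max 0 (b - a'))
32 |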
--------------------------------------------------------------------------------
/src/Awelon/WordVersion.fs:
--------------------------------------------------------------------------------
1 | namespace Awelon
2 |
3 | // We can associate an Awelon program with a deep, behavioral version
4 | // by taking the secure hash of the program together with versions of
5 | // its dependencies. Doing so is rather useful, and the result is easy
6 | // to cache and use for memoization of other details (such as inferred
7 | // types or evaluations).
8 | //
9 | // Although we could keep a permanent index of versions, it might prove
10 | // simpler to recompute versions as needed. The cost is predictable and
11 | // we can still benefit from temporary caching.
12 |
13 |
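14 | // Illustrative sketch, assuming an acyclic dictionary; the lookups
15 | // `defOf` and `depsOf` are hypothetical parameters, not module API.
16 | module private WordVersionSketch =
17 |     open Stowage
18 |     open Data.ByteString
19 |     open System.Collections.Generic
20 |     /// deep version = hash of definition plus dependency versions
21 |     let rec version (defOf : string -> ByteString)
22 |                     (depsOf : string -> string list)
23 |                     (memo : Dictionary<string, ByteString>)
24 |                     (w : string) : ByteString =
25 |         match memo.TryGetValue w with
26 |         | true, v -> v
27 |         | _ ->
28 |             let depVersions = depsOf w |> List.map (version defOf depsOf memo)
29 |             let v = RscHash.hash (BS.concat (defOf w :: depVersions))
30 |             memo.[w] <- v
31 |             v
32 |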
--------------------------------------------------------------------------------
/src/Data.ByteString/Data.ByteString.fsproj:
--------------------------------------------------------------------------------
1 | <Project Sdk="Microsoft.NET.Sdk">
2 |
3 |   <PropertyGroup>
4 |     <TargetFramework>netstandard2.0</TargetFramework>
5 |   </PropertyGroup>
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 | </Project>
--------------------------------------------------------------------------------
/src/Data.ByteString/Stream.fs:
--------------------------------------------------------------------------------
1 |
2 | namespace Data.ByteString
3 |
4 | module ByteStream =
5 |
6 | /// A ByteString Writer.
7 | ///
8 | /// This is similar to System.IO.MemoryStream in write mode, albeit
9 | /// with simpler and more restrictive access limitations. Clients
10 | /// can capture only the data that they write to the stream. Older
11 | /// data cannot be observed or overwritten.
12 | ///
13 | /// Assumes single-threaded use.
14 | ///
15 | /// The buffer is resized on write. A System.OutOfMemoryException
16 | /// is possible if the buffer cannot be resized sufficiently.
17 | type Dst =
18 | val mutable internal Data : byte[] // resizable bytes array
19 | val mutable internal Pos : int // current writer head
20 | internal new() = { Data = Array.empty; Pos = 0 }
21 |
22 | let inline private resize (sz:int) (d:Dst) : unit =
23 | assert(sz >= d.Pos)
24 |
25 | // reallocate array with sufficient space relative to Pos
26 | let private alloc (amt:int) (d:Dst) : unit =
27 | let maxAmt = System.Int32.MaxValue - d.Pos
28 | if (amt > maxAmt)
29 | then raise (new System.OutOfMemoryException("ByteStream reserve"))
30 | // adjust for geometric growth
31 | let newSize = d.Pos + max amt (min maxAmt (max 200 d.Pos))
32 | let mem = Array.zeroCreate newSize
33 | Array.blit (d.Data) 0 mem 0 (d.Pos)
34 | d.Data <- mem
35 |
36 | let inline private requireSpace (amt:int) (dst:Dst) : unit =
37 | let avail = dst.Data.Length - dst.Pos
38 | if (amt > avail) then alloc amt dst
39 |
40 | /// Reserve space for writing.
41 | ///
42 | /// If this is the first operation on the stream, it performs an
43 | /// initial allocation of the exact size requested. Otherwise, it
44 | /// ensures there is space to write the amount requested without
45 | /// further reallocation.
46 | let reserve (amt:int) (dst:Dst) : unit =
47 | assert(amt > 0)
48 | if (Array.isEmpty dst.Data)
49 | then dst.Data <- Array.zeroCreate amt
50 | else requireSpace amt dst
51 |
52 | let writeByte (b:byte) (dst:Dst) : unit =
53 | requireSpace 1 dst
54 | dst.Data.[dst.Pos] <- b
55 | dst.Pos <- (1 + dst.Pos)
56 |
57 | let writeBytes (bs:ByteString) (dst:Dst) : unit =
58 | requireSpace (bs.Length) dst
59 | Array.blit (bs.UnsafeArray) (bs.Offset) (dst.Data) (dst.Pos) (bs.Length)
60 | dst.Pos <- (bs.Length + dst.Pos)
61 |
62 | let inline private captureBytes (p0:int) (dst:Dst) : ByteString =
63 | BS.unsafeCreate (dst.Data) p0 (dst.Pos - p0)
64 |
65 | /// Capture writes to a Dst.
66 | ///
67 | /// This allows a client to observe whatever they have written
68 | /// without extra intermediate buffers or arrays. However, the
69 | /// initial Dst must be sourced at a `write` operation.
70 | let capture (dst:Dst) (writer:Dst -> unit) : ByteString =
71 | let p0 = dst.Pos
72 | writer dst
73 | captureBytes p0 dst
74 |
75 | /// Capture with an extra result.
76 | let capture' (dst:Dst) (writer:Dst -> 'X) : (ByteString * 'X) =
77 | let p0 = dst.Pos
78 | let x = writer dst
79 | let b = captureBytes p0 dst
80 | (b,x)
81 |
82 | /// Capture writes to a new stream.
83 | ///
84 | /// Use of a `write` operation is the only means to construct the
85 | /// output stream, ensuring that all data is captured by at least
86 | /// one observer. You can use `reserve` immediately to provide an
87 | /// initial capacity.
88 | let write (writer:Dst -> unit) : ByteString =
89 | capture (new Dst()) writer
90 |
91 | /// Write with an extra result.
92 | let write' (writer: Dst -> 'X) : (ByteString * 'X) =
93 | capture' (new Dst()) writer
94 |
95 | /// A ByteString Reader.
96 | ///
97 | /// This is similar to System.IO.MemoryStream in read-only mode, albeit
98 | /// without the ability to seek backwards and with alternative features
99 | /// for lookahead parsing. The motivation is to make it easier to reason
100 | /// about program behavior, and to integrate conveniently with ByteString.
101 | ///
102 | /// Assumes single-threaded use.
103 | type Src =
104 | val internal Data : byte[] // const
105 | val internal Limit : int // max Pos
106 | val mutable internal Pos : int
107 | internal new(s:ByteString) =
108 | { Data = s.UnsafeArray
109 | Limit = (s.Offset + s.Length)
110 | Pos = s.Offset
111 | }
112 |
113 | /// Generic exception for insufficient or unexpected data.
114 | exception ReadError
115 |
116 | /// End-of-Stream check.
117 | let eos (src:Src) : bool =
118 | (src.Limit = src.Pos)
119 |
120 | /// Check how many bytes remain in a stream.
121 | let bytesRem (src:Src) : int =
122 | (src.Limit - src.Pos)
123 |
124 | /// Observe remaining bytes in stream without removing them.
125 | let peekRem (src:Src) : ByteString =
126 | BS.unsafeCreate (src.Data) (src.Pos) (bytesRem src)
127 |
128 | /// Read remaining bytes in stream. Removes them from stream.
129 | let readRem (src:Src) : ByteString =
130 | let result = peekRem src
131 | src.Pos <- src.Limit
132 | result
133 |
134 | /// Observe next byte in stream (or raise ReadError)
135 | let peekByte (src:Src) : byte =
136 | if eos src then raise ReadError else
137 | src.Data.[src.Pos]
138 |
139 | /// Read a single byte (or raise ReadError)
140 | let readByte (src:Src) : byte =
141 | let result = peekByte src
142 | src.Pos <- (1 + src.Pos)
143 | result
144 |
145 | /// Observe a run of remaining bytes without removing them.
146 | let peekBytes (len:int) (src:Src) : ByteString =
147 | if(len < 0) then invalidArg "len" "negative byte count" else
148 | if(len > bytesRem src) then raise ReadError else
149 | BS.unsafeCreate (src.Data) (src.Pos) len
150 |
151 | /// Read run of bytes, removing them from the stream.
152 | let readBytes (len:int) (src:Src) : ByteString =
153 | let result = peekBytes len src
154 | src.Pos <- (len + src.Pos)
155 | result
156 |
157 | /// Ignore several bytes.
158 | let skip (len:int) (src:Src) : unit =
159 | readBytes len src |> ignore
160 |
161 | /// Attempt to read a byte-stream, but backtrack on ReadError
162 | let tryRead (reader:Src -> 'X) (src:Src) : 'X option =
163 | let p0 = src.Pos
164 | try Some (reader src)
165 | with
166 | | ReadError -> src.Pos <- p0; None
167 |
168 | /// Attempt to read a byte-stream, but backtrack on ReadError or None.
169 | let tryMatch (reader:Src -> 'X option) (src:Src) : 'X option =
170 | let p0 = src.Pos
171 | try let result = reader src
172 | if Option.isNone result then src.Pos <- p0
173 | result
174 | with
175 | | ReadError -> src.Pos <- p0; None
176 |
177 | /// Read a ByteString.
178 | ///
179 | /// Note: This will raise a ReadError if we're NOT at the end-of-stream
180 | /// after performing a read. You might need to add a final readRem if
181 | /// you aren't at the end of stream.
182 | let read (reader:Src -> 'X) (b:ByteString) : 'X =
183 | let src = new Src(b)
184 | let x = reader src
185 | if not (eos src) then raise ReadError
186 | x
187 |
188 |
189 |
190 | type ByteSrc = ByteStream.Src
191 | type ByteDst = ByteStream.Dst
192 |
193 |
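194 | // Usage sketch (illustrative only): round-trip a small frame through
195 | // the writer and reader defined above.
196 | module private ByteStreamSketch =
197 |     let roundTrip () : bool =
198 |         let frame = ByteStream.write (fun dst ->
199 |             ByteStream.writeByte 42uy dst
200 |             ByteStream.writeBytes (BS.fromString "hello") dst)
201 |         let (b, rest) = ByteStream.read (fun src ->
202 |             let b = ByteStream.readByte src
203 |             let rest = ByteStream.readRem src
204 |             (b, rest)) frame
205 |         (42uy = b) && (BS.fromString "hello" = rest)
206 |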
--------------------------------------------------------------------------------
/src/Data.ByteString/Tests/Program.fs:
--------------------------------------------------------------------------------
1 | module Program = let [<EntryPoint>] main _ = 0
2 |
--------------------------------------------------------------------------------
/src/Data.ByteString/Tests/Tests.fs:
--------------------------------------------------------------------------------
1 | module Tests.Data.ByteString
2 |
3 | #nowarn "988" // suppress warning for empty program
4 |
5 | open System
6 | open System.Runtime.InteropServices
7 | open Xunit
8 | open Data.ByteString
9 |
10 |
11 | [<Fact>]
12 | let ``empty is length zero`` () =
13 | Assert.Equal(0, BS.empty.Length)
14 |
15 | [<Fact>]
16 | let ``empty equals empty`` () =
17 | Assert.Equal(BS.empty, BS.empty)
18 | Assert.NotEqual(BS.empty, BS.singleton 101uy)
19 |
20 | [<Fact>]
21 | let ``isEmpty empty`` () =
22 | Assert.True(BS.isEmpty BS.empty)
23 | Assert.False(BS.isEmpty (BS.singleton 101uy))
24 |
25 | [<Fact>]
26 | let ``isEmpty emptyString`` () =
27 | Assert.True(BS.isEmpty (BS.fromString ""))
28 |
29 | [<Fact>]
30 | let ``empty string converts to empty bytestring`` () =
31 | Assert.Equal(BS.empty, BS.fromString "")
32 |
33 |
34 | [<Fact>]
35 | let ``using FNV-1a hash`` () =
36 | let arr = BS.unsafeCreateA [| 116uy; 101uy; 115uy; 116uy |] // test
37 | let h32 = ByteString.Hash32 arr
38 | let h64 = ByteString.Hash64 arr
39 | Assert.True((2949673445u = h32))
40 | Assert.True((18007334074686647077UL = h64))
41 |
42 | [<Fact>]
43 | let ``basic structural equality`` () =
44 | Assert.Equal(BS.fromString "hello", BS.fromString "hello")
45 | Assert.NotEqual(BS.fromString "hello", BS.fromString "goodbye")
46 |
47 |
48 | [<Fact>]
49 | let ``basic string conversions`` () =
50 | let testStr = "→↑←"
51 | let a = BS.fromString testStr
52 | Assert.Equal(a.Length, 9) // UTF-8 conversions
53 | Assert.Equal(testStr, a.ToString())
54 | Assert.Equal(testStr, BS.toString a)
55 |
56 | [<Fact>]
57 | let ``empty slice is empty`` () =
58 | let foo = (BS.fromString "xyzzy").[3..2]
59 | Assert.True(BS.isEmpty foo)
60 |
61 | [<Fact>]
62 | let ``non-empty slices equality`` () =
63 | let foo = BS.fromString "xyzxyz"
64 | Assert.Equal(foo.[0..2], foo.[3..5])
65 | Assert.NotEqual(foo.[0..1], foo.[2..3])
66 |
67 | [<Fact>]
68 | let ``slices share underlying array`` () =
69 | let foo = BS.fromString "xyzzy"
70 | Assert.Equal(foo.[0..2].UnsafeArray, foo.[2..3].UnsafeArray)
71 |
72 | [<Fact>]
73 | let ``simple cons`` () =
74 | Assert.Equal(BS.fromString "test", BS.cons (byte 't') (BS.fromString "est"))
75 |
76 | [<Fact>]
77 | let ``simple append`` () =
78 | Assert.Equal(BS.fromString "hello, world",
79 | BS.append (BS.fromString "hello,") (BS.fromString " world"))
80 |
81 | [<Fact>]
82 | let ``simple concat`` () =
83 | Assert.Equal(BS.fromString "hello, world",
84 | ["hello"; ","; " "; "world"] |> Seq.map BS.fromString |> BS.concat)
85 |
86 | [<Fact>]
87 | let ``empty is smallest`` () =
88 | Assert.True(BS.empty < BS.unsafeCreateA [| 0uy |])
89 |
90 | [<Fact>]
91 | let ``lexicographic order`` () =
92 | Assert.True(BS.fromString "x" < BS.fromString "xx")
93 | Assert.True(BS.fromString "xx" < BS.fromString "xy")
94 | Assert.True(BS.fromString "xy" < BS.fromString "yx")
95 |
96 | [<Fact>]
97 | let ``ordering on slices`` () =
98 | let foo = BS.fromString "xyzxyz"
99 | Assert.True(foo.[1..2] > foo.[3..4])
100 | Assert.True(foo.[0..1] < foo.[3..5])
101 |
102 | [<Fact>]
103 | let ``simple enumerator`` () =
104 | let x : ByteString = BS.fromString "test==="
105 | let mutable sum = 0
106 | for c in (x.[..3]) do
107 | sum <- (int c) + sum
108 | Assert.Equal(448, sum)
109 |
110 | [<Fact>]
111 | let ``simple fold`` () =
112 | let x : ByteString = BS.fromString "===test"
113 | let accum s c = s + int c
114 | let sum = BS.fold accum 0 (x.[3..])
115 | Assert.Equal(448, sum)
116 |
117 | [<Fact>]
118 | let ``pinned data access`` () =
119 | let x : ByteString = (BS.fromString "==test==").[2..5]
120 | BS.withPinnedBytes x (fun p ->
121 | Assert.Equal(116uy, Marshal.ReadByte(p,0))
122 | Assert.Equal(101uy, Marshal.ReadByte(p,1))
123 | Assert.Equal(115uy, Marshal.ReadByte(p,2)))
124 |
125 | [<Fact>]
126 | let ``trivial byte writer`` () =
127 | let x : ByteString = (BS.fromString "==test==").[2..5]
128 | let x' = ByteStream.write(fun dst ->
129 | ByteStream.writeByte x.[0] dst
130 | ByteStream.writeBytes x.[1..] dst)
131 | Assert.Equal(x,x')
132 |
133 | [<Fact>]
134 | let ``trivial byte reader`` () =
135 | let x = (BS.fromString "==test==").[2..5]
136 | let rd src =
137 | let t = ByteStream.readByte src
138 | let est = ByteStream.readBytes 3 src
139 | (t,est)
140 | let (t,est) = ByteStream.read rd x
141 | Assert.Equal(t, byte 't')
142 | Assert.Equal(est, x.[1..])
143 |
144 | [<Fact>]
145 | let ``test critbit basics`` () =
146 | let x = BS.unsafeCreateA [| 0xFFuy; 0x00uy; 0xC1uy |]
147 | Assert.True(CritbitTree.testCritbit 0 x)
148 | Assert.True(CritbitTree.testCritbit 8 x)
149 | Assert.True(CritbitTree.testCritbit 9 x)
150 | Assert.False(CritbitTree.testCritbit 10 x)
151 | Assert.False(CritbitTree.testCritbit 17 x)
152 | Assert.True(CritbitTree.testCritbit 18 x)
153 | Assert.True(CritbitTree.testCritbit 19 x)
154 | Assert.True(CritbitTree.testCritbit 20 x)
155 | Assert.False(CritbitTree.testCritbit 21 x)
156 | Assert.False(CritbitTree.testCritbit 25 x)
157 | Assert.True(CritbitTree.testCritbit 26 x)
158 | Assert.False(CritbitTree.testCritbit 27 x)
159 |
160 | [<Fact>]
161 | let ``find critbit basics`` () =
162 | let x = BS.unsafeCreateA [| 0xFFuy; 0x00uy; 0x00uy |]
163 | let y = BS.unsafeCreateA [| 0xCCuy; 0x00uy |]
164 | Assert.Equal(Some 3, CritbitTree.findCritbit 0 x y)
165 | Assert.Equal(Some 3, CritbitTree.findCritbit 1 x y)
166 | Assert.Equal(Some 3, CritbitTree.findCritbit 2 x y)
167 | Assert.Equal(Some 3, CritbitTree.findCritbit 3 x y)
168 | Assert.Equal(Some 4, CritbitTree.findCritbit 4 x y)
169 | Assert.Equal(Some 7, CritbitTree.findCritbit 5 x y)
170 | Assert.Equal(Some 7, CritbitTree.findCritbit 7 x y)
171 | Assert.Equal(Some 8, CritbitTree.findCritbit 8 x y)
172 | Assert.Equal(Some 18, CritbitTree.findCritbit 9 x y)
173 |
174 | [<Fact>]
175 | let ``tree basics`` () =
176 | let add s t = CritbitTree.add (BS.fromString s) s t
177 | let d1 : string list = ["bar"; "band"; "bald"; "bandit"; "bald eagle"; "bard"; "barrister"]
178 | let t1 = List.fold (fun t s -> add s t) CritbitTree.empty d1
179 | Assert.True(CritbitTree.validate t1)
180 | Assert.Equal(d1.Length, CritbitTree.size t1)
181 |
182 | Assert.True(CritbitTree.exists (fun k _ -> (byte 'r' = k.[2])) t1)
183 | Assert.False(CritbitTree.exists (fun k _ -> (byte 'z' = k.[1])) t1)
184 | Assert.True(CritbitTree.forall (fun k _ -> (byte 'b' = k.[0])) t1)
185 |
186 | //CritbitTree.iter (fun k v -> printfn "%s" v) t1
187 |
188 | let has t s = CritbitTree.containsKey (BS.fromString s) t
189 | Assert.True(has t1 "band")
190 | Assert.True(has t1 "bard")
191 | Assert.True(has t1 "bald")
192 | Assert.True(has t1 "bar")
193 | Assert.False(has t1 "test")
194 | Assert.False(has t1 "")
195 | Assert.False(has t1 "apple")
196 | Assert.False(has t1 "barrier")
197 | Assert.False(has t1 "bardiche")
198 |
199 | Assert.Equal(Some "bard", CritbitTree.tryFind (BS.fromString "bard") t1)
200 | Assert.Equal(Some "bar", CritbitTree.tryFind (BS.fromString "bar") t1)
201 | Assert.Equal(Some "bald", CritbitTree.tryFind (BS.fromString "bald") t1)
202 |
203 | let rem s t = CritbitTree.remove (BS.fromString s) t
204 | let t2 = t1 |> rem "apple" |> rem "bard" |> rem "band" |> rem "bald"
205 | Assert.True(CritbitTree.validate t2)
206 | Assert.Equal(d1.Length, 3 + CritbitTree.size t2)
207 | Assert.False(has t2 "apple")
208 | Assert.False(has t2 "bard")
209 | Assert.False(has t2 "band")
210 | Assert.True(has t2 "bald eagle")
211 | Assert.True(has t2 "barrister")
212 |
213 | Assert.Equal(t1,t1)
214 | Assert.Equal(t1, CritbitTree.selectPrefix (BS.fromString "ba") t1)
215 | Assert.Equal(3, CritbitTree.size (CritbitTree.selectPrefix (BS.fromString "bar") t1))
216 | Assert.Equal(2, CritbitTree.size (CritbitTree.selectPrefix (BS.fromString "ban") t1))
217 | Assert.Equal(4, CritbitTree.size (CritbitTree.dropPrefix (BS.fromString "bar") t1))
218 | Assert.Equal(5, CritbitTree.size (CritbitTree.dropPrefix (BS.fromString "ban") t1))
219 |
220 | let p t s =
221 | let struct(a,b) = CritbitTree.splitAtKey (BS.fromString s) t
222 | (a,b)
223 | Assert.Equal(t1, snd (p t1 "bald")) // "bald" is least key
224 | Assert.Equal(2, CritbitTree.size (fst (p t1 "ban"))) // bald and bald eagle to left
225 | Assert.Equal(4, CritbitTree.size (fst (p t1 "bar")))
226 |
227 | let d1' = List.map snd (CritbitTree.toList t1)
228 | Assert.Equal(d1', List.sort d1)
229 |
230 |
231 |
232 |
233 |
--------------------------------------------------------------------------------
/src/Data.ByteString/Tests/Tests.fsproj:
--------------------------------------------------------------------------------
1 | <Project Sdk="Microsoft.NET.Sdk">
2 |
3 |   <PropertyGroup>
4 |     <TargetFramework>netcoreapp2.0</TargetFramework>
5 |
6 |     <IsPackable>false</IsPackable>
7 |   </PropertyGroup>
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 | </Project>
--------------------------------------------------------------------------------
/src/Main.fs:
--------------------------------------------------------------------------------
1 | module Wikilon.Main
2 |
3 | open System
4 | open System.IO
5 | open System.Threading
6 | open System.Security.Cryptography
7 | open Stowage
8 | open Suave
9 | open Data.ByteString
10 |
11 | let helpMsg = """
12 | Wikilon is a wiki-inspired development environment for Awelon. This program
13 | starts a local web server, which is mostly configured online. Arguments:
14 |
15 | [-help] print this help message
16 | [-p Port] bind specified port (default 3000)
17 | [-ip IP] bind IP or DNS (default 127.0.0.1)
18 | [-dir Dir] where to store data (default wiki)
19 | [-size GB] maximum database size (default 100)
20 | [-cache MB] space-speed tradeoff (default 100)
21 | [-admin] print a temporary admin password
22 |
23 | Configuration of Wikilon is managed online. Requesting an `-admin` password
24 | makes the admin account available until process reset. The admin can create
25 | other accounts with administrative authorities.
26 |
27 | Wikilon does not have built-in support for TLS. Try a reverse proxy, such as
28 | NGINX, to add the TLS layer if needed.
29 | """
30 |
31 | type Args = {
32 | help : bool;
33 | port : int;
34 | ip : string;
35 | home : string;
36 | size : int;
37 | cache : int;
38 | admin : bool;
39 | bad : string list;
40 | }
41 |
42 | let defaultArgs : Args = {
43 | help = false;
44 | port = 3000;
45 | ip = "127.0.0.1";
46 | home = "wiki";
47 | size = 100
48 | cache = 100
49 | admin = false;
50 | bad = [];
51 | }
52 |
53 | let (|Nat|_|) (s : string) : int option =
54 | let mutable ival = 0
55 | if System.Int32.TryParse(s, &ival) && (ival > 0)
56 | then Some ival
57 | else None
58 |
59 | let rec procArgs xs (a : Args) : Args =
60 | match xs with
61 | | [] -> {a with bad = List.rev a.bad }
62 | | "-help"::xs' -> procArgs xs' { a with help = true }
63 | | "-p"::(Nat p)::xs' -> procArgs xs' { a with port = p }
64 | | "-ip"::ip::xs' -> procArgs xs' { a with ip = ip }
65 | | "-dir"::dir::xs' -> procArgs xs' { a with home = dir }
66 | | "-size"::(Nat n)::xs' -> procArgs xs' { a with size = n }
67 | | "-cache"::(Nat n)::xs' -> procArgs xs' { a with cache = n }
68 | | "-admin"::xs' -> procArgs xs' { a with admin = true }
69 | | x::xs' -> procArgs xs' {a with bad = x :: a.bad }
70 |
71 | let getEntropy (n : int) : ByteString =
72 | let mem = Array.zeroCreate n
73 | do RandomNumberGenerator.Create().GetBytes(mem)
74 | BS.unsafeCreateA mem
75 |
76 | let hashStr = Stowage.RscHash.hash >> BS.toString
77 |
78 | let setAppWorkingDir fp =
79 | do Directory.CreateDirectory(fp) |> ignore
80 | Directory.SetCurrentDirectory(fp)
81 |
82 | // thoughts: it might be useful to separate the authorizations DB
83 | // from the main storage layer, e.g. to simplify integration with
84 | // open ID models.
85 |
86 | [<EntryPoint>]
87 | let main argv =
88 | let args : Args = procArgs (List.ofArray argv) defaultArgs
89 | if args.help then printfn "%s" helpMsg; 0 else
90 | let bad = not (List.isEmpty args.bad)
91 | if bad then printfn "Unrecognized args (try -help): %A" args.bad; (-1) else
92 | do setAppWorkingDir args.home
93 | Stowage.Cache.resize (1_000_000UL * (uint64 args.cache))
94 | use dbStore = new Stowage.LMDB.Storage("data", (1024 * args.size))
95 | let dbRoot = Stowage.DB.fromStorage dbStore
96 | let dbWiki = DB.withPrefix (BS.fromString "wiki/") dbRoot
97 | let adminPass = getEntropy 64 |> Stowage.RscHash.hash |> BS.take 24
98 | let wsParams : WS.Params = { db = dbWiki; admin = adminPass }
99 | let app = WS.mkApp wsParams
100 | let cts = new CancellationTokenSource()
101 | let svc =
102 | { defaultConfig with
103 | hideHeader = true
104 | bindings = [ HttpBinding.createSimple HTTP args.ip args.port
105 | ]
106 | cancellationToken = cts.Token
107 | }
108 | let (_,serve) = startWebServerAsync svc app
109 | Async.Start(serve, cts.Token)
110 | if args.admin then do
111 | printfn "admin:%s" (BS.toString adminPass)
112 | printfn "Press any key to halt."
113 | Console.ReadKey true |> ignore
114 | cts.Cancel() // clean cancel
115 | 0 // return an integer exit code
116 |
117 |
118 |
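119 | // Example invocation (illustrative): bind port 8080, keep data in
120 | // ./wiki, and print a temporary admin password.
121 | //
122 | //   dotnet run -- -p 8080 -dir wiki -admin
123 |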
--------------------------------------------------------------------------------
/src/Stowage/Data/CByteString.fs:
--------------------------------------------------------------------------------
1 | namespace Stowage
2 | open Data.ByteString
3 |
4 | /// A "compacting" ByteString.
5 | ///
6 | /// The idea here is that small bytestrings should be held locally
7 | /// in memory, while large bytestrings will be remotely referenced
8 | /// and only briefly cached in memory.
9 | ///
10 | /// Although this is represented by CVRef<ByteString>, we will use
11 | /// a specialized encoder to reduce overheads and ensure the "raw"
12 | /// encoding is used for remote binaries (no size prefix), making
13 | /// structure sharing with equivalent binaries a little easier.
14 | type CByteString = CVRef<ByteString>
15 |
16 | module EncCByteString =
17 |
18 | // Prefix is:
19 | // 0 for remote
20 | // EncVarNat (1 + Length) for local.
21 | //
22 | // Suffix is: 0 for remote, nothing for local. This is
23 | // used to simplify recognition of remote references.
24 |
25 | let write (ref:CByteString) (dst:ByteDst) : unit =
26 | match ref with
27 | | Local (s,_) ->
28 | EncVarNat.write (1UL + uint64 s.Length) dst
29 | ByteStream.writeBytes s dst
30 | | Remote vref ->
31 | EncVarNat.write (0UL) dst
32 | ByteStream.writeBytes (vref.Addr) dst
33 | ByteStream.writeByte 0uy dst
34 |
35 | let read (db:Stowage) (src:ByteSrc) : CByteString =
36 | let s0 = ByteStream.bytesRem src
37 | let len = EncVarNat.read src
38 | if (0UL = len) then // Remote
39 | let h = ByteStream.readBytes (RscHash.size) src
40 | let bSuffix = ByteStream.readByte src
41 | if (bSuffix <> 0uy) then raise ByteStream.ReadError
42 | Remote (LVRef.wrap (VRef.wrap (EncBytesRaw.codec) db h))
43 | else // Local
44 | let bs = ByteStream.readBytes (int (len - 1UL)) src
45 | let sf = ByteStream.bytesRem src
46 | Local (bs, uint64 (s0 - sf))
47 |
48 | let remoteSize = uint64 (2 + RscHash.size)
49 | let inline localSize s =
50 | let len = uint64 (BS.length s)
51 | len + EncVarNat.size (1UL + len)
52 |
53 | let compact (thresh:SizeEst) (db:Stowage) (ref:CByteString) : struct(CByteString * SizeEst) =
54 | match ref with
55 | | Local (s,szEst) ->
56 | if (szEst < thresh) then struct(ref, szEst) else
57 | let sz = localSize s
58 | if (sz < thresh) then struct(Local(s,sz), sz) else
59 | let vref = LVRef.stow (EncBytesRaw.codec) db s sz
60 | struct(Remote vref, remoteSize)
61 | | Remote _ -> struct(ref, remoteSize)
62 |
63 | let codec (thresh:SizeEst) =
64 | { new Codec<CByteString> with
65 | member __.Write ref dst = write ref dst
66 | member __.Read db src = read db src
67 | member __.Compact db ref = compact thresh db ref
68 | }
69 |
70 | module CByteString =
71 |
72 | // local, load, etc. matching interface for CVRef
73 | let inline local (s:ByteString) : CByteString = CVRef.local s
74 | let inline isRemote (ref:CByteString) : bool = CVRef.isRemote ref
75 | let inline load (ref:CByteString) : ByteString = CVRef.load ref
76 | let inline load' (ref:CByteString) : ByteString = CVRef.load' ref
77 |
78 | /// Wrap a remote binary resource. Will decref remote resource
79 | /// when destroyed. Use `wrap` if you need to incref resource.
80 | let inline wrap' (db:Stowage) (h:RscHash) =
81 | Remote (LVRef.wrap (VRef.wrap' (EncBytesRaw.codec) db h))
82 |
83 | /// Wrap remote binary resource, with incref to prevent GC.
84 | let inline wrap (db:Stowage) (h:RscHash) =
85 | db.Incref h
86 | wrap' db h
87 |
88 | /// Construct a compacted bytestring directly, compacting immediately
89 | /// if required according to the threshold. This function uses the
90 | /// specialized encoder from EncCByteString.
91 | let stow (thresh:SizeEst) (db:Stowage) (s:ByteString) : CByteString =
92 | let struct(ref,_) = EncCByteString.compact thresh db (local s)
93 | ref
94 |
95 |
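96 | // Usage sketch (illustrative; `db` stands for any Stowage instance):
97 | // with a 1000-byte threshold, small strings stay local while large
98 | // strings are stowed to remote resources.
99 | module private CByteStringSketch =
100 |     let demo (db:Stowage) : bool =
101 |         let thresh = 1000UL
102 |         let small = CByteString.stow thresh db (BS.fromString "tiny")
103 |         let big = BS.concat (List.replicate 1000 (BS.fromString "0123456789"))
104 |         let bigRef = CByteString.stow thresh db big
105 |         (not (CByteString.isRemote small)) && (CByteString.isRemote bigRef)
106 |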
--------------------------------------------------------------------------------
/src/Stowage/Data/CVRef.fs:
--------------------------------------------------------------------------------
1 | namespace Stowage
2 | open Data.ByteString
3 |
4 | /// Compacting Value References
5 | ///
6 | /// The overhead for secure hash resources is relatively high, so we don't
7 | /// want to use them for small values. Instead, keep small values inline.
8 | /// CVRef models this common use case. It is either a value in local memory
9 | /// or a cacheable LVRef. Upon compaction, we heuristically move large data
10 | /// into LVRefs. Repeated compactions are short-circuited by keeping
11 | /// some size metadata.
12 | type CVRef<'V> =
13 | | Local of 'V * SizeEst
14 | | Remote of LVRef<'V>
15 | override cref.ToString() =
16 | match cref with
17 | | Local (v,_) -> sprintf "`%A" v
18 | | Remote lvref -> lvref.ToString()
19 |
20 | /// Serialization for CVRef
21 | module EncCVRef =
22 | // encoding is a backtick-prefixed local value, or {hash} for remote.
23 |
24 | // overlapping the first byte with the RscHash container.
25 | let cLocal = byte '`'
26 | do assert(cLocal <> EncRscHash.cPrefix)
27 |
28 | let write (cV:Codec<'V>) (ref:CVRef<'V>) (dst:ByteDst) : unit =
29 | match ref with
30 | | Local (v,_) ->
31 | ByteStream.writeByte cLocal dst
32 | Codec.write cV v dst
33 | | Remote vref -> EncVRef.write (vref) dst
34 |
35 | let read (cV:Codec<'V>) (db:Stowage) (src:ByteSrc) : CVRef<'V> =
36 | let b0 = ByteStream.readByte src
37 | if (b0 = cLocal) then
38 | let s0 = ByteStream.bytesRem src
39 | let v = Codec.read cV db src
40 | let sf = ByteStream.bytesRem src
41 | Local (v, uint64 (s0 - sf))
42 | else if (b0 = EncRscHash.cPrefix) then
43 | let h = ByteStream.readBytes (RscHash.size) src
44 | let bf = ByteStream.readByte src
45 | if (bf <> EncRscHash.cSuffix) then raise ByteStream.ReadError
46 | Remote (LVRef.wrap (VRef.wrap cV db h))
47 | else raise ByteStream.ReadError
48 |
49 | let compact (thresh:SizeEst) (cV:Codec<'V>) (db:Stowage) (ref:CVRef<'V>) : struct(CVRef<'V> * SizeEst) =
50 | match ref with
51 | | Local (v,szEst) ->
52 | if (szEst < thresh) then struct(ref, 1UL + szEst) else
53 | let struct(v',szV) = Codec.compactSz cV db v
54 | if (szV < thresh) then struct(Local(v',szV), 1UL + szV) else
55 | struct(Remote (LVRef.stow cV db v' szV), EncRscHash.size)
56 | | Remote _ -> struct(ref, EncRscHash.size)
57 |
58 | let codec (thresh:SizeEst) (cV:Codec<'V>) =
59 | { new Codec<CVRef<'V>> with
60 | member __.Write ref dst = write cV ref dst
61 | member __.Read db src = read cV db src
62 | member __.Compact db ref = compact thresh cV db ref
63 | }
64 |
65 |
66 | module CVRef =
67 |
68 | /// Local in-memory value.
69 | ///
70 | /// The first compaction will determine whether this remains local
71 | /// or is stowed remotely. Subsequent compactions of the same size
72 | /// will be short-circuited.
73 | let inline local v = Local (v, System.UInt64.MaxValue)
74 |
75 | /// Remote value reference.
76 | let inline remote r = Remote r
77 |
78 | /// Test whether value is a Stowage reference.
79 | let isRemote ref =
80 | match ref with
81 | | Local _ -> false
82 | | Remote _ -> true
83 |
84 | /// Caching access to value.
85 | let load (ref:CVRef<'V>) : 'V =
86 | match ref with
87 | | Local (v,_) -> v
88 | | Remote r -> LVRef.load r
89 |
90 | /// Non-caching access to value.
91 | let load' (ref:CVRef<'V>) : 'V =
92 | match ref with
93 | | Local (v,_) -> v
94 | | Remote r -> LVRef.load' r
95 |
96 | /// Construct a compacted value directly.
97 | ///
98 | /// This will immediately compact the value in memory, then decide
99 | /// whether to keep it local or stow it to the database based on
100 | /// the estimated size. Even if stowed, the value remains in cache
101 | /// at least briefly.
102 | let inline stow (thresh:SizeEst) (cV:Codec<'V>) (db:Stowage) (v:'V) : CVRef<'V> =
103 | let struct(ref,_) = EncCVRef.compact thresh cV db (local v)
104 | ref
105 |
106 |
--------------------------------------------------------------------------------
/src/Stowage/Data/Cache.fs:
--------------------------------------------------------------------------------
1 | namespace Stowage
2 | open System.Threading
3 | open System.Threading.Tasks
4 |
5 | /// Abstract cached resource.
6 | ///
7 | /// A cached resource provides both an interface to clear the cache
8 | /// and a 'usage' heuristic - usually a touch count or time stamp -
9 | /// to delay release of a recently used resource.
10 | type Cached =
11 | abstract member Usage : int with get
12 | abstract member Clear : unit -> unit
13 |
14 | /// Stowage Cache
15 | ///
16 | /// Use of Stowage resources can operate a lot like virtual memory,
17 | /// a purely functional variant where stowed elements are loaded and
18 | /// parsed as needed. A Stowage cache exists to manage the resources,
19 | /// simulating the virtual memory paging heuristics.
20 | ///
21 | /// The current implementation uses a concrete heuristic strategy that
22 | /// combines aspects of least-recently-used with exponential-decay. It
23 | /// should be effective for most use cases. Resources are cleared from
24 | /// the .Net task thread pool.
25 | ///
26 | /// This module only provides a manager, not lookup. For cached lookups,
27 | /// consider MCache or DCache.
28 | module Cache =
29 |
30 | type private Rsc = (struct(SizeEst * int * System.WeakReference))
31 | type private Frame = ResizeArray<Rsc>
32 |
33 | // Our simple algorithm for releasing memory.
34 | let private scrubFrame (rng:System.Random) (pdecay:int) (f:Frame) : struct(Frame * SizeEst) =
35 | let mutable erased = 0UL
36 | let newFrame = new Frame()
37 | for (struct(sz,tc,wref)) in f do
38 | match wref.Target with
39 | | null ->
40 | erased <- (erased + sz)
41 | | :? Cached as c ->
42 | let tc' = c.Usage
43 | let doScrub = (tc = tc') && (rng.Next(100) < pdecay)
44 | if doScrub
45 | then c.Clear(); erased <- (erased + sz)
46 | else newFrame.Add(struct(sz,tc',wref))
47 | | _ -> failwith "invalid state"
48 | struct(newFrame,erased)
49 |
50 | /// Concrete cache manager.
51 | ///
52 | /// This object manages a set of Cached resources, clearing some
53 | /// whenever a quota overflows (this is a soft quota). The items
54 | /// cleared are selected based on strategies of least-recently
55 | /// used and exponential decay. This should be good for most use
56 | /// cases, but more sophisticated strategies can be implemented
57 | /// by having Cached items decide what to do upon Clear.
58 | ///
59 | /// Cached items are only referenced weakly, such that GC can
60 | /// remove items independently from the cache manager clearing
61 | /// them. Due to GC, there is no guarantee Clear is called.
62 | type Manager =
63 | val mutable private szMax : SizeEst
64 | val mutable private szCur : SizeEst
65 | val mutable private ixHd : int
66 | val private frames : Frame[]
67 | val private rngSrc : System.Random
68 | val private pdecay : int
69 | val mutable private bgtask : bool
70 | new(framect,pdecay,quota) =
71 | { szMax = quota
72 | szCur = 0UL
73 | ixHd = 0
74 | frames = Array.init (max framect 2) (fun _ -> new Frame())
75 | rngSrc = new System.Random(0)
76 | pdecay = (max pdecay 1)
77 | bgtask = false
78 | }
79 | new(quota) = new Manager(12,60,quota)
80 |
81 | /// Adjust the managed quota.
82 | member m.Resize (quota:SizeEst) : unit =
83 | lock m (fun () ->
84 | m.szMax <- quota
85 | m.ConsiderBGScrub())
86 |
87 | /// Add object for management. When added, a size estimate must
88 | /// also be provided to count against the quota.
89 | member m.Receive (c:Cached) (sz0:SizeEst) : unit =
90 | lock m (fun () ->
91 | let f = m.frames.[m.ixHd]
92 | let sz = 80UL + sz0 // add per-item overhead
93 | let tc = c.Usage + System.Int32.MinValue // logical touch; offset ensures the item survives its first scrub pass
94 | f.Add(struct(sz,tc,System.WeakReference(c)))
95 | m.szCur <- (m.szCur + sz)
96 | m.ConsiderBGScrub())
97 |
98 | member private m.ConsiderBGScrub() : unit =
99 | if (m.bgtask || (m.szMax >= m.szCur)) then () else
100 | assert(Monitor.IsEntered(m))
101 | m.bgtask <- true
102 | Task.Run(fun () -> m.BGScrub()) |> ignore
103 |
104 | member inline private m.NextFrameIx() =
105 | ((m.ixHd + 1) % (m.frames.Length))
106 |
107 | member private m.BGScrub() : unit =
108 | assert(not (Monitor.IsEntered(m)))
109 | lock (m.frames) (fun () ->
110 | assert(m.bgtask)
111 | lock m (fun () -> m.ixHd <- m.NextFrameIx())
112 | let ixScrub = m.NextFrameIx()
113 | let f = m.frames.[ixScrub]
114 | let struct(f',erased) = scrubFrame (m.rngSrc) (m.pdecay) (f)
115 | m.frames.[ixScrub] <- f'
116 | lock m (fun () ->
117 | assert(m.szCur >= erased)
118 | m.szCur <- (m.szCur - erased)
119 | m.bgtask <- false
120 | m.ConsiderBGScrub()))
121 |
122 | /// Although there are some use-cases for multiple cache managers,
123 | /// it's usually best to just use a global cache manager to match
124 | /// our global heap and OS-provided virtual memory system.
125 | let defaultManager = new Manager(80_000_000UL)
126 |
127 | /// Configure the global Stowage cache size. Default is eighty
128 | /// megabytes. Sizes aren't exact, but are used to estimate when
129 | /// an overflow occurs to drive background expiration of data.
130 | ///
131 | /// Cache size should be larger than the normal working set or
132 | /// we'll suffer cache thrashing. It shouldn't be larger than
133 | /// RAM because Stowage serves as a virtual memory system.
134 | let inline resize sz = defaultManager.Resize sz
135 |
136 | /// Manage a cached resource.
137 | ///
138 | /// Cached objects are held by weak reference, expire only
139 | /// upon cache overflow, and cleared by background thread.
140 | /// The `Clear` method is only called if the object has not
141 | /// been garbage collected, hence is not guaranteed.
142 | ///
143 | /// Size estimates should include rough overheads. They don't
144 | /// need to be exact, only sufficient that we're not overshooting
145 | /// our quotas by too much. Better to err towards high estimates.
146 | let inline receive c sz = defaultManager.Receive c sz
147 |
148 |
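149 | // Illustrative sketch (hypothetical type): a minimal Cached resource.
150 | // Touches bump Usage so the manager defers clearing; Clear drops the
151 | // payload, which the owner must be able to rebuild or reload.
152 | module private CacheSketch =
153 |     type DemoRsc(payload : byte[]) =
154 |         let mutable data = Some payload
155 |         let mutable tc = 0
156 |         member __.Touch() = tc <- tc + 1
157 |         member __.Data with get() = data
158 |         interface Cached with
159 |             member __.Usage with get() = tc
160 |             member __.Clear() = data <- None
161 |     /// register against the global quota with a rough size estimate
162 |     let register (r:DemoRsc) (szEst:SizeEst) : unit =
163 |         Cache.receive (r :> Cached) szEst
164 |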
--------------------------------------------------------------------------------
/src/Stowage/Data/Codec.fs:
--------------------------------------------------------------------------------
1 | namespace Stowage
2 | open Data.ByteString
3 |
4 | /// Estimated Serialization Size (in bytes).
5 | type SizeEst = uint64
6 |
7 | /// Abstract encoder-decoder type for data in Stowage.
8 | ///
9 | /// This uses the stream-oriented read and write from Data.ByteString
10 | /// to avoid constructing too many intermediate buffers. It decodes
11 | /// in context of a DB so we can usefully decode reference values into
12 | /// VRefs, BRefs, or LVRefs.
13 | ///
14 | /// Stowage data structures frequently support a "compaction" step
15 | /// that rewrites large, stable fragments of a value as VRefs. This
16 | /// is accessible through the Stowage codec, and compaction returns
17 | /// an estimated write size to aid heuristics.
18 | ///
19 | /// Compaction is a special step in context of stowage, whereby the
20 | /// components of a large value are replaced by value refs that can
21 | /// be loaded, cached, and efficiently serialized via secure hashes.
22 | /// For heuristic compaction, a size estimate is also returned.
23 | ///
24 | /// Compaction should ideally be idempotent, but this isn't enforced.
25 | type Codec<'T> =
26 | abstract member Write : 'T -> ByteDst -> unit
27 | abstract member Read : Stowage -> ByteSrc -> 'T
28 | abstract member Compact : Stowage -> 'T -> struct('T * SizeEst)
29 |
30 | module Codec =
31 |
32 | let inline write (c:Codec<'T>) (v:'T) (dst:ByteDst) : unit = c.Write v dst
33 |
34 | let inline read (c:Codec<'T>) (db:Stowage) (src:ByteSrc) : 'T = c.Read db src
35 |
36 | let inline compactSz (c:Codec<'T>) (db:Stowage) (v:'T) : struct('T * SizeEst) = c.Compact db v
37 |
38 | let inline compact (c:Codec<'T>) (db:Stowage) (v:'T) : 'T =
39 | let struct(v',_) = compactSz c db v
40 | v'
41 |
42 | let inline writeBytes (c:Codec<'T>) (v:'T) : ByteString =
43 | ByteStream.write (write c v)
44 |
45 | /// Read full bytestring as value, or raise ByteStream.ReadError
46 | let inline readBytes (c:Codec<'T>) (db:Stowage) (b:ByteString) : 'T =
47 | ByteStream.read (read c db) b
48 |
49 | /// Read full bytestring as value, or return None.
50 | let inline tryReadBytes (c:Codec<'T>) (db:Stowage) (b:ByteString) : 'T option =
51 | try readBytes c db b |> Some
52 | with
53 | | ByteStream.ReadError -> None
54 |
55 | /// Stow a value.
56 | ///
57 | /// Note: You'll have a reference to the resulting RscHash, so
58 | /// you'll need to use db.Decref later, or wrap into VRef. See
59 | /// VRef.stow for a more convenient .Net reference type.
60 | let inline stow (c:Codec<'T>) (db:Stowage) (v:'T) : RscHash =
61 | let result = db.Stow (writeBytes c v)
62 | System.GC.KeepAlive v // prevent GC of VRefs in v during write
63 | result
64 |
65 | /// Load a stowed value from RscHash.
66 | let inline load (c:Codec<'T>) (db:Stowage) (h:RscHash) : 'T =
67 | readBytes c db (db.Load h)
68 |
69 | /// Encode 'Val via 'Rep.
70 | ///
71 | /// This is very useful for building codec combinators. It should be
72 | /// the case that get and set compose to identity, and the translation
73 | /// should ideally be inexpensive in each direction.
74 | let view (cRep : Codec<'Rep>) (get : 'Rep -> 'Val) (set : 'Val -> 'Rep) =
75 | { new Codec<'Val> with
76 | member __.Write v dst = cRep.Write (set v) dst
77 | member __.Read db src = get (cRep.Read db src)
78 | member __.Compact db v =
79 | let struct(rep, szRep) = cRep.Compact db (set v)
80 | struct(get rep, szRep)
81 | }
82 |
83 | /// Boxed Codec, to unify various codec types.
84 | let inline boxed (cV : Codec<'V>) : Codec<obj> =
85 | view cV (box<'V>) (unbox<'V>)
86 |
87 | /// Invalid Codec - raises `invalidOp` for every operation.
88 | ///
89 | /// This is intended as a temporary placeholder for construction of
90 | /// recursively structured codecs (for example, the Trie codec).
91 | let invalid : Codec<'Val> =
92 | { new Codec<'Val> with
93 | member __.Write _ _ = invalidOp "invalid Codec cannot write"
94 | member __.Read _ _ = invalidOp "invalid Codec cannot read"
95 | member __.Compact _ _ = invalidOp "invalid Codec cannot compact"
96 | }
97 |
98 |
99 |
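100 | // Illustrative sketch: a minimal Codec instance for a single byte,
101 | // showing the Write/Read/Compact contract. Compaction is identity
102 | // for small plain data and reports the exact serialized size.
103 | module private CodecSketch =
104 |     let byteCodec : Codec<byte> =
105 |         { new Codec<byte> with
106 |             member __.Write b dst = ByteStream.writeByte b dst
107 |             member __.Read _ src = ByteStream.readByte src
108 |             member __.Compact _ b = struct(b, 1UL)
109 |         }
110 |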
--------------------------------------------------------------------------------
/src/Stowage/Data/DurableCache.fs:
--------------------------------------------------------------------------------
1 | namespace Stowage
2 | open Data.ByteString
3 |
4 | /// Durable Resource Cache
5 | ///
6 | /// The motive with DCache is to support large-scale memoization
7 | /// for incremental compilation or monadic views. The cached data
8 | /// is held on disk (rather than in memory), and is accessed via
9 | /// ByteString keys. If the cache has reached its quota limits,
10 | /// we'll gradually erase entries using heuristic techniques.
11 | module DCache =
12 |
13 | /// Keys must be ByteStrings. Typically, these represent names or
14 | /// computations (with version information or secure hashes).
15 | type Key = ByteString
16 |
17 | // For simplicity, I rewrite the keys using a secure hash.
18 | //
19 | // This ensures data has uniform depth and distribution, which
20 | // in turn simplifies the exponential decay implementation. It
21 | // also guards against timing attacks on a shared cache.
22 | let inline private mangleKey k = RscHash.hash k |> BS.drop 4
23 |
24 | // Each element will remember its given "memory" size estimate,
25 | // which may be different from encoded size. Clients should use
26 | // CVRef or another reference type explicitly for larger values.
27 | type private E<'V> = (struct(SizeEst * 'V))
28 |
29 | // How should we represent the cache?
30 | //
31 | // I'm leaning towards an LSM Trie, to support efficient updates
32 | // and lazy deletions. A normal trie would also be acceptable.
33 | //
34 | // Delayed parse might be useful - we could use an intermediate
35 | // MCache for stored data. But it might be better to make this
36 | // explicit in our value types, e.g. using a lazy value encoder.
37 |
38 |
39 | // Our basic storage representation - a sized tree. We need the
40 | // sizes to help drive GC for quota management.
41 | type private StowageRep<'V> =
42 | { data : LSMTrie<E<'V>>
43 | size : uint64 // how much data (total of SizeEst)
44 | count : uint64 // how many keys
45 | }
46 |
47 | let private cRep cV =
48 | let cT = LSMTrie.codec (EncPair.codec' (EncVarNat.codec) cV)
49 | { new Codec<StowageRep<'V>> with
50 | member cR.Write r dst =
51 | EncVarNat.write (r.count) dst
52 | EncVarNat.write (r.size) dst
53 | Codec.write cT (r.data) dst
54 | member cR.Read db src =
55 | let count = EncVarNat.read src
56 | let size = EncVarNat.read src
57 | let data = Codec.read cT db src
58 | { count = count; size = size; data = data }
59 | member cR.Compact db r =
60 | let struct(data',szData) = Codec.compactSz cT db (r.data)
61 | let szEst = EncVarNat.size (r.count)
62 | + EncVarNat.size (r.size)
63 | + szData
64 | let r' = { count = (r.count); size = (r.size); data = data' }
65 | struct(r', szEst)
66 | }
67 |
68 |
69 | // thoughts: should I favor an LSM trie or a plain trie? A plain
70 | // trie has advantages of more precise size estimates, but for a
71 | // cache the update performance and working sets from LSM may be
72 | // more valuable.
73 | //
74 | // If I don't secure-hash keys, an IntMap based hashmap may be wise.
75 | //
76 | // I don't want to use a DB TVar directly because I don't want to
77 | // sync every update with other durable DB updates. Instead, I might
78 | // want to write after some threshold is buffered in memory, or after
79 | // several seconds delay. Or some other heuristic. Loss is acceptable.
80 | //
81 | // An interesting point: if Codec changes, I can presumably handle
82 | // this by simply erasing the keys that raised ByteStream.ReadError.
83 | // So cache is highly resilient to changes in type or codec. Similar
84 | // for missing a hash resource.
85 |
86 |
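87 | // Illustrative sketch (hypothetical helper): cache keys that combine
88 | // a stable name with a deep version hash, so stale entries are never
89 | // requested again and eventually decay out of the cache.
90 | module private DCacheSketch =
91 |     let memoKey (name : ByteString) (version : ByteString) : DCache.Key =
92 |         BS.append name version
93 |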
--------------------------------------------------------------------------------
/src/Stowage/Data/FingerTree.fs:
--------------------------------------------------------------------------------
1 | namespace Stowage
2 |
3 | /// A Finger Tree in Stowage
4 | ///
5 | /// The Finger Tree is the 'swiss army knife' of data structures. It
6 | /// supports efficient O(1) operations at each edge. Monoidal summaries
7 | /// can support efficient random access, priority, etc.
8 | ///
9 | /// This finger tree assumes relatively cheap computation for measures,
10 | /// and chooses to recompute rather than cache for local data.
11 | ///
12 | /// TODO: This module is still far from complete.
13 | module FingerTree =
14 |
15 | /// Branches for the 2-3 tree structure.
16 | type B<'V> =
17 | | B2 of 'V * 'V
18 | | B3 of 'V * 'V * 'V
19 |
20 | /// A node is the main element we compact for Stowage.
21 | [<Struct>]
22 | type Node<'V, 'M> =
23 | val M : 'M
24 | val B : CVRef<B<'V>>
25 | new(m,b) = { M = m; B = b }
26 | interface IMeasured<'M> with
27 | member node.Measure = node.M
28 |
29 | /// Digits at each edge of a finger tree.
30 | type D<'V> =
31 | | D1 of 'V
32 | | D2 of 'V * 'V
33 | | D3 of 'V * 'V * 'V
34 | | D4 of 'V * 'V * 'V * 'V
35 |
36 | let private measureD (p:'M -> 'M -> 'M) (m:'V -> 'M) (d:D<'V>) : 'M =
37 | match d with
38 | | D1 (v1) -> m v1
39 | | D2 (v1,v2) -> p (m v1) (m v2)
40 | | D3 (v1,v2,v3) -> p (m v1) (p (m v2) (m v3))
41 | | D4 (v1,v2,v3,v4) -> p (m v1) (p (m v2) (p (m v3) (m v4)))
42 |
43 | let inline private measure (v:'V) : 'M = (v :> IMeasured<'M>).Measure
44 |
45 | type Tree<'V, 'M when 'V :> IMeasured<'M> and 'M :> Monoid<'M> and 'M:(new:unit -> 'M)> =
46 | | Empty
47 | | Single of 'V
48 | | Many of D<'V> * 'M * Tree<Node<'V,'M>,'M> * D<'V>
49 | // note: measure in Many is cache only for central FingerTree.
50 | member t.Monoid with get() = MonoidSource<'M>.Instance
51 | interface IMeasured<'M> with
52 | member t.Measure : 'M =
53 | match t with
54 | | Empty -> t.Monoid.Zero
55 | | Single v -> measure v
56 | | Many (pre,mt,t,suf) ->
57 | let p = t.Monoid.Plus
58 | let m d = measureD p measure d
59 | p (m pre) (p mt (m suf))
60 |
61 |
62 |
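63 | // A minimal sketch (hypothetical, not part of this module) of how
64 | // measures compose: Sz is an element-count monoid and Leaf a measured
65 | // element. Both names are illustrative assumptions.
66 | type Sz(n:uint64) =
67 |     new() = Sz(0UL)
68 |     member __.N = n
69 |     interface Monoid<Sz> with
70 |         member __.Zero = Sz(0UL)
71 |         member __.Plus a b = Sz(a.N + b.N)
72 | type Leaf(s:string) =
73 |     member __.Value = s
74 |     interface IMeasured<Sz> with
75 |         member __.Measure = Sz(1UL)
76 | // measuring a singleton tree yields the element's measure:
77 | //   let t : FingerTree.Tree<Leaf,Sz> = FingerTree.Single (Leaf "a")
78 | //   (t :> IMeasured<Sz>).Measure.N = 1UL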
--------------------------------------------------------------------------------
/src/Stowage/Data/LVRef.fs:
--------------------------------------------------------------------------------
1 | namespace Stowage
2 | open Data.ByteString
3 |
4 | /// Value References with cache-driven 'latency' upon stow and load.
5 | ///
6 | /// When first stowing data, we treat the cache as a buffer, delaying
7 | /// serialization until space is required. When loading data, we will
8 | /// cache data in the reference and hold it in memory until space is
9 | /// required again.
10 | ///
11 | /// Note: LVRef does not attempt sharing of cached resources. That is,
12 | /// there is no equivalent to an ephemeron table.
13 | type LVRef<'V> = VRef<'V>
14 |
15 | module LVRef =
16 |
17 | // Parsing a value adds some overhead, and the in-memory size of
18 | // a .Net value isn't obvious. For now, assume serialized data is
19 | // about 50% compressed relative to memory, plus a base overhead.
20 | let inline private memSize (sz:SizeEst) : SizeEst =
21 | 200UL + (sz <<< 1)
22 |
23 | type private R<'V> =
24 | inherit VRef<'V>
25 | val mutable lvref : Lazy<VRef<'V>>
26 | val mutable cache : (struct('V * SizeEst)) option
27 | val mutable tc : int
28 | member inline r.Touch() = r.tc <- (r.tc + 1)
29 | member private r.Load() = lock r (fun () ->
30 | match r.cache with
31 | | Some vsz -> r.Touch(); vsz // benign race to load data
32 | | None ->
33 | let vsz = r.lvref.Value.Deref()
34 | r.cache <- Some vsz
35 | let struct(_,sz) = vsz
36 | Cache.receive (r :> Cached) (memSize sz)
37 | vsz
38 | ) // end lock
39 | override r.Addr with get() = r.lvref.Value.Addr
40 | override r.Deref() =
41 | match r.cache with
42 | | Some v -> r.Touch(); v
43 | | None -> r.Load()
44 | interface Cached with
45 | member r.Usage with get() = r.tc
46 | member r.Clear() =
47 | r.lvref.Force() |> ignore<VRef<'V>> // move data to Stowage
48 | lock r (fun () -> r.cache <- None) // clear data from cache
49 | new(lvref,cache) =
50 | { inherit VRef<'V>()
51 | lvref = lvref
52 | cache = cache
53 | tc = 0
54 | }
55 |
56 | /// Wrap an existing VRef, adding a cache layer.
57 | let wrap (vref:VRef<'V>) : LVRef<'V> =
58 | let lvref = lazy vref
59 | lvref.Force() |> ignore<VRef<'V>>
60 | (new R<'V>(lvref,None)) :> LVRef<'V>
61 |
62 | /// Non-buffered, immediate stowage, with caching on load.
63 | let inline stow' (cV:Codec<'V>) (db:Stowage) (v:'V) : LVRef<'V> =
64 | wrap (VRef.stow cV db v)
65 |
66 | /// Stow a value lazily, when there is memory pressure in cache or
67 | /// when the RscHash address is first required. Until stowage, the
68 | /// value will dereference using the given value and size estimate.
69 | let stow (c:Codec<'V>) (db:Stowage) (v:'V) (sz:SizeEst) : LVRef<'V> =
70 | let ref = new R<'V>(lazy (VRef.stow c db v), Some (struct(v,sz)))
71 | Cache.receive (ref :> Cached) (memSize sz)
72 | ref :> LVRef<'V>
73 |
74 | /// Non-caching Load.
75 | ///
76 | /// When applied to a wrapped LVRef, this will use the cached
77 | /// data if available, or otherwise load without caching. If
78 | /// applied to other VRef types, simply loads normally.
79 | let load' (ref:LVRef<'V>) : 'V =
80 | match ref with
81 | | :? R<'V> as r ->
82 | match r.cache with
83 | | Some (struct(v,_)) -> v // use cached value
84 | | None -> VRef.load (r.lvref.Value) // load without caching
85 | | _ -> VRef.load ref // not an LVRef, just load directly
86 |
87 | /// Load a value, caching it briefly for future lookups. (Default Deref()).
88 | ///
89 | /// If the value was already cached, or has never been fully stowed,
90 | /// this will reuse the cached value and extend the lifespan of the
91 | /// cache. Otherwise it will load, parse, and cache the value.
92 | let inline load (ref:LVRef<'V>) : 'V = VRef.load ref
93 |
94 | /// Compared to EncVRef, EncLVRef will perform LVRef.wrap upon read.
95 | module EncLVRef =
96 | let size = EncVRef.size
97 | let inline write (ref:LVRef<_>) (dst:ByteDst) : unit =
98 | EncVRef.write ref dst
99 | let inline read (cV:Codec<'V>) (db:Stowage) (src:ByteSrc) : LVRef<'V> =
100 | LVRef.wrap (EncVRef.read cV db src)
101 | let codec (cV:Codec<'V>) =
102 | { new Codec<LVRef<'V>> with
103 | member __.Write ref dst = write ref dst
104 | member __.Read db src = read cV db src
105 | member __.Compact _ ref = struct(ref, size)
106 | }
107 |
108 |
109 |
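110 | // A minimal usage sketch, assuming a codec cInt : Codec<int> and a
111 | // Stowage instance db are available; the size estimate 8UL is arbitrary.
112 | //
113 | //   let r = LVRef.stow cInt db 42 8UL   // buffered, not yet serialized
114 | //   let v = LVRef.load r                // 42, served from the cache
115 | //   let h = r.Addr                      // forces stowage, yields RscHash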
--------------------------------------------------------------------------------
/src/Stowage/Data/Measured.fs:
--------------------------------------------------------------------------------
1 | namespace Stowage
2 |
3 | /// IMeasured indicates a value can be summarized or measured by
4 | /// another value of type M. In general, the summary should be
5 | /// much smaller than the value being summarized!
6 | type IMeasured<'M> =
7 | abstract member Measure : 'M
8 |
9 |
--------------------------------------------------------------------------------
/src/Stowage/Data/MemoryCache.fs:
--------------------------------------------------------------------------------
1 | namespace Stowage
2 |
3 | open System.Collections.Generic
4 |
5 | module MCache =
6 |
7 |
8 | /// A Memory Cache is essentially an unreliable Dictionary. This
9 | /// is unreliable because elements in the table may eventually be
10 | /// deleted by a Stowage.Cache manager in the background. This is
11 | /// useful for resource lookups, when resources can be reloaded
12 | /// or regenerated if not present.
13 | ///
14 | /// Note: Use of a Lazy<'V> type is appropriate in some cases,
15 | /// to enable use of `tryAdd` without race conditions on load.
16 | type C<'K,'V when 'K : equality> =
17 | val internal M : Cache.Manager
18 | val internal D : Dictionary<'K,E<'K,'V>>
19 | new(cm:Cache.Manager, eq:IEqualityComparer<'K>) =
20 | { D = new Dictionary<'K,E<'K,'V>>(eq)
21 | M = cm
22 | }
23 | new() =
24 | let cm = Cache.defaultManager
25 | let eq = EqualityComparer<'K>.Default
26 | new C<'K,'V>(cm,eq)
27 | // cached element type
28 | and internal E<'K,'V when 'K : equality> =
29 | val K : 'K
30 | val V : 'V
31 | val C : C<'K,'V>
32 | val mutable TC : int
33 | new(k,v,c) = { K = k; V = v; C = c; TC = 0 }
34 | member e.Touch() = e.TC <- (e.TC + 1)
35 | interface Cached with
36 | member e.Usage with get() = e.TC
37 | member e.Clear() = lock (e.C.D) (fun () ->
38 | e.C.D.Remove(e.K) |> ignore)
39 |
40 |
41 | /// Attempt to load data from the cache. Thread-safe.
42 | ///
43 | /// There is no guarantee the key is present, even if recently
44 | /// added, due to background management of the cache.
45 | let tryFind (k:'K) (c:C<'K,'V>) : 'V option =
46 | lock (c.D) (fun () ->
47 | match c.D.TryGetValue(k) with
48 | | true,e -> e.Touch(); Some (e.V)
49 | | _ -> None)
50 |
51 | /// Add and return data if key is new, otherwise return existing
52 | /// data. Atomic. Thread-safe. Consider use of Lazy<'V> type to
53 | /// delay value-load operations.
54 | let tryAdd (k:'K) (v:'V) (sz:SizeEst) (c:C<'K,'V>) : 'V =
55 | lock (c.D) (fun () ->
56 | match c.D.TryGetValue(k) with
57 | | true,e -> e.Touch(); e.V
58 | | _ ->
59 | let e = new E<'K,'V>(k,v,c)
60 | c.D.Add(k,e)
61 | c.M.Receive (e :> Cached) sz
62 | e.V)
63 |
64 | /// Add data, replacing existing object in cache. Thread-safe.
65 | let add (k:'K) (v:'V) (sz:SizeEst) (c:C<'K,'V>) : unit =
66 | let e = new E<'K,'V>(k,v,c)
67 | lock (c.D) (fun () -> c.D.[k] <- e) // replace any prior entry
68 | c.M.Receive (e :> Cached) sz
69 |
70 | /// Remove specified key data from cache. Thread-safe.
71 | let remove (k:'K) (c:C<'K,'V>) : unit =
72 | lock (c.D) (fun () ->
73 | c.D.Remove(k) |> ignore)
74 |
75 | /// Remove all keys from cache. Thread-safe.
76 | let clear (c:C<'K,'V>) : unit =
77 | lock (c.D) (fun () -> c.D.Clear())
78 |
79 |
80 |
81 | type MCache<'K,'V when 'K : equality> = MCache.C<'K,'V>
82 |
83 |
84 |
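85 | // A minimal usage sketch: pairing tryAdd with Lazy<'V> so a loader
86 | // (the hypothetical expensiveLoad) runs at most once per key, even
87 | // when threads race on the same key.
88 | //
89 | //   let cache = new MCache<string, Lazy<int>>()
90 | //   let load (k:string) : int =
91 | //       let mkv = lazy (expensiveLoad k)
92 | //       let v = MCache.tryAdd k mkv 100UL cache  // 100UL: size estimate
93 | //       v.Force()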
--------------------------------------------------------------------------------
/src/Stowage/Data/Monoid.fs:
--------------------------------------------------------------------------------
1 |
2 | namespace Stowage
3 |
4 | type Monoid<'M> =
5 | abstract member Zero : 'M
6 | abstract member Plus : 'M -> 'M -> 'M
7 |
8 | /// Monoid singletons on demand.
9 | type MonoidSource<'M when 'M :> Monoid<'M> and 'M:(new: unit -> 'M)> private () =
10 | static let zero : 'M = (new 'M() :> Monoid<'M>).Zero
11 | static member Instance = (zero :> Monoid<'M>)
12 |
13 |
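14 | // A minimal sketch (hypothetical): a max monoid meeting the
15 | // MonoidSource constraints, i.e. it implements Monoid<Max> and
16 | // has a parameterless constructor.
17 | type Max(n:uint64) =
18 |     new() = Max(0UL)
19 |     member __.N = n
20 |     interface Monoid<Max> with
21 |         member __.Zero = Max(0UL)
22 |         member __.Plus a b = Max(max a.N b.N)
23 | // usage: let m = MonoidSource<Max>.Instance
24 | //        (m.Plus (Max 2UL) (Max 3UL)).N = 3UL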
--------------------------------------------------------------------------------
/src/Stowage/Data/README.md:
--------------------------------------------------------------------------------
1 | # Stowage.Data
2 |
3 | This module aims to provide useful data structures above Stowage. It also defines the core Stowage concepts:
4 |
5 | * `Stowage` - remote value storage by secure hash
6 | * `RscHash` - concrete secure hash function
7 | * `Codec` - interpret binary data as values
8 | * `DB` - durable software transactional memory
9 | * `VRef` - remote value reference
10 | * `LVRef` - VRef with caching, delayed write
11 | * `CVRef` - LVRef but uses memory for small values
12 | * `IntMap` - sparse associative array, indexed by uint64
13 | * `Trie` - tree with binary keys, prefix sharing
14 | * `LSMTrie` - trie with write buffering
15 |
16 | TODO:
17 |
18 | * finger-tree sequences for vectors, deques
19 | * SeqHash or a similar history-independent sequence
20 |
21 | I'm also interested in developing the SeqHash structure, which is roughly a history-independent finger-tree sequence. But that's low priority. I don't fully grok the details yet.
22 |
23 |
--------------------------------------------------------------------------------
/src/Stowage/Data/RscHash.fs:
--------------------------------------------------------------------------------
1 | namespace Stowage
2 | open Data.ByteString
3 | open Konscious.Security.Cryptography
4 |
5 | /// A RscHash is simply a secure hash of fixed size encoded using
6 | /// an unusual base32 alphabet: bcdfghjklmnpqrstBCDFGHJKLMNPQRST.
7 | ///
8 | /// This string is computed from the Blake2B 320-bit secure hash
9 | /// of a binary, and is used as a name or capability to read a
10 | /// binary resource.
11 | ///
12 | /// Note on 10/27/2017: changed to 320 bits.
13 | type RscHash = ByteString
14 |
15 | module RscHash =
16 | /// the base32 alphabet used for Stowage hash references.
17 | let alphabet = "bcdfghjklmnpqrstBCDFGHJKLMNPQRST"
18 |
19 | /// number of bits encoded in hash
20 | let hashBitLen : int = 320
21 |
22 | let inline private hdiv n =
23 | assert (0 = (hashBitLen % n))
24 | (hashBitLen / n)
25 |
26 | /// number of base32 ASCII characters in hash
27 | let size : int = hdiv 5
28 |
29 | /// expected length of hash in bytes
30 | let private hashByteLen : int = hdiv 8
31 |
32 | // alphabet encoded as bytes array
33 | let private alphabyte : byte[] =
34 | let s = System.Text.Encoding.UTF8.GetBytes(alphabet)
35 | assert (32 = s.Length)
36 | s
37 |
38 | // table lookup for presence of data
39 | let private alphabool : bool[] =
40 | let memb x = Array.exists (int >> ((=) x)) alphabyte
41 | Array.init 256 memb
42 |
43 | // test whether an element is valid within a UTF8 or ASCII hash.
44 | let isHashByte (b : byte) : bool = alphabool.[int b]
45 |
46 | // encode forty bits from src to dst.
47 | let inline private b32e40 (dst : byte[]) (src : byte[]) off =
48 | let dst_off = (off * 8)
49 | let src_off = (off * 5)
50 | let inline r ix = src.[src_off + ix]
51 | let inline w ix v = dst.[dst_off + ix] <- alphabyte.[int v]
52 | // read forty bits of data
53 | let i4 = r 4
54 | let i3 = r 3
55 | let i2 = r 2
56 | let i1 = r 1
57 | let i0 = r 0
58 | // encode data into eight bytes
59 | do w 7 (((i4 &&& 0x1Fuy) ))
60 | do w 6 (((i4 &&& 0xE0uy) >>> 5) |||
61 | ((i3 &&& 0x03uy) <<< 3))
62 | do w 5 (((i3 &&& 0x7Cuy) >>> 2))
63 | do w 4 (((i3 &&& 0x80uy) >>> 7) |||
64 | ((i2 &&& 0x0Fuy) <<< 1))
65 | do w 3 (((i2 &&& 0xF0uy) >>> 4) |||
66 | ((i1 &&& 0x01uy) <<< 4))
67 | do w 2 (((i1 &&& 0x3Euy) >>> 1))
68 | do w 1 (((i1 &&& 0xC0uy) >>> 6) |||
69 | ((i0 &&& 0x07uy) <<< 2))
70 | do w 0 (((i0 &&& 0xF8uy) >>> 3))
71 |
72 | // perform a base32 encoding of the Blake2 hash.
73 | let private b32enc (src : byte[]) : byte[] =
74 | assert ((40 = src.Length) && (64 = size))
75 | let dst = Array.zeroCreate size
76 | do b32e40 dst src 7
77 | do b32e40 dst src 6
78 | do b32e40 dst src 5
79 | do b32e40 dst src 4
80 | do b32e40 dst src 3
81 | do b32e40 dst src 2
82 | do b32e40 dst src 1
83 | do b32e40 dst src 0
84 | dst
85 |
86 | /// basic bytestring hash
87 | let hash (s : ByteString) : ByteString =
88 | use alg = new HMACBlake2B(hashBitLen)
89 | let bytes = alg.ComputeHash(s.UnsafeArray, s.Offset, s.Length)
90 | BS.unsafeCreateA (b32enc bytes)
91 |
92 | /// Fold over RscHash dependencies represented within a value.
93 | ///
94 | /// Find substrings that look like hashes - appropriate size and
95 | /// character set, separated by non-hash characters. Useful for
96 | /// conservative GC of resources.
97 | let rec foldHashDeps (fn : 's -> RscHash -> 's) (s:'s) (v:ByteString) : 's =
98 | if (v.Length < size) then s else
99 | let hv' = BS.dropWhile (not << isHashByte) v
100 | let struct(h,v') = BS.span isHashByte hv'
101 | let s' = if (size = h.Length) then (fn s h) else s
102 | foldHashDeps fn s' v'
103 |
104 | /// Iterate through RscHash dependencies in a value.
105 | /// Recognizes same RscHash substrings as foldHashDeps.
106 | let rec iterHashDeps (fn : RscHash -> unit) (v:ByteString) : unit =
107 | if (v.Length < size) then () else
108 | let hv' = BS.dropWhile (not << isHashByte) v
109 | let struct(h,v') = BS.span isHashByte hv'
110 | if (size = h.Length)
111 | then fn h
112 | iterHashDeps fn v'
113 |
114 | // for Seq.unfold, which is more efficient than seq {}.
115 | let rec private stepHashDeps (v:ByteString) : (RscHash * ByteString) option =
116 | if (v.Length < size) then None else
117 | let hv' = BS.dropWhile (not << isHashByte) v
118 | let struct(h,v') = BS.span isHashByte hv'
119 | if (size = BS.length h) then Some(h,v') else
120 | stepHashDeps v'
121 |
122 | let seqHashDeps (v:ByteString) : seq<RscHash> =
123 | Seq.unfold stepHashDeps v
124 |
125 | /// Test whether a ByteString matches format of RscHash.
126 | let isValidHash (h:ByteString) : bool =
127 | (size = BS.length h) && (BS.forall isHashByte h)
128 |
129 |
130 |
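131 | // A minimal usage sketch. BS.fromString is assumed to exist in
132 | // Data.ByteString (the inverse of BS.toString used elsewhere).
133 | //
134 | //   let h = RscHash.hash (BS.fromString "hello, stowage")
135 | //   RscHash.isValidHash h                 // true: 64 base32 chars
136 | //   RscHash.seqHashDeps h |> Seq.length   // 1: the hash itself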
--------------------------------------------------------------------------------
/src/Stowage/Data/Seq.fs:
--------------------------------------------------------------------------------
1 | namespace Stowage
2 | open Data.ByteString
3 |
4 | /// A finger-tree based sequence, modeled above Stowage.
5 | ///
6 | /// Finger-trees are an interesting persistent data structure that offer
7 | /// O(lg(min(K,N-K))) access to the Kth of N elements. This reduces to
8 | /// O(1) access to the first or last few elements. Similar logarithmic
9 | /// properties apply to splitting or concatenation of the finger-trees.
10 | ///
11 | /// Consequently, finger-trees can be used for: queues, stacks, deques,
12 | /// lists or vectors, ropes, and many things that can be built above a
13 | /// sequence.
14 | ///
15 | /// Finger trees do have weaknesses: weak memory locality, and history
16 | /// dependent structure. They're useful despite these limitations, but in
17 | /// some cases a more specialized structure might be better.
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/src/Stowage/Data/Stowage.Data.fsproj:
--------------------------------------------------------------------------------
1 | <Project Sdk="Microsoft.NET.Sdk">
2 |   <PropertyGroup>
3 |     <TargetFramework>netstandard2.0</TargetFramework>
4 |   </PropertyGroup>
5 | </Project>
6 |
--------------------------------------------------------------------------------
/src/Stowage/Data/Stowage.fs:
--------------------------------------------------------------------------------
1 | namespace Stowage
2 | open Data.ByteString
3 |
4 | /// Abstract Remote Storage for Binaries referenced by RscHash.
5 | ///
6 | /// Use of secure hashes is a convenient way to reference binary data.
7 | /// They are immutable and acyclic by construction, cacheable, secure,
8 | /// provider-independent, self-authenticating, implicitly shared,
9 | /// automatically named, decentralized, uniformly sized, and smaller
10 | /// than many full URLs or file paths.
11 | ///
12 | /// The idea for Stowage is to build data structures above binaries,
13 | /// in order to represent larger-than-memory data and distributed,
14 | /// purely functional computations. Our stowage layer doubles as a
15 | /// purely functional variant of "virtual memory", since we can move
16 | /// data we won't soon need into higher-latency storage - an external
17 | /// database or filesystem or network.
18 | ///
19 | /// Stowage resources may ultimately be GC'd like normal values, and
20 | /// we can easily use conservative GC for references between binaries.
21 | /// Stowage is usually one aspect of a durable database that determines
22 | /// a durable "root set" for GC. Unlike most databases, Stowage makes
23 | /// it feasible to model entire databases as first-class values within
24 | /// another database - convenient for modeling versioned systems.
25 | ///
26 | type Stowage =
27 |
28 | /// The Stow operation should add a value to the Stowage database
29 | /// and return its RscHash, such that a subsequent Load can access
30 | /// the data. Additionally, it must atomically Incref the RscHash
31 | /// to prevent concurrent GC.
32 | ///
33 | /// Stowage systems should support values of up to 64MB. Most items
34 | /// should be much smaller, at most a few hundred kilobytes. If the
35 | /// item is too large, an appropriate exception should be raised.
36 | ///
37 | /// NOTE: resources containing sensitive data should include a salt,
38 | /// e.g. an 80-bit random string for entropy. Otherwise, an attacker
39 | /// can construct millions of likely hashes and test whether each is
40 | /// present within the system.
41 | abstract member Stow : ByteString -> RscHash
42 |
43 | /// The Load operation should access data from Stowage. If this
44 | /// data cannot be located, a MissingRsc exception must be raised.
45 | ///
46 | /// There is no access control for Stowage, but the RscHash serves
47 | /// as a secure bearer token and read capability. Consequently, it
48 | /// is important that implementations don't expose the full RscHash
49 | /// through timing attacks. (Exposing the first half is acceptable.)
50 | abstract member Load : RscHash -> ByteString
51 |
52 | /// RscHash references to binaries can be understood as a form of
53 | /// unmanaged resource from perspective of our .Net runtime. But
54 | /// a simple reference counting interface can guard hashes in .Net
55 | /// memory from a premature GC. Upon stowage, Incref is implicit.
56 | /// Usually, decref will be performed by .Net finalizer (see VRef).
57 | abstract member Decref : RscHash -> unit
58 | abstract member Incref : RscHash -> unit
59 |
60 | /// Exception on Load failure.
61 | exception MissingRsc of Stowage * RscHash
62 |
63 | // TODO: Develop a useful set of Stowage combinators. (Low Priority.)
64 | // layered, cached, mirrored, distributed hashtables...
65 |
66 | module Stowage =
67 |
68 | /// Fake Stowage. Drops everything you give it!
69 | ///
70 | /// Use cases: testing, default 'null' object, or computing an
71 | /// ETAG based on logical compaction of a large data structure.
72 | let fake : Stowage =
73 | { new Stowage with
74 | member __.Stow s = RscHash.hash s
75 | member db.Load h = raise (MissingRsc (db,h))
76 | member __.Decref _ = ()
77 | member __.Incref _ = ()
78 | }
79 |
80 | // when tracking deps, don't want to implicitly hold a huge array
81 | // of origin data in memory, so we'll trimBytes first.
82 | let private depcons lst rsc = (BS.trimBytes rsc) :: lst
83 |
84 | // step through dependencies. for use with Seq.unfold
85 | let rec private streamStep (db:Stowage) (struct(hist,rs)) =
86 | match rs with
87 | | (rsc::remrs) ->
88 | if Set.contains rsc hist then streamStep db (struct(hist,remrs)) else
89 | let hist' = Set.add rsc hist
90 | let data =
91 | try Some (db.Load rsc)
92 | with
93 | | MissingRsc _ -> None
94 | let rs' =
95 | match data with
96 | | Some bytes -> RscHash.foldHashDeps depcons remrs bytes
97 | | None -> remrs
98 | Some((rsc,data),struct(hist',rs'))
99 | | [] -> None
100 |
101 | /// Stream all Stowage data from a given set of roots. This is
102 | /// mostly intended for import/export operations. Missing entries
103 | /// are possible due to Stowage state or false positives (we use
104 | /// a conservative algorithm to recognize references). In those
105 | /// cases, we'll report `None` in the data field.
106 | let streamDeps (db:Stowage) (roots:RscHash list) : seq<RscHash * (ByteString option)> =
107 | Seq.unfold (streamStep db) (struct(Set.empty,roots))
108 |
109 |
110 |
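111 | // A minimal usage sketch: fake Stowage hashes but drops the data,
112 | // so Stow yields a valid RscHash while Load raises MissingRsc.
113 | // BS.fromString is assumed from Data.ByteString.
114 | //
115 | //   let db = Stowage.fake
116 | //   let h = db.Stow (BS.fromString "hello")
117 | //   Stowage.streamDeps db [h] |> Seq.toList   // [ (h, None) ]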
--------------------------------------------------------------------------------
/src/Stowage/Data/VDiff.fs:
--------------------------------------------------------------------------------
1 | namespace Stowage
2 |
3 | /// Value differences for key-indexed values.
4 | type VDiff<'V> =
5 | | InL of 'V // value in left
6 | | InR of 'V // value in right
7 | | InB of 'V * 'V // two values with equality failure
8 |
9 |
--------------------------------------------------------------------------------
/src/Stowage/Data/VRef.fs:
--------------------------------------------------------------------------------
1 | namespace Stowage
2 | open Data.ByteString
3 |
4 | /// A VRef is an abstract reference to a value whose binary
5 | /// representation is held in Stowage, addressed by secure
6 | /// hash. The abstract nature is to separate caching, GC,
7 | /// lazy serialization, and similar features. To simplify
8 | /// caching, Deref also returns a serialized size estimate.
9 | ///
10 | /// Comparison of VRefs is based on type and secure hash, so
11 | /// we assume a stable codec per type (at least within context
12 | /// of the comparisons). The ToString method simply returns
13 | /// "{secureHash}".
14 | [<AbstractClass>]
15 | type VRef<'V>() =
16 | abstract member Addr : RscHash with get
17 | abstract member Deref : unit -> struct('V * SizeEst)
18 |
19 | override r.ToString() = sprintf "{%s}" (BS.toString r.Addr)
20 | override r.GetHashCode() = ByteString.Hash32 (r.Addr) |> int
21 | override x.Equals yobj =
22 | match yobj with
23 | | :? VRef<'V> as y -> System.Object.ReferenceEquals(x,y) || (x.Addr = y.Addr)
24 | | _ -> false
25 | interface System.IComparable with
26 | member x.CompareTo yobj =
27 | match yobj with
28 | | :? VRef<'V> as y ->
29 | if System.Object.ReferenceEquals(x,y) then 0 else
30 | ByteString.Compare (x.Addr) (y.Addr)
31 | | _ -> invalidArg "yobj" "comparing values of different types"
32 |
33 | module VRef =
34 |
35 | /// Create VRef by packaging Codec, Stowage DB, and RscHash.
36 | /// Assumes caller is passing ownership, so for GC we will
37 | /// call Stowage Decref on the hash when done with the VRef.
38 | /// This is the "simplest" VRef, without any caching.
39 | let wrap' (c:Codec<'V>) (db:Stowage) (h0:RscHash) : VRef<'V> =
40 | assert(BS.length h0 = RscHash.size) // partial validation
41 | let h = BS.trimBytes h0
42 | { new VRef<'V>() with
43 | override __.Addr with get() = h
44 | override ref.Deref() =
45 | let bytes = db.Load h
46 | let szest = uint64 (BS.length bytes)
47 | let value = Codec.readBytes c db bytes
48 | System.GC.KeepAlive(ref)
49 | struct(value,szest)
50 | override __.Finalize() = db.Decref h
51 | }
52 |
53 | /// As wrap', but will first Incref the RscHash to resist premature
54 | /// garbage collection of the referenced data.
55 | let inline wrap (c:Codec<'V>) (db:Stowage) (h:RscHash) : VRef<'V> =
56 | db.Incref h
57 | wrap' c db h
58 |
59 | /// Create VRef by eagerly Stowing a value.
60 | let inline stow (c:Codec<'V>) (db:Stowage) (v:'V) : VRef<'V> =
61 | wrap' c db (Codec.stow c db v)
62 |
63 | /// load the VRef's data from Stowage (ignoring the SizeEst).
64 | let inline load (ref:VRef<'V>) : 'V =
65 | let struct(value,_) = ref.Deref()
66 | value
67 |
68 | module EncRscHash =
69 | let cPrefix = byte '{'
70 | let cSuffix = byte '}'
71 | let size = uint64 (2 + RscHash.size)
72 | let write (h:RscHash) (dst:ByteDst) =
73 | assert(h.Length = RscHash.size)
74 | ByteStream.writeByte cPrefix dst
75 | ByteStream.writeBytes h dst
76 | ByteStream.writeByte cSuffix dst
77 | let read (src:ByteSrc) : RscHash =
78 | let bPrefix = ByteStream.readByte src
79 | if (bPrefix <> cPrefix) then raise ByteStream.ReadError
80 | let h = ByteStream.readBytes (RscHash.size) src
81 | let bSuffix = ByteStream.readByte src
82 | if (bSuffix <> cSuffix) then raise ByteStream.ReadError
83 | h
84 |
85 |
86 | module EncVRef =
87 | let size = EncRscHash.size
88 | let inline write (ref:VRef<_>) (dst:ByteDst) : unit =
89 | EncRscHash.write (ref.Addr) dst
90 | let inline read (cV:Codec<'V>) (db:Stowage) (src:ByteSrc) : VRef<'V> =
91 | let h = EncRscHash.read src
92 | VRef.wrap cV db h
93 | let codec (cV:Codec<'V>) =
94 | { new Codec<VRef<'V>> with
95 | member __.Write ref dst = write ref dst
96 | member __.Read db src = read cV db src
97 | member __.Compact _ ref = struct(ref, size)
98 | }
99 |
100 |
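101 | // A minimal usage sketch, assuming some codec cV : Codec<'V>, a
102 | // Stowage instance db, and a value v of type 'V:
103 | //
104 | //   let r = VRef.stow cV db v   // stow eagerly, Incref held via finalizer
105 | //   string r                    // "{secureHash}", from ToString
106 | //   let v' = VRef.load r        // load and decode, ignoring SizeEst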
--------------------------------------------------------------------------------
/src/Stowage/RCTable.fs:
--------------------------------------------------------------------------------
1 |
2 | namespace Stowage
3 |
4 | open System
5 | open System.Security
6 | open System.Runtime.InteropServices
7 |
8 | /// Reference Tracking
9 | ///
10 | /// The stowage system tracks secure hash references from .Net memory
11 | /// using a table with reference counts. Resources referenced by this
12 | /// table should not be GC'd from stowage even if they lack persistent
13 | /// roots. The decref will usually be performed via .Net finalizers.
14 | [< SecuritySafeCriticalAttribute >]
15 | module internal RCTable =
16 | // The current implementation is a hierarchical hashtable with
17 | // 64-bit entries where keys use 58 bits and reference counts
18 | // are 6 bits.
19 | //
20 | // The 'hierarchical' aspect solves the problem of saturating
21 | // a small 6-bit refct. If we surpass the max refct, we'll add
22 | // the item to a child table representing a higher 'digit'. We
23 | // can borrow from this digit upon decref. So a few layers of
24 | // table essentially give us extra reference count bits.
25 | //
26 | // This design is thus best for cases where most items have small
27 | // reference counts, which is a reasonable assumption for most
28 | // use cases.
29 | //
30 | // Meanwhile, the 58-bit IDs provide reasonable resistance to
31 | // hash collisions. We'll still likely have a few collisions in
32 | // a billion references. But for Stowage, we shouldn't have a
33 | // billion refs in memory (since that would cost a hundred gigs)
34 | // and a few collisions isn't going to hurt much.
35 | [<Struct>]
36 | type Elem =
37 | val v : uint64
38 | static member rcBits : int = 6
39 | static member idBits : int = (64 - Elem.rcBits)
40 | static member rcMask : uint64 = (1UL <<< Elem.rcBits) - 1UL
41 | static member rcMax : uint16 = uint16 Elem.rcMask
42 | static member idMask : uint64 = (1UL <<< Elem.idBits) - 1UL
43 | new(ev : uint64) = { v = ev }
44 | new(id : uint64, rc : uint16) =
45 | assert(Elem.rcMask >= (uint64 rc))
46 | Elem((id <<< Elem.rcBits) ||| (uint64 rc))
47 | member inline e.rc with get() : uint16 = uint16 (e.v &&& Elem.rcMask)
48 | member inline e.id with get() : uint64 = (e.v >>> Elem.rcBits)
49 | static member size : int = 8
50 |
51 | let inline elemOff (ix:nativeint) : nativeint = ((nativeint Elem.size) * ix)
52 | let inline getElemAt (p:nativeint) (ix:nativeint) : Elem =
53 | // Marshal has no ReadUInt64, so read as Int64 and reinterpret.
54 | let i64 = Marshal.ReadInt64(p + (elemOff ix))
55 | Elem(uint64 i64)
56 | let inline setElemAt (p:nativeint) (ix:nativeint) (e:Elem) =
57 | Marshal.WriteInt64(p + (elemOff ix), int64 e.v)
58 | let inline clearElemAt p ix = setElemAt p ix (Elem(0UL))
59 |
60 | let allocData (sz:int) : nativeint =
61 | assert(sz >= 4)
62 | let ct = (1n <<< sz)
63 | let p = Marshal.AllocHGlobal (elemOff ct)
64 | // initialize the memory
65 | let rec loop ix =
66 | clearElemAt p ix
67 | if(0n <> ix) then loop (ix - 1n)
68 | loop (ct - 1n)
69 | p
70 |
71 | // Our table is represented in unmanaged memory in order to guarantee
72 | // it may scale proportional to address space. Geometric growth, powers
73 | // of two sizes, never empty. At most 2/3 fill. The initial size is
74 | // a few kilobytes, enough to track a few hundred items.
75 | //
76 | // This table assumes single-threaded access, and should be locked for
77 | // use from any multi-threaded context.
78 | [<AllowNullLiteral>]
79 | type Table =
80 | val mutable private Data : nativeint
81 | val mutable private Size : int
82 | val mutable private Fill : nativeint
83 | val mutable private Next : Table // next RC digit or null
84 |
85 | // "digit" size is less than rcMax to provide a small buffer
86 | // between incref and decref touching the `Next` table.
87 | static member private Digit : uint16 =
88 | let buffer = 7us
89 | assert(Elem.rcMax > (2us * buffer))
90 | (Elem.rcMax - buffer)
91 |
92 | private new(sz:int) =
93 | { Data = allocData sz
94 | Size = sz
95 | Fill = 0n
96 | Next = null
97 | }
98 | new() = new Table(10)
99 |
100 | override tbl.Finalize() =
101 | Marshal.FreeHGlobal(tbl.Data)
102 | tbl.Data <- 0n
103 |
104 | // test for finalization for safer shutdown
105 | member inline private tbl.Finalized() : bool =
106 | (0n = tbl.Data)
107 |
108 | // find returns struct(index * refct).
109 | //
110 | // This assumes tbl.Fill is below table capacity, hence we have some
111 | // zero elements. If the identifier isn't part of our table, we
112 | // return the appropriate location (with refct 0)
113 | member private tbl.Find (idFull:uint64) : struct(nativeint * uint16) =
114 | let id = (idFull &&& Elem.idMask)
115 | let mask = ((1n <<< tbl.Size) - 1n)
116 | let rec loop ix =
117 | let e = getElemAt (tbl.Data) ix
118 | if ((e.id = id) || (0us = e.rc)) then struct(ix,e.rc) else
119 | loop ((1n + ix) &&& mask)
120 | loop ((nativeint id) &&& mask)
121 |
122 | /// Test whether ID is present within table.
123 | member tbl.Contains (id:uint64) : bool =
124 | if(tbl.Finalized()) then false else
125 | let struct(_,rc) = tbl.Find id
126 | (0us <> rc)
127 |
128 | // get the index, ignore the refct
129 | member inline private tbl.IndexOf id =
130 | let struct(ix,_) = tbl.Find id
131 | ix
132 |
133 | static member private MaxSize : int =
134 | (8 * (System.IntPtr.Size - 1))
135 |
136 | // grow table; geometric growth
137 | member private tbl.Grow () : unit =
138 | if (tbl.Finalized())
139 | then invalidOp "incref after finalize"
140 | let old_size = tbl.Size
141 | let old_data = tbl.Data
142 | if (old_size >= Table.MaxSize)
143 | then raise (System.OutOfMemoryException())
144 | let new_size = (old_size + 1)
145 | let new_data = allocData new_size
146 | tbl.Data <- new_data
147 | tbl.Size <- new_size
148 | let rec loop ix =
149 | let e = getElemAt old_data ix
150 | if (0us <> e.rc)
151 | then setElemAt (tbl.Data) (tbl.IndexOf (e.id)) e
152 | if (0n = ix) then () else loop (ix - 1n)
153 | loop ((1n <<< old_size) - 1n)
154 | Marshal.FreeHGlobal(old_data)
155 |
156 | member inline private tbl.Reserve() : unit =
157 | let overfilled = ((tbl.Fill * 3n) >>> (1 + tbl.Size)) <> 0n
158 | if overfilled then tbl.Grow()
159 |
160 | /// Add ID to the table, or incref existing ID
161 | member tbl.Incref (id:uint64) : unit =
162 | tbl.Reserve()
163 | let struct(ix,rc) = tbl.Find id
164 | if (0us = rc) then
165 | setElemAt (tbl.Data) ix (Elem(id, 1us))
166 | tbl.Fill <- (tbl.Fill + 1n)
167 | else if(Elem.rcMax = rc) then
168 | if (null = tbl.Next)
169 | then tbl.Next <- new Table(7)
170 | tbl.Next.Incref id
171 | setElemAt (tbl.Data) ix (Elem(id,(Elem.rcMax - Table.Digit) + 1us))
172 | else
173 | setElemAt (tbl.Data) ix (Elem(id, rc + 1us))
174 |
175 | member tbl.Decref (id:uint64) : unit =
176 | if(tbl.Finalized()) then () else
177 | let struct(ix,rc) = tbl.Find id
178 | if (1us = rc) then
179 | let delete = (null = tbl.Next) || not (tbl.Next.Contains id)
180 | if delete then tbl.Delete ix else
181 | // borrow Table.Digit from next table
182 | tbl.Next.Decref id
183 | setElemAt (tbl.Data) ix (Elem(id, Table.Digit))
184 | else if(0us = rc) then invalidOp "refct already zero!"
185 | else setElemAt (tbl.Data) ix (Elem(id, rc - 1us))
186 |
187 | // clear the specified index, then walk the hashtable to shift
188 | // potential linear-collision items into appropriate locations
189 | member private tbl.Delete (ixDel:nativeint) : unit =
190 | clearElemAt (tbl.Data) ixDel
191 | tbl.Fill <- (tbl.Fill - 1n)
192 | let mask = ((1n <<< tbl.Size) - 1n)
193 | let rec loop ix =
194 | let e = getElemAt (tbl.Data) ix
195 | if (0us = e.rc) then () else
196 | let ix' = tbl.IndexOf (e.id)
197 | if (ix <> ix')
198 | then clearElemAt (tbl.Data) ix
199 | setElemAt (tbl.Data) ix' e
200 | loop ((1n + ix) &&& mask)
201 | loop ((1n + ixDel) &&& mask)
202 |
203 |
204 |
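205 | // A minimal usage sketch (single-threaded; callers must lock for any
206 | // multi-threaded use, per the notes above):
207 | //
208 | //   let tbl = new Table()
209 | //   tbl.Incref 42UL
210 | //   assert (tbl.Contains 42UL)
211 | //   tbl.Decref 42UL
212 | //   assert (not (tbl.Contains 42UL))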
--------------------------------------------------------------------------------
/src/Stowage/README.md:
--------------------------------------------------------------------------------
1 | # Stowage
2 |
3 | This package provides an LMDB-backed implementation for the abstract `Stowage` and `DB` types from the `Stowage.Data` package. This enables representations of durable and larger-than-memory data types with garbage collection, and also for transactional named variables. This is the first and primary backend for Stowage, though we might eventually pursue cloud-based implementations.
4 |
5 |
--------------------------------------------------------------------------------
/src/Stowage/Stowage.fsproj:
--------------------------------------------------------------------------------
1 | <Project Sdk="Microsoft.NET.Sdk">
2 |   <PropertyGroup>
3 |     <TargetFramework>netstandard2.0</TargetFramework>
4 |   </PropertyGroup>
5 | </Project>
6 |
--------------------------------------------------------------------------------
/src/Stowage/Test/.gitignore:
--------------------------------------------------------------------------------
1 | testDB
2 |
3 |
--------------------------------------------------------------------------------
/src/Stowage/Test/Program.fs:
--------------------------------------------------------------------------------
1 | module Program = let [<EntryPoint>] main _ = 0
2 |
--------------------------------------------------------------------------------
/src/Stowage/Test/Test.fsproj:
--------------------------------------------------------------------------------
1 | <Project Sdk="Microsoft.NET.Sdk">
2 |   <PropertyGroup>
3 |     <TargetFramework>netcoreapp2.0</TargetFramework>
4 |     <IsPackable>false</IsPackable>
5 |   </PropertyGroup>
6 | </Project>
7 |
--------------------------------------------------------------------------------
/src/Wikilon/Database.fs:
--------------------------------------------------------------------------------
1 | namespace Wikilon
2 | open Awelon
3 | open Stowage
4 | open Data.ByteString
5 |
6 | // Wikilon will host multiple dictionaries and multiple users.
7 | //
8 | // Active dictionaries will define a set of bots via `app-*`. But we'll
9 | // also have inactive dictionaries such as checkpoints and histories, or
10 | // perhaps based on quotas and payments.
11 | //
12 | // We could perhaps model Wikilon itself using the same fractal system,
13 | // with a set of bots operating on a partially persistent environment. It
14 | // might also be useful to have a `master` dictionary.
15 | //
16 | // In any case, it should be easy to build projections over variables,
17 | // and to wait on variables in a projection for COMET-style real-time
18 | // updates.
19 | //
20 | //module WikiState =
21 |
22 |
23 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/src/Wikilon/History.fs:
--------------------------------------------------------------------------------
1 |
2 | namespace Wikilon
3 | open Stowage
4 |
5 | /// A time-stamp. Alias to .Net System.DateTime Ticks. This is mostly for
6 | /// internal use within applications.
7 | type TimeStamp = int64
8 | module TimeStamp =
9 | let now () : TimeStamp = System.DateTime.UtcNow.Ticks
10 | let toDateTime (tm:TimeStamp) : System.DateTime = System.DateTime(tm,System.DateTimeKind.Utc)
11 | let fromDateTime (dt:System.DateTime) : TimeStamp = dt.ToUniversalTime().Ticks
12 |
13 | // A snapshot-based history model.
14 | module Snapshot =
15 |
16 | /// A history is modeled as a list of time-stamped snapshots with
17 | /// the latest snapshots near the head. Exponential decay is used
18 | /// to limit the list length, erasing intermediate snapshots to
19 | /// keep entries fairly well distributed over time. The longer our
20 | /// history, the more snapshots we'll preserve. Length can be set
21 | /// via the codec, to erase entries upon compaction.
22 | type H<'V> = (struct(TimeStamp * 'V)) list
23 |
24 | /// Add an entry to the history. Adds to best location. Under
25 | /// normal circumstances, it's assumed you'll add timestamps
26 | /// near the head (i.e. for more recent entries), so this would
27 | /// be O(1).
28 | let rec add (tm:TimeStamp) (v:'V) (h:H<'V>) : H<'V> =
29 | match h with
30 | | (struct(tmHd,vHd)::h') when (tmHd > tm) ->
31 | struct(tmHd,vHd) :: (add tm v h')
32 | | _ -> (struct(tm,v)::h)
33 |
34 | /// Find historical entry most relevant to a given timestamp.
36 | /// May return None if no entry exists for the given time.
36 | let rec tryFind (tm:TimeStamp) (h:H<'V>) : 'V option =
37 | match h with
38 | | (struct(tmHd,vHd)::h') ->
39 | if (tm >= tmHd) then Some vHd else
40 | tryFind tm h'
41 | | [] -> None
42 |
43 | let inline private tmDiff tm tm' = abs (tm - tm')
44 |
45 | // find least difference of timestamps within history
46 | let rec private leastTimeDiff tm ltd hst =
47 | match hst with
48 | | (struct(tm',_)::hst') ->
49 | let ltd' = min ltd (tmDiff tm tm')
50 | leastTimeDiff tm' ltd' hst'
51 | | [] -> ltd
52 |
53 | // remove first entry matching a timestamp difference
54 | let rec private delTimeDiff tm ltd hst =
55 | match hst with
56 | | (struct(tm',v)::hst') ->
57 | if ltd = (tmDiff tm tm') then hst' else
58 | struct(tm',v)::(delTimeDiff tm' ltd hst')
59 | | [] -> []
60 |
61 | // remove one entry from history based on least time difference.
62 | // Does not remove head entry from chunk.
63 | let private decayChunk hst =
64 | match hst with
65 | | (struct(tm0,v0)::hst') ->
66 | let ltd0 = System.Int64.MaxValue
67 | let ltd = leastTimeDiff tm0 ltd0 hst'
68 | struct(tm0,v0)::(delTimeDiff tm0 ltd hst')
69 | | [] -> []
70 |
71 | // exact-sized list split (may failwith "insufficient data").
72 | // I assume `n` is relatively small, so I can use the stack.
73 | let rec private splitListAt n lst =
74 | if (0 = n) then struct([],lst) else
75 | match lst with
76 | | (hd::lst') ->
77 | let struct(tl,rem) = splitListAt (n-1) lst'
78 | struct(hd::tl,rem)
79 | | [] -> failwith "insufficient data"
80 |
81 | // take fair-sized chunks of size at least k. Fair-size means
82 | // the smallest chunk and largest chunk are at most one element
83 | // different in size. When our list is large compared to k, the
84 | // size of our chunks will swiftly approach k (from above).
85 | let private fairChunks (k:int) (lst0:'V list) : ('V list) seq =
86 | assert(k > 0)
87 | let n = List.length lst0
88 | if (0 = n) then Seq.empty else
89 | let ct = n / k // ct chunks of at least size k
90 | if (ct < 2) then Seq.singleton lst0 else
91 | let szMin = (n/ct) // minimum size for each chunk
92 | assert(szMin >= k) // our contract assumption
93 | let rem = (n%ct) // first rem chunks have sz = szMin+1
94 | let step (struct(idx,lst)) =
95 | if (idx = ct) then (assert(List.isEmpty lst); None) else
96 | let sz = szMin + if (idx < rem) then 1 else 0
97 | let struct(chunk,lst') = splitListAt sz lst
98 | Some(chunk, struct(idx+1,lst'))
99 | Seq.unfold step (struct(0,lst0))
100 |
101 | /// Decay a history by removing about 1/K snapshots, for k at
102 | /// least 3. Based on an exponential decay model with fairness.
103 | let decay (k:int) (hst:H<'V>) : H<'V> =
104 | assert(k >= 3)
105 | hst |> fairChunks k |> Seq.map decayChunk |> List.concat
106 |
107 | // incremental decay until quota is reached (elim 1/K per step)
108 | let rec private decayToQuota (q:int) (k:int) (h:H<'V>) : H<'V> =
109 | if (q >= (List.length h)) then h else
110 | decayToQuota q k (decay k h)
111 |
112 | /// Codec with lossy, exponential-decay inspired compaction model.
113 | ///
114 | /// With this codec, you choose an approximate number of snapshots
115 | /// to retain. Upon compaction, snapshots will incrementally erase
116 | /// until quota is met, using a fair exponential decay algorithm
117 | /// that results in snapshots of the past being further temporally
118 | /// distributed compared to snapshots of recent changes. This will
119 | /// ensure deep history is robust against large numbers of "recent"
120 | /// updates, while keeping size constant (modulo changes in size
121 | /// of individual snapshots).
122 | let codec (entryQuota:int) (cV:Codec<'V>) : Codec> =
123 | let cTV = EncPair.codec' (EncVarInt.codec) cV
124 | { new Codec> with
125 | member __.Write h dst = EncArray.write cTV (List.toArray h) dst
126 | member __.Read db src = List.ofArray (EncArray.read cTV db src)
127 | member __.Compact db h0 =
128 | let hQ = decayToQuota entryQuota 9 h0
129 | let struct(hQA',sz) = EncArray.compact' cTV db (List.toArray hQ)
130 | struct(List.ofArray hQA', sz)
131 | }
132 |
133 | // TODO: snapshot histories are useful for dictionaries, but an alternative
134 | // is an event-based update history. In that case, we would want to merge
135 | // updates during the decay step - i.e. a monoidal update-event type. This
136 | // might be worth developing for some other parts of Wikilon. Eventually.
137 |
138 |
139 |
140 |
141 |
142 |
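143 | // A minimal usage sketch; values and the decay factor are arbitrary:
144 | //
145 | //   let h0 : Snapshot.H<string> = []
146 | //   let h1 = Snapshot.add (TimeStamp.now ()) "v1" h0
147 | //   let h2 = Snapshot.add (TimeStamp.now ()) "v2" h1
148 | //   let h' = Snapshot.decay 3 h2              // erase ~1/3 of snapshots
149 | //   Snapshot.tryFind (TimeStamp.now ()) h'    // most recent survivor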
--------------------------------------------------------------------------------
/src/Wikilon/README.md:
--------------------------------------------------------------------------------
1 | # Wikilon Library
2 |
3 | This Wikilon library is responsible for providing the web-service interface.
4 |
5 | Desiderata:
6 |
7 | * Defining, Reviewing, and Updating Words
8 | * Dictionary import, export, versioning.
9 | * Multi-Word Views and Edit Sessions
10 | * Simple REPL and Spreadsheet views
11 | * Multiple dictionaries
12 |
13 |
14 |
--------------------------------------------------------------------------------
/src/Wikilon/User.fs:
--------------------------------------------------------------------------------
1 | namespace Wikilon
2 | open Data.ByteString
3 | open Stowage
4 |
5 | // Wikilon's "User model".
6 | //
7 | // I'm not familiar with conventional user models. My idea for Wikilon
8 | // is to model a user as something like a game avatar, with an inventory
9 | // and equipment and location. In this case, the 'inventory' can include
10 | // authorities and clipboards, 'equipment' can support preferences, macros,
11 | // and views, and 'location' is something like a navigation history and
12 | // session information.
13 | //
14 | // The concrete representation of a user might be an Awelon dictionary,
15 | // to support easy export, import, and debugging. Alternatively, we could
16 | // use a fragment of a dictionary.
17 | //
18 | // I'm interested in supporting remote authentication, e.g. from Google
19 | // or Facebook. But I could support simple passwords, too, e.g. using a
20 | // salt and encoded password pair.
21 | //
22 | // This is relatively low priority, however. Early on, I can start with
23 | // just admin vs guest.
24 |
25 | (*
26 | module User =
27 |
28 | ///
29 |
30 | let private getEntropy (n : int) : ByteString =
31 | let mem = Array.zeroCreate n
32 | do RandomNumberGenerator.Create().GetBytes(mem)
33 | BS.unsafeCreateA mem
34 |
35 | let createSalt () : ByteString =
36 | getEntropy 64 |> RscHash.hash |> BS.take 24
37 |
38 |
39 | /// Instead of storing plain-text passwords, we'll mangle things.
40 |
41 | let manglePassword salt pass =
42 | *)
43 |
--------------------------------------------------------------------------------
/src/Wikilon/WS.fs:
--------------------------------------------------------------------------------
1 | namespace Wikilon
2 | open Stowage
3 | open Data.ByteString
4 | open Suave
5 |
6 | // The main goal right now is to get something useful running ASAP.
7 |
8 | module WS =
9 |
10 | type Params =
11 | { db : Stowage.DB
12 | admin : ByteString
13 | // might add logging, etc.
14 | }
15 |
16 |
17 |
18 | let mkApp (p:Params) =
19 | Successful.OK ("Hello World")
20 |
21 |
22 |
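23 | // A minimal sketch of serving the app; startWebServer and
24 | // defaultConfig are Suave's standard entry points, and the
25 | // wiring here is an assumption:
26 | //
27 | //   let run (p:Params) : unit =
28 | //       startWebServer defaultConfig (mkApp p)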
--------------------------------------------------------------------------------
/src/Wikilon/Wikilon.fsproj:
--------------------------------------------------------------------------------
1 | <Project Sdk="Microsoft.NET.Sdk">
2 |   <PropertyGroup>
3 |     <TargetFramework>netstandard2.0</TargetFramework>
4 |   </PropertyGroup>
5 | </Project>
6 |
--------------------------------------------------------------------------------
/wikilon.nginx-config:
--------------------------------------------------------------------------------
1 | # This is a NGINX reverse-proxy configuration for Wikilon.
2 | # At the moment, it's experimental.
3 | #
4 | #
5 | server {
6 | listen 443;
7 | listen [::]:443;
8 |
9 | #server_name example.com;
10 |
11 | ssl_certificate /etc/nginx/cert.crt;
12 | ssl_certificate_key /etc/nginx/cert.key;
13 |
14 | ssl on;
15 | ssl_session_cache builtin:1000 shared:SSL:10m;
16 | ssl_protocols TLSv1 TLSv1.1 TLSv1.2;
17 | ssl_ciphers HIGH:!aNULL:!eNULL:!EXPORT:!CAMELLIA:!DES:!MD5:!PSK:!RC4;
18 | ssl_prefer_server_ciphers on;
19 |
20 | #access_log /var/log/nginx/access.log;
21 |
22 | location / {
23 | # default Wikilon listen port is localhost:3000
24 | proxy_pass http://localhost:3000;
25 | #proxy_buffering off;
26 |
27 | proxy_set_header Host $host;
28 | proxy_set_header X-Real-IP $remote_addr;
29 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
30 | proxy_set_header X-Forwarded-Proto $scheme;
31 |
32 | #default proxy_redirect should be okay.
33 | #Wikilon shouldn't be using non-relative URIs anyway.
34 | }
35 |
36 | # might need special handlers for websockets?
37 | # But I'll just use AJAX for now.
38 | }
39 |
40 |
41 |
42 | server {
43 | # generic redirect http to https
44 | listen 80;
45 | listen [::]:80;
46 | return 301 https://$host$request_uri;
47 | }
48 |
49 |
50 |
--------------------------------------------------------------------------------