├── .gitignore
├── LICENSE
├── README.md
├── abuse
    ├── README.md
    └── signup.md
├── docker-compose.yml
└── userdb
    ├── README.md
    ├── host_data.yaml
    ├── schema.sql
    └── user_data.yaml


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pdf
2 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016 hashbang
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # #! - Core Infrastructure #
 2 | 
 3 | <http://github.com/hashbang/hashbang>
 4 | 
 5 | 
 6 | ## About ##
 7 | 
 8 | This repository contains the design documents and documentation for
 9 | [Hashbang's](https://hashbang.sh) overall infrastructure.
10 | 
11 | Likewise, its associated [issue tracker](https://github.com/hashbang/hashbang/issues)
12 | is used for keeping track of infra-wide issues, bugs, improvements, ...
13 | 
14 | 
15 | ## Services ##
16 | 
17 | Currently we provide the following services:
18 | 
19 |   * SSH - `ssh://{da1,ny1,sf1,to1}.hashbang.sh:22`
20 |     - [etckeeper configuration](https://github.com/hashbang/shell-etc)
21 |     - [Docker Source](https://github.com/hashbang/shell-server)  
22 |     - [Docker Image](https://hub.docker.com/r/hashbang/shell-server/)
23 | 	
24 | 	
25 |     Note: the `shell-server` Docker container is there for test
26 |     mockups and preparing `shell-etc` pull requests.  Actual instances
27 |     are deployed on VMs hosted by [Atlantic.net](https://atlantic.net)
28 | 
29 |   * IRC - `ircs://irc.hashbang.sh:6697`
30 |     - Servers
31 |       - [Source Code](https://github.com/hashbang/hashbang)
32 |       - [Docker Image](https://hub.docker.com/r/hashbang/unrealircd/)
33 |     - Services
34 |       - [Source Code](https://github.com/hashbang/docker-anope)
35 |       - [Docker Image](https://hub.docker.com/r/hashbang/anope/)
36 | 
37 |   * Bitlbee - `ircs://im.hashbang.sh:6697`
38 |     - [Source Code](https://github.com/hashbang/hashbang)
39 |     - [Docker Image](https://hub.docker.com/r/hashbang/unrealircd/)
40 | 
41 |   * SMTP - `smtp://mail.hashbang.sh`
42 |     - [Source Code](https://github.com/hashbang/docker-postfix)
43 |     - [Docker Image](https://hub.docker.com/r/hashbang/postfix/)
44 | 
45 |   * VOIP - `mumble://voip.hashbang.sh:64738`
46 |     - [Source Code](https://github.com/hashbang/docker-mumble)
47 |     - [Docker Image](https://hub.docker.com/r/hashbang/mumble/)
48 | 
49 |   * LDAP - `ldaps://ldap.hashbang.sh`
50 |     - [Source Code](https://github.com/hashbang/docker-slapd)
51 |     - [Docker Image](https://hub.docker.com/r/hashbang/slapd/)
52 | 
53 | 
54 | ## Documentation ##
55 | 
56 |   - [Abuse Prevention](https://github.com/hashbang/hashbang/tree/master/abuse)
57 |   - [Next-Gen UserDB](https://github.com/hashbang/hashbang/tree/master/userdb)
58 |   - [hashbangctl](https://github.com/hashbang/hashbangctl)
59 | 
60 | 
61 | ## Notes ##
62 | 
63 |   Use at your own risk. You may be eaten by a grue.
64 | 
65 |   Questions/Comments?
66 | 
67 |   Talk to us via:
68 | 
69 |   [Email](mailto:team@hashbang.sh) |
70 |   [IRC](ircs://irc.hashbang.sh:6697/#!) |
71 |   [Github](http://github.com/hashbang/)
72 | 


--------------------------------------------------------------------------------
/abuse/README.md:
--------------------------------------------------------------------------------
1 | # Abuse handling at #!
2 | 
3 | This section of the documentation is meant to describe how we deal with abuse,
4 | both proactively and after the fact.
5 | 
6 | Currently, only the handling of new signups is documented.
7 | 


--------------------------------------------------------------------------------
/abuse/signup.md:
--------------------------------------------------------------------------------
  1 | # #! user signup — abuse handling
  2 | 
  3 | Since possession of an #! account lets users run things, host services, send mail
  4 | and so on, signup is an attractive target for all sorts of abuses.
  5 | 
  6 | Here, the goal is to only enable sophonts to create accounts, not bots.
  7 | 
  8 | 
  9 | ## Requirements
 10 | 
 11 | 1. The abuse-limiting mechanisms must **not** reduce the accessibility of the
 12 |    system: any legitimate user able to create an account without them should
 13 |    still be able to create an account.
 14 | 
 15 | 2. Exposure of privacy-relevant information should be as limited as possible.
 16 | 
 17 | 3. Those mechanisms are implemented by the signup HTTP API:
 18 |    the SSH UI is not specially trusted.
 19 | 
 20 | 4. No data must be recorded before a signup is successful:  doing otherwise
 21 |    opens up a DoS vector (and is inefficient) and a potential privacy issue.
 22 | 
 23 | 
 24 | ## Implementation
 25 | 
 26 | The concrete implementation relies on two mechanisms:
 27 | 
 28 | - a textual captcha that is systematically sent to the user requesting the
 29 |   account creation, and the account creation can only be fulfilled if a valid
 30 |   answer is provided;
 31 | - hierarchical rate-limiting to limit the rate at which an adversary
 32 |   who can defeat the captcha (e.g. by employing humans) can create accounts.
 33 | 
 34 | 
 35 | ### CAPTCHAs
 36 | 
 37 | Using a captcha is a low-overhead (both for the user and for #!) way to tell
 38 | apart legitimate users from automated signup.
 39 | 
 40 | The captcha is implemented using [TextCaptcha],
 41 | a service that provides English-language, text-based CAPTCHAs.
 42 | This is a compromise on the first requirement (accessibility), as it is only
 43 | accessible to English-speaking users; on the other hand, the entire signup process
 44 | is currently only accessible in English, and #!'s documentation and communication
 45 | channels are in English.
 46 | 
 47 | 
 48 | The addition of a CAPTCHA obviously requires an API change.
 49 | The design criteria for it are as follows:
 50 | 
 51 | - change as little as possible the current signup API;
 52 | - be secure, in the following ways:
 53 |   - allow a limited (configurable) time to solve the CAPTCHA;
 54 |   - prevent users from reusing CAPTCHA answers;
 55 |   - do not expose any information to the user that may facilitate
 56 | 	automated CAPTCHA solving;
 57 | - be independent from [TextCaptcha]: the CAPTCHA-generating system
 58 |   must be replaceable without any change to the API.
 59 | 
 60 | 
 61 | #### Public API
 62 | 
 63 | A `/captcha` endpoint is added, expecting a JSON object with a single
 64 | `username` attribute.  The reply contains:
 65 | 
 66 | - a `challenge` string, the human-readable question;
 67 | - an opaque `token`, serialized as a string;
 68 | - an `expiration` time, serialized as an integer timestamp.
 69 | 
 70 | When using the `/user/create` endpoint for user creation, the client
 71 | must provide (in addition to the current requirements):
 72 | 
 73 | - the opaque `token` received from a previous call to `captcha`,
 74 |   for the `username` it is requesting;
 75 | - a matching `answer` string.
 76 | 
 77 | The `/user/create` implementation must perform all other validation
 78 | checks (and error-out accordingly) before validating the CAPTCHA.
 79 | Doing otherwise would expose an interactive verifier for the CAPTCHA
 80 | solution (which might or might not be an exploitable flaw).
 81 | 
 82 | If the answer did not match, the CAPTCHA is added to a set of invalid
 83 | CAPTCHAs until its expiration time, to prevent an attacker from trying
 84 | to brute-force a CAPTCHA.  The set does not need to be persisted to disk
 85 | or to a database, keeping it in an in-memory datastructure is enough.
 86 | 
 87 | The corresponding API is described as a
 88 | [JSON HyperSchema](https://github.com/hashbang/userdb-schemas/blob/refactor/api_schema.yml#L37-L58)
 89 | 
 90 | 
 91 | #### Opaque token
 92 | 
 93 | The CAPTCHA validation requires three pieces of information:
 94 | 
 95 | - the `a` value returned by [TextCaptcha];
 96 | - the expiration `timestamp`;
 97 | - the `username` requested during CAPTCHA generation.
 98 | 
 99 | This data needs to be integrity-protected, since an attacker able to modify
100 | any of its parts would be able to violate the security requirements.
101 | 
102 | Moreover, it needs to be kept confidential: the `a` attribute is a hash of
103 | the valid, lowercased answers: exposing it to the user reveals a *verifier*
104 | for the valid answers, enabling a malicious user to bruteforce them offline.
105 | 
106 | As such, the `token` opaque value is generated as follows:
107 | 
108 | - The required data (`a`, `timestamp` and `username`) is serialized
109 |   using an implementation-defined mechanism.
110 |   [Snappy](https://github.com/golang/snappy)-compressed JSON is suitable.
111 | - The serialized data is encrypted, using an authenticated encryption
112 |   primitive such as AES128-GCM, with a constant, symmetric key that
113 |   is randomly generated when the API server starts (and never persisted
114 |   to disk) and a large, random nonce.
115 | - The random nonce is appended to the ciphertext.
116 | - The resulting data is Base64-encoded, using the
117 |   [RFC 4648 URL-safe alphabet](https://tools.ietf.org/html/rfc4648#section-5).
118 | 
119 | The use of a random, volatile key for token encryption implies two trade-offs:
120 | 
121 | - CAPTCHA tokens are implicitly expired when the application is restarted;
122 |   they do expire within a short timeframe anyway, mitigating the issue;
123 | - `api.hashbang.sh` can only be served by a single instance.
124 | 
125 | Switching to a persistent, shared key can be implemented at any time without
126 | any visible change in the API; however, care must be taken to implement key
127 | rollover procedures and secure key storage.
128 | 
129 | 
130 | [TextCaptcha]: http://textcaptcha.com/
131 | 
132 | 
133 | #### Key management considerations
134 | 
135 | Since no persistent key is used, most of the usual key management and
136 | distribution woes are side-stepped.  However, we must be careful that
137 | the transient encryption key is not kept too long.
138 | 
139 | The theoretical limit on how much data can be safely encrypted under one key with
140 | a 128-bit cipher is 2⁶⁴ bits, and Go's standard nonce size is 96 bits, leading to
141 | 2⁴⁸ messages with random nonces before a nonce reuse becomes likely.
142 | 
143 | Both are comfortably over what can be expected to be served by the API server
144 | over its lifetime (i.e. without restart).  However, less-than-perfect random
145 | number generation can significantly increase the risk of accidental nonce reuse.
146 | 
147 | More importantly, a variety of other factors can lead to key compromise:
148 | vulnerabilities in our own code, flawed crypto implementation, ...
149 | 
150 | To mitigate this, the implementation should enforce that the encryption key is
151 | volatile, by replacing it with a new random one, whenever a new `/captcha`
152 | request occurs and either of these conditions is met:
153 | 
154 | - the number of CAPTCHAs issued since the last key rollover is greater than 2²⁰;
155 | - the last issued CAPTCHA is older than the configured validity duration
156 |   **and** the encryption key is older than an hour.
157 | 
158 | 
159 | *NOTE*: The first condition may cause CAPTCHAs to be revoked (becoming
160 |         undecipherable), but can only be triggered if 2²⁰ requests are
161 |         sent with no gap longer than the CAPTCHA validity period.
162 | 
163 | 
164 | ### Hierarchical rate-limiting
165 | 
166 | The intent behind rate-limiting is to prevent one single entity from creating
167 | a disproportionate number of accounts over a given time-span.
168 | 
169 | There are several challenges inherent with this:
170 | 
171 | - Special care must be taken not to hinder legitimate users.
172 |   This precludes, for instance, blocking any IP range except for a short period
173 |   of time: dynamic IPs being as they are, the range block would most of the time
174 |   be evaded by the abuser, yet impact unrelated users.
175 | - Such a system must work at several scales:
176 |   - multiple temporal scales are required to deal with both large automated signup
177 | 	spikes and slow-but-steady trickles of (automated) account creations;
178 |   - multiple “spatial” scales are required to deal with both a few IPs abusing
179 | 	account creation, and an abuser using a larger pool of IPs (like the dynamic IP
180 | 	pool from their ISP).
181 | 
182 | 
183 | This approach is governed by several tune-able parameters:
184 | 
185 | - `r [d⁻¹]`, an over-estimate of the legitimate signup rate, in users per day;
186 | - `0 < α < 1`, an adimensional fudge factor for the space scale:
187 |   closer to 0, it makes the rate-limit more forgiving of subnets with an
188 |   above-expectation signup rate;
189 | - a set of timescales that are considered;
190 | - a pair of fudge factors for the temporal scales: `0 < β` and `1 < c ≤ 1 + β⁻¹`;
191 |   for simplicity, we currently set `c = 1 + β⁻¹`.
192 | 
193 | Given some IP `host` (assuming for now IPv4), the request is accepted if, for every
194 | timescale `t` and every space scale `s` from /8 to /24, the network `host/s`
195 | performed at most `f(t)×r 2⁻ᵅˢ` successful signups over the last `t` days
196 | with `f(t) = (1 + β×t⁻ᶜ) t`.
197 | 
198 | 
199 | #### Rationale
200 | 
201 | `t×r` is the expected number of signups over the last `t` days, over the world.
202 | 
203 | The subnet `host/s` contains `2³²⁻ˢ` IPv4 addresses out of `2³²`,
204 | hence the expected ratio of signups originating from it is `2⁻ˢ`.
205 | 
206 | The “fudge factor” `α` is a tune-able parameter that controls how strict
207 | the dependency regarding network size is: it has less of an impact on large
208 | networks (`s` goes to 0), and more on small networks (which are more likely
209 | to have over-average legitimate behavior).
210 | 
211 | Lastly, the dependency on time (`t`) is replaced by `f(t)` with the following
212 | properties:
213 | 
214 | 1. `f(t)` is increasing: bigger sliding windows have bigger limits;
215 | 2. `f(t)/t` goes towards 1: the rate limit goes towards `r` when the timespan grows large;
216 | 3. `f(1) = β+1`: the parameter `β` controls the values of `f` for small timespans.
217 | 
218 | `f` was rewritten as `f(t) = (1 + g(t)) t`, transforming the constraints into:
219 | 
220 | 1. `f'(t) = 1 + g + g'×t > 0`
221 | 2. `g(t)` goes towards 0
222 | 3. `g(1) = β`
223 | 
224 | By picking `g(t) = β×t⁻ᶜ` (which fulfills constraints 2 and 3), the first constraint
225 | becomes `c ≤ 1 + β⁻¹`.
226 | 
227 | 
228 | ## Privacy concerns
229 | 
230 | Implementing rate-limiting requires keeping track of signup IPs and timestamps,
231 | which is a compromise on the privacy requirement (2).
232 | 
233 | This is mitigated by the fact that this data doesn't need to be made available to
234 | any other service than `api.hashbang.sh` (nor does it need to be part of the
235 | replicated database), and `api.hashbang.sh` does not need to know the corresponding
236 | username, nor does it need to have read access to historical data beyond the greatest
237 | timescales considered.
238 | 
239 | 
240 | ## Security concerns
241 | 
242 | ### CAPTCHAs
243 | 
244 | Let's assume that an adversary successfully creates an account under the
245 | following restrictions:
246 | 
247 | 1. the adversary may not break the confidentiality or integrity of the AE;
248 | 2. the adversary does not have access to our communication channel with
249 |    [TextCaptcha].
250 | 
251 | Since the account creation is predicated on receiving an `answer` matching
252 | the verifier encapsulated in `token`, and `token` is integrity-protected,
253 | then `answer` must be a valid answer to a CAPTCHA.
254 | 
255 | Furthermore, `token` (is integrity-protected and) contains `username` and
256 | `timestamp`, which are validated against: `answer` must thus be an answer
257 | to a CAPTCHA that has not expired yet, and was issued for the specified
258 | `username`.
259 | 
260 | Since usernames are unique, a given answer can only be used for a single
261 | account creation.
262 | 
263 | Lastly, the adversary must compute the answer with only access to `challenge`
264 | and an online verifier (the API server).  The verifier refuses to answer to a
265 | given CAPTCHA after one wrong answer, and the adversary may not violate the
266 | confidentiality properties of `token` (which is encrypted).
267 | 
268 | It follows that the adversary must be able to compute the solution to the
269 | CAPTCHA on the first attempt.
270 | 
271 | 
272 | *NOTE:* Assumption 2 might be violated, given that [TextCaptcha]'s API doesn't
273 |         use HTTPS.  This is very sad, but having MitM-resilient CAPTCHAs is
274 | 		overkill.
275 | 
276 | 
277 | ### Rate-limiting
278 | 
279 | #### Risk of bypassing the rate-limit
280 | 
281 | In the case of SSH-based signup, the SSH UI server needs to transmit the connecting
282 | client's IP, possibly in a HTTP header. Special care must be taken that only the SSH
283 | UI server is allowed to set the client IP to an arbitrary address, not the
284 | directly-connecting users (if any).
285 | 
286 | In case of a compromise of the signup server, the attacker can bypass the
287 | rate-limiting (by lying to the API server on the client's IP).  The alternative
288 | (implementing rate-limitations in the signup server) does not solve that issue,
289 | and exposes the privacy-sensitive data mentioned earlier to the attacker.
290 | 
291 | The compromise of the signup server is mitigated against by the CAPTCHA mechanism,
292 | as an attacker still needs to solve CAPTCHAs to perform registration.
293 | 
294 | 
295 | #### Potential DoS
296 | 
297 | An attacker may attempt to (ab)use the rate-limiting system to prevent users in
298 | “neighboring” networks from using the service.
299 | 
300 | In order to block a network of size `s`, over a duration `t`, the attacker must:
301 | - solve `t×r 2⁻ᵅˢ` CAPTCHAs;
302 | - send requests from computers in `(t×r 2⁻ᵅˢ)/(t×r 2⁻²⁴ᵅ) = 2⁽²⁴⁻ˢ⁾ᵅ` different
303 |   /24 networks.
304 | 
305 | For concrete values `t = 7 d`, `s = /16`, `r = 1000 d⁻¹` and `α = 90%`, this
306 | means solving 2300 CAPTCHAs and having access to 147 different /24 networks.
307 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: "2"
 2 | services:
 3 |   # PostgreSQL instance; listed first because shellbox depends on it.
 4 |   db:
 5 |     image: postgres:9.5
 6 |     restart: always
 7 |     ports: ["5432:5432"]
 8 |   # Shell server, built from the local ./shellbox directory.
 9 |   shellbox:
10 |     build: ./shellbox
11 |     restart: always
12 |     cap_add: [SYS_ADMIN]
13 |     ports: ["22:22/tcp"]
14 |     depends_on: [db]
15 |     links: ["db:db"]
16 | 


--------------------------------------------------------------------------------
/userdb/README.md:
--------------------------------------------------------------------------------
  1 | # #! user database — Requirements
  2 | 
  3 | This document describes the requirements for our user database.
  4 | Understanding the design goals is an important part of understanding the
  5 | engineering trade-offs made there.
  6 | 
  7 | ## Data
  8 | 
  9 | A user has several kinds of associated data:
 10 | - relational data, which might be involved in `WHERE` conditions or `JOIN`s:
 11 |   `uid`, `gid`, `username` and `host`;
 12 | - non-relational data: SSH keys, full name, preferred shell, ...
 13 | - users may add additional data without specific cooperation from the #! admins,
 14 |   facilitating the construction of new, non-core services
 15 |   (finger, GPG key discovery, ...)
 16 | 
 17 | Each user has a primary group, that shares the same id and name.
 18 | 
 19 | Administrators may define auxiliary groups (such as `adm` or `sudo`)
 20 | and any user can belong to any number of auxiliary groups.
 21 | 
 22 | Lastly, the DB needs to keep track of per-server information, namely its
 23 | `hostname`, IP address and location.  This is in part intended for consumption
 24 | by the [stats API](https://hashbang.sh/server/stats).
 25 | 
 26 | 
 27 | ## Properties
 28 | 
 29 | ### Immediate
 30 | 
 31 | These requirements **must** be achieved before deployment:
 32 | 
 33 | - Availability
 34 |   - Users do not lose access to the shell servers if part of the infra goes down.
 35 |   - Loss of any part of the infrastructure is recoverable with limited data loss.
 36 | 
 37 | - Consistency
 38 |   - Changes in the userdb must occur in some coherent ordering, and reads must
 39 | 	respect it.  In particular, it is not possible to observe partial updates.
 40 |   - The data held in the database must be internally coherent: for instance,
 41 | 	a user may not belong to a group that does not exist.
 42 | 
 43 | - Maintainability
 44 |   - Avoid custom implementations of standard modules (NSS, PAM, ...)
 45 | 	whenever possible and reasonable.
 46 |   - Minimize the amount of components that have knowledge of the database
 47 | 	implementation, and make the others rely on a more abstract API.
 48 | 
 49 | - Privilege separation
 50 |   - All data must be non-readable, non-writeable, by default.
 51 |   - Each service/component must have the least possible access (read, write, ...),
 52 | 	restricted to the data it needs to manipulate.
 53 | 
 54 | - No-downtime deployment
 55 |   - The initial deployment must be achievable without disrupting core services
 56 | 	(shell access, IRC, ...), and must minimize the disruption to auxiliary
 57 | 	services (mail, ...).
 58 |   - Any later update/maintenance of the system must be achievable without
 59 | 	disrupting read-availability of the user DB.  Having a short window where
 60 | 	users cannot edit their records or signup is acceptable.
 61 | 
 62 | 
 63 | ### Long-term
 64 | 
 65 | These requirements **must** be *achievable*:
 66 | 
 67 | - Privilege separation
 68 |   - Unprivileged `hashbangctl` can only modify the user's own data
 69 | - Remote service authentication: local (shell) users should be able to authenticate
 70 |   transparently and securely to remote (#!) services (SMTP, IRC, ...).
 71 | 
 72 | 
 73 | ## Services
 74 | 
 75 | The following services need to interact with the user DB:
 76 | - OpenSSH, through `AuthorizedKeysCommand`;
 77 | - `mail.hashbang.sh` needs to extract host info for mail routing;
 78 | - `hashbang.sh` needs to extract statistics;
 79 | - user creation.
 80 | 
 81 | 
 82 | # #! user database — PostgreSQL-based proposal
 83 | 
 84 | ## Design goals
 85 | 
 86 | This design leans strongly towards consistency of the data, enforced
 87 | as much as possible at the database level.
 88 | 
 89 | Part of this appears in the use of foreign keys, range or value constraints,
 90 | preventing applications from inserting (or modifying) data that violates those
 91 | constraints.
 92 | 
 93 | Less apparent manifestations appear in the design of the database schema:
 94 | - User's primary groups are known to have the same id and name as the users,
 95 |   and as such are not stored explicitly; such an inconsistency caused the
 96 |   [“group 3000” bug](https://github.com/hashbang/provisor/pull/25).
 97 | - Data duplication is systematically avoided, as it is a major cause of
 98 |   inconsistencies in databases; the schema is even in
 99 |   [project-join normal form](https://en.wikipedia.org/wiki/Fifth_normal_form).
100 | 
101 | 
102 | ## Replication
103 | 
104 | In a single-master deployment, PostgreSQL has hot replication features that
105 | allow changes to be propagated immediately to (a configurable number of) replicas,
106 | possibly before the change is committed on the master.
107 | 
108 | Using a local PostgreSQL instance on each shell server, acting as a local replica,
109 | immediately fulfills the availability requirements:
110 | - each server holds a read-only copy of the database, so users can login
111 |   regardless of whether the DB master is available;
112 | - should the DB master be lost, the most up-to-date replica of the DB (can be found
113 |   by comparing `pg_last_xlog_receive_location` values) can be either promoted to
114 |   the role of master, or (preferred) copied to the new master instance.
115 | 
116 | Moreover, single-master PostgreSQL provides the usual ACID consistency guarantees.
117 | 
118 | 
119 | ## Database schema
120 | 
121 | The database schema is provided in [`schema.sql`](schema.sql).
122 | 
123 | DB constraints are used to enforce, as much as possible, consistency:
124 | - uids must be valid and unique;
125 | - usernames must be unique and follow the proper syntax rules;
126 | - a user's host must exist.
127 | 
128 | User records have an optional `data` column, that can hold
129 |   additional, non-relational data as a (binary-encoded) JSON object.
130 | 
131 | *NOTE:* Rows in `group` and `passwd` shouldn't share a `name`.
132 |         Can this be expressed as a constraint?
133 | 
134 | 
135 | ## Data representation
136 | 
137 | In the `passwd` and `hosts` tables, a `data` (binary) JSON object holds
138 | some non-relational data.  The rationale for this is two-fold:
139 | - the `data` object can be easily extended with additional information
140 |   without having to modify the schema (or even coordinate with the administrators);
141 | - the `data` object can easily be passed across JSON-based APIs.
142 | 
143 | The `data` objects for [users](user_data.yaml) and [hosts](host_data.yaml)
144 | must obey certain JSON schemata, for several reasons:
145 | - Some fields, like `shell` or `ssh_keys`, are used by #! infrastructure;
146 |   validating the JSON objects prevents users from accidentally losing access to
147 |   their own account in this way.
148 | - The host `data` object is directly added to data that is exposed on a public API.
149 |   This avoids breaking the public API accidentally simply by changing the data.
150 | - More generally, once a convention is widely adopted by #! users, it can be
151 |   formalised into a JSON schema and enforced, making the data format of user records
152 |   more interoperable.
153 | 
154 | 
155 | *NOTE:* It might be possible to enforce the JSON Schema in the database itself.
156 |         This isn't an immediate goal.
157 | 
158 | *NOTE:* Yes, I'm aware I serialized the JSON Schema as YAML.  Yes, it's legit.
159 | 
160 | 
161 | ## Permissions
162 | 
163 | More so than separating permissions on a per-server basis, permissions should be
164 | assigned on a per-service basis, and follow the least privilege principle.
165 | 
166 | 
167 | ### Shell servers
168 | 
169 | A shell server hosts several components that get different access rights to the DB:
170 | - `pgsql`: the DB server itself needs a DB user with the `replication` privilege.
171 |   It gives complete read access to the database (from the master), and nothing else.
172 | - `ssh`: needs read access to the `passwd.{name,data}` columns.
173 | - `nss`: needs read access to `passwd`, `group` and `aux_groups`.
174 | - `hashbangctl`: needs write access to the `passwd.data` column.
175 | 
176 | 
177 | ### `hashbang.sh`
178 | 
179 | The website fulfills two complementary (and independent) roles:
180 | - user creation: `INSERT` privilege in the `passwd` table;
181 | - statistics: read-only access to a `hosts_stats` view, created as follows:
182 | 
183 | ```postgres
184 | CREATE VIEW hosts_stats AS  -- number of users per host; hosts without users are omitted
185 |   SELECT hosts.id, hosts.name, agg.count FROM hosts
186 |   JOIN (SELECT host, count(distinct uid) as count FROM passwd GROUP BY host) AS agg  -- passwd is keyed by uid
187 |   ON agg.host = hosts.id;
188 | ```
189 | 
190 | 
191 | ### `mail.hashbang.sh`
192 | 
193 | The mail server only needs read access to `passwd.{name,host}` and `hosts`.
194 | 
195 | 
196 | ## Service integration
197 | 
198 | ### Shell servers
199 | 
200 | On the shell servers, integrating the new auth DB involves three things:
201 | - having Postgres installed and configured for streaming replication;
202 | - having `libnss-pgsql` configured as a NSS provider: this makes all
203 |   users in the DB visible in the `getpwent(3)` functions family, making
204 |   them “be there on the system”;
205 | - having a script set as SSH `AuthorizedKeysCommand` that queries for a
206 |   user's `passwd.data` and pipes it to `jq -r '.ssh_keys | .[]'`.
207 | 
208 | 
209 | #### `libnss-pgsql` configuration
210 | 
211 | The main part of the configuration of `libnss-pgsql` is to set the queries
212 | used to retrieve information from the database.  Passwords are systematically
213 | set to be `!`: this is a value that cannot possibly match any password hash
214 | in `crypt(3)` format.
215 | 
216 | Extracting user information is fairly straightforward:
217 | 
218 | 	# Returns (name, passwd, gecos, dir, shell, uid, gid) for a given name or uid, or all; gid repeats uid (implicit primary group)
219 | 	getpwnam = SELECT name, '!', data->>'name', homedir, data->>'shell', uid, uid FROM passwd WHERE name = $1
220 | 	getpwuid = SELECT name, '!', data->>'name', homedir, data->>'shell', uid, uid FROM passwd WHERE uid  = $1
221 | 	allusers = SELECT name, '!', data->>'name', homedir, data->>'shell', uid, uid FROM passwd
222 | 
223 | 
224 | Retrieving group-related data is a bit harder, as there are two kinds of groups:
225 | - a user's primary group shares the same name and id (and has a single user);
226 | - an auxiliary group is described in the `group` table.
227 | 
228 | ```
229 | # Returns (name, passwd, gid) for a given name or gid, or all ("group" is a reserved word and must be quoted)
230 | getgrnam  = SELECT name, '!', gid FROM "group" WHERE name = $1
231 |       UNION SELECT name, '!', uid FROM passwd  WHERE name = $1
232 | getgrgid  = SELECT name, '!', gid FROM "group" WHERE gid  = $1
233 |       UNION SELECT name, '!', uid FROM passwd  WHERE uid  = $1
234 | allgroups = SELECT name, '!', gid FROM "group"
235 |       UNION SELECT name, '!', uid FROM passwd
236 | ```
237 | 
238 | Finally, we need a query to link together users and auxiliary groups:
239 | 
240 | 	# Returns all auxiliary group ids a user is a member of
241 | 	groups_dyn = SELECT gid FROM passwd JOIN aux_groups USING (uid) WHERE name = $1
242 | 	
243 | 	# Returns the names of all users belonging to a given group: the owner of a primary group, plus auxiliary members
244 | 	getgroupmembersbygid = SELECT name FROM passwd WHERE uid = $1
245 | 	                 UNION SELECT name FROM passwd JOIN aux_groups USING (uid) WHERE gid = $1
246 | 
247 | 
248 | ### `mail.hashbang.sh`
249 | 
250 | We can directly use Postfix's Postgres support to use a specific query as a virtual
251 | table; `pgsql:/etc/postfix/pgsql-aliases.cf` can be specified as `virtual_alias_maps`.
252 | 
253 | The `pgsql-aliases.cf` config file itself would look like this:
254 | 
255 | 	# The hosts that Postfix will try to connect to
256 | 	hosts = localhost
257 | 	
258 | 	# The user name and password to log into the pgsql server.
259 | 	user = someone
260 | 	password = some_password
261 | 	
262 | 	# The database name on the servers.
263 | 	dbname = userdb
264 | 
265 | 	# Look up the name of the user's host (passwd.host is only a numeric reference to hosts.id) and return user@host
266 | 	domain = hashbang.sh
267 | 	query = SELECT hosts.name FROM passwd JOIN hosts ON hosts.id = passwd.host WHERE passwd.name='%U'
268 | 	result_format = %U@%s
269 | 


--------------------------------------------------------------------------------
/userdb/host_data.yaml:
--------------------------------------------------------------------------------
 1 | $schema: http://json-schema.org/schema#
 2 | title: "#! userdb -- Schema for host records auxiliary data"
 3 | type: object
 4 | properties:
 5 |   location:
 6 |     type: string
 7 |     description: "Server's location"
 8 | 
 9 |   coordinates:
10 |     type: object
11 |     description: "Server's GPS coordinates"
12 |     properties:
13 |       lat: {type: number}
14 |       lon: {type: number}
15 |     required: [lat, lon]
16 |     additionalProperties: False
17 | 
18 |   inet:
19 |     type: array
20 |     description: "Server's IP address(es)"
21 |     minItems: 1
22 |     items:
23 |       type: string
24 |       # Each entry must be either an IPv4 or an IPv6 address.
25 |       oneOf:
26 |         - {format: ipv4}
27 |         - {format: ipv6}
28 |     uniqueItems: True
29 | 
30 | # `required` is a keyword of the object schema itself, not one of its properties.
31 | required: [location, coordinates, inet]
32 | 


--------------------------------------------------------------------------------
/userdb/schema.sql:
--------------------------------------------------------------------------------
 1 | -- -*- mode: sql; product: postgres -*-
 2 | 
 3 | -- hosts table: one row per server, referenced by passwd.host
 4 | CREATE TABLE "hosts" (
 5 |   "id" serial PRIMARY KEY,
 6 |   "name" text UNIQUE NOT NULL,
 7 |   "data" jsonb -- extra data added in the stats answer
 8 |                -- conforms to the host_data.yaml schema
 9 | );
10 | 
11 | 
12 | -- data for NSS' passwd
13 | -- there is an implicit primary group for each user
14 | CREATE SEQUENCE user_id MINVALUE 4000 MAXVALUE 2147483647 NO CYCLE; -- default source of passwd.uid; NO CYCLE: never wraps around
15 | -- user and group names: a lowercase letter followed by one or more lowercase letters or digits, at most 31 characters
16 | CREATE DOMAIN username_t varchar(31) CHECK (
17 |   VALUE ~ '^[a-z][a-z0-9]+$'
18 | );
19 | 
20 | CREATE TABLE "passwd" (
21 |   "uid" integer PRIMARY KEY DEFAULT nextval('user_id') CHECK ("uid" >= 1000), -- lower bound as a CHECK: MINVALUE is only valid for sequences
22 |   "name" username_t UNIQUE NOT NULL,
23 |   "host" integer NOT NULL REFERENCES hosts (id), -- every user lives on an existing host
24 |   "homedir" text NOT NULL,
25 |   "data" jsonb  -- conforms to the user_data.yaml schema
26 | );
27 | 
28 | -- auxiliary groups; their gids stay below the range used for user uids
29 | CREATE TABLE "group" (
30 |   "gid" integer PRIMARY KEY CHECK ("gid" <= 999), -- upper bound as a CHECK: MAXVALUE is only valid for sequences
31 |   "name" username_t UNIQUE NOT NULL
32 | );
33 | 
34 | CREATE TABLE "aux_groups" (  -- membership of users in auxiliary groups
35 |   "uid" int4 NOT NULL REFERENCES passwd (uid) ON DELETE CASCADE,
36 |   "gid" int4 NOT NULL REFERENCES "group" (gid) ON DELETE CASCADE, -- "group" is a reserved word and must be quoted
37 |   PRIMARY KEY ("uid", "gid")
38 | );
39 | 


--------------------------------------------------------------------------------
/userdb/user_data.yaml:
--------------------------------------------------------------------------------
 1 | $schema: http://json-schema.org/schema#
 2 | title: "#! userdb -- Schema for user records auxiliary data"
 3 | type: object
 4 | properties:
 5 |   ssh_keys:
 6 |     type: array
 7 |     items: {type: string}
 8 |     uniqueItems: True
 9 |     description: SSH keys for the shell servers
10 |   name:
11 |     type: string
12 |     description: "User's name"
13 |   shell:
14 |     type: string
15 |     description: "User's shell"
16 | # `required` is a keyword of the object schema itself, not one of its properties.
17 | required: [ssh_keys, shell]
18 | 


--------------------------------------------------------------------------------