├── .formatter.exs
├── .gitignore
├── DESIGN.md
├── README.md
├── Slides.pdf
├── assets
│   ├── README.md
│   ├── esbuild.cjs
│   ├── index.html
│   ├── jsconfig.json
│   ├── package-lock.json
│   ├── package.json
│   ├── postcss.config.js
│   ├── src
│   │   ├── App.svelte
│   │   ├── app.css
│   │   ├── assets
│   │   │   └── svelte.svg
│   │   ├── lib
│   │   │   ├── sync_db.js
│   │   │   ├── todos_store.js
│   │   │   ├── utils.js
│   │   │   └── uuidv4.js
│   │   └── main.js
│   ├── svelte.config.js
│   └── tailwind.config.js
├── config
│   ├── config.exs
│   ├── dev.exs
│   ├── prod.exs
│   ├── runtime.exs
│   └── test.exs
├── lib
│   ├── sync
│   │   ├── application.ex
│   │   ├── mailer.ex
│   │   ├── replication.ex
│   │   ├── repo.ex
│   │   └── todo
│   │       └── item.ex
│   ├── sync_web.ex
│   └── sync_web
│       ├── channels
│       │   ├── channel.ex
│       │   └── socket.ex
│       ├── components
│       │   ├── core_components.ex
│       │   ├── layouts.ex
│       │   └── layouts
│       │       ├── app.html.heex
│       │       └── root.html.heex
│       ├── controllers
│       │   ├── error_html.ex
│       │   ├── error_json.ex
│       │   ├── page_controller.ex
│       │   ├── page_html.ex
│       │   └── page_html
│       │       └── home.html.heex
│       ├── endpoint.ex
│       ├── gettext.ex
│       ├── router.ex
│       └── telemetry.ex
├── mix.exs
├── mix.lock
├── priv
│   ├── gettext
│   │   ├── en
│   │   │   └── LC_MESSAGES
│   │   │       └── errors.po
│   │   └── errors.pot
│   ├── repo
│   │   ├── migrations
│   │   │   ├── .formatter.exs
│   │   │   ├── 20240806131210_create_publication.exs
│   │   │   ├── 20240806131212_create_items.exs
│   │   │   └── 20240806131214_soft_delete_items.exs
│   │   └── seeds.exs
│   └── static
│       ├── favicon.ico
│       ├── images
│       │   └── logo.svg
│       └── robots.txt
└── test
    ├── support
    │   ├── channel_case.ex
    │   ├── conn_case.ex
    │   └── data_case.ex
    ├── sync_test.exs
    ├── sync_web
    │   └── channels
    │       └── channel_test.exs
    └── test_helper.exs
/.formatter.exs:
--------------------------------------------------------------------------------
1 | [
2 | import_deps: [:ecto, :ecto_sql, :phoenix],
3 | subdirectories: ["priv/*/migrations"],
4 | plugins: [Phoenix.LiveView.HTMLFormatter],
5 | inputs: ["*.{heex,ex,exs}", "{config,lib,test}/**/*.{heex,ex,exs}", "priv/*/seeds.exs"]
6 | ]
7 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # The directory Mix will write compiled artifacts to.
2 | /_build/
3 |
4 | # If you run "mix test --cover", coverage assets end up here.
5 | /cover/
6 |
7 | # The directory Mix downloads your dependencies sources to.
8 | /deps/
9 |
10 | # Where 3rd-party dependencies like ExDoc output generated docs.
11 | /doc/
12 |
13 | # Ignore .fetch files in case you like to edit your project deps locally.
14 | /.fetch
15 |
16 | # If the VM crashes, it generates a dump, let's ignore it too.
17 | erl_crash.dump
18 |
19 | # Also ignore archive artifacts (built via "mix archive.build").
20 | *.ez
21 |
22 | # Temporary files, for example, from tests.
23 | /tmp/
24 |
25 | # Ignore package tarball (built via "mix hex.build").
26 | sync-*.tar
27 |
28 | # Ignore assets that are produced by build tools.
29 | /priv/static/assets/
30 |
31 | # Ignore digested assets cache.
32 | /priv/static/cache_manifest.json
33 |
34 | # In case you use Node.js/npm, you want to ignore these.
35 | npm-debug.log
36 | /assets/node_modules/
37 |
38 |
--------------------------------------------------------------------------------
/DESIGN.md:
--------------------------------------------------------------------------------
1 | # Design
2 |
3 | > Disclaimer: I am not a Postgres specialist and the document below may have inaccuracies or wrong assumptions.
4 |
5 | This document outlines an implementation of live queries and synchronization on top of Phoenix Channels and PostgreSQL.
6 |
7 | The idea behind implementing "live queries" is that a client can request "select * from tasks where org_id = 42" and receive both the current tasks and a subscription to any future insert, update, or deletion of these tasks.
8 |
9 | One way to implement such live queries is via direct table polling. However, if we have several queries and several users, polling can quickly become expensive. Furthermore, [PostgreSQL does not guarantee rows become available in the same order as primary keys/sequences](https://event-driven.io/en/ordering_in_postgres_outbox/), which is a big challenge that we refer to throughout this document.
10 |
11 | Another approach is to set up Postgres replication and then duplicate all of the Postgres data elsewhere. If you want to catch up on past events, you go through the replicated data and unpack it. Copying the data elsewhere requires additional services, for both compute and storage, which I would like to avoid.
12 |
13 | Instead, we want to use Postgres Replication alongside Elixir. The idea is that each Elixir node will establish a replication connection with Postgres and receive all events as they happen. Those events can then be filtered and sent to the appropriate clients via Phoenix Channels. This requires at least two Postgres connections: one for replication and another for queries. Both can come from replicas, as long as they are from the same replica.
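
A minimal sketch of how such a node can be wired up, mirroring `lib/sync/application.ex` later in this repository (module names and options are the ones used there):

```elixir
# Two database-facing children: the regular Ecto repo pool used for queries and
# a dedicated replication connection that receives the logical replication stream.
children = [
  Sync.Repo,
  {Sync.Replication,
   [name: Sync.Replication, endpoint: SyncWeb.Endpoint] ++ Sync.Repo.config()}
]

Supervisor.start_link(children, strategy: :one_for_one, name: Sync.Supervisor)
```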
14 |
15 | ## Live queries
16 |
17 | To implement live queries, we first perform a **catch-up query** and then wait for further changes from the replication stream. In order to discuss the possible problems that may happen, consider that we want to query a table while two transactions are changing it at the same time:
18 |
19 | ```sql
20 | UPDATE tasks SET title='A' WHERE id=13; -- xact1
21 | UPDATE tasks SET title='B' WHERE id=13; -- xact2
22 | ```
23 |
24 | Since we want to receive live updates, the first step is to subscribe to the PostgreSQL replication service before we query the data. Then, as we receive updates from replication, we broadcast them to the interested parties via Phoenix Channels. This makes sure no updates are lost. Otherwise the following may happen:
25 |
26 | 1. `UPDATE tasks SET title='A' WHERE id=13`
27 | 2. We perform the catch-up query and see `title="A"`
28 | 3. `UPDATE tasks SET title='B' WHERE id=13`
29 | 4. We start the subscription
30 |
31 | Because we started the subscription _after_ the title was set to "B", we never receive this event. Instead, we want this to happen:
32 |
33 | 1. We start the subscription
34 | 2. `UPDATE tasks SET title='A' WHERE id=13`
35 | 3. We perform the catch-up query and see `title="A"`
36 | 4. `UPDATE tasks SET title='B' WHERE id=13`
37 |
38 | Of course this means we will receive the information title="A" twice, but we can largely assume that receiving duplicate data is not a problem. On the other hand, this solution may also mean we go back in time or stutter. Take the following order of events:
39 |
40 | 1. We start the subscription
41 | 2. `UPDATE tasks SET title='A' WHERE id=13`
42 | 3. `UPDATE tasks SET title='B' WHERE id=13`
43 | 4. We perform the catch-up query and see title="B"
44 |
45 | The catch-up query sees title="B". However, when receiving the events, we will first receive title="A" and then receive title="B". If for some reason there is a huge gap in the replication log between title="A" and title="B", it may mean the UI can stutter between B -> A -> B or show inconsistent data.
46 |
47 | Therefore, we need a way to merge the data from the catch-up query and the replication log. We can solve this problem with Postgres' log sequence numbers (LSNs), which are monotonically increasing: after the catch-up query, we will fetch `pg_current_wal_lsn()`; let's call it the _query-LSN_. Each update we receive from the replication subscription will also have an LSN; let's call it the _sync-LSN_. Now we must buffer all replication events until the sync-LSN matches (or surpasses) the query-LSN; only then can we show the catch-up query results and the queued updates to the user. For this "overtaking" to happen, the catch-up queries and the subscription server must use the same replica, otherwise we may have gaps in the data. From this moment on, the client continues to track the _sync-LSN_.
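
A minimal sketch of this buffering rule (the module and field names are illustrative and not part of this repository; LSNs are assumed to already be normalized to integers):

```elixir
defmodule LiveQuery.Buffer do
  # Holds replication commits that arrive while the catch-up query is in flight.
  defstruct query_lsn: nil, buffered: [], caught_up?: false

  # Each commit arrives as %{lsn: lsn, ops: ops}. Until the commit LSN reaches
  # the query-LSN, we only buffer; once it does, the catch-up results plus the
  # buffered commits can be shown, and later commits are applied directly.
  def handle_commit(%__MODULE__{caught_up?: false} = buf, %{lsn: lsn} = commit) do
    buffered = buf.buffered ++ [commit]

    if lsn >= buf.query_lsn do
      {:flush, buffered, %{buf | buffered: [], caught_up?: true}}
    else
      {:buffer, %{buf | buffered: buffered}}
    end
  end

  def handle_commit(%__MODULE__{caught_up?: true} = buf, commit) do
    {:apply, commit, buf}
  end
end
```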
48 |
49 | The _sync-LSN_ is important to avoid transactional inconsistency on live queries. For example, imagine you have a "projects" table with a foreign key to "managers". If you query managers first, and then projects, the projects may point to a manager that has not yet been sent by the replication. Using the sync-LSN addresses that. The downside is that, if the replication layer is slow, it will delay when data can be shown to the client. We will explore solutions to address this particular problem after we introduce synchronization.
50 |
51 | ## Synchronization
52 |
53 | So far we have discussed live queries, but there is another feature we could build on top of this system: synchronization. Because our live query implementation broadcasts PostgreSQL replication events as they happen, without storing past data, if the user closes the application and rejoins after an hour, a day, or a week, we cannot catch them up.
54 |
55 | The good news is that the latest version of the data can be found directly in the tables we want to synchronize. To do so, we can issue another live query, but it would be a waste to download all data again. Unfortunately, we cannot simply use the ID or a database sequence to solve this problem because they do not reflect commit order: a transaction that started earlier and inserted an entry with ID=11 may commit after a transaction that started later and inserted ID=13. Here is [an excellent article](https://blog.sequinstream.com/postgres-sequences-can-commit-out-of-order/) that discusses this problem and possible solutions.
56 |
57 | We can adapt one of the solutions in the article by introducing a `snapmin` column to every table we want to live/sync. The `snapmin` column tells us the minimum transaction version that may have been committed after us. It can be computed by using triggers on INSERT and UPDATE to set the `snapmin` column to the following:
58 |
59 | ```sql
60 | pg_snapshot_xmin(pg_current_snapshot())
61 | ```
62 |
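For illustration, here is a hedged sketch of a migration that installs such a trigger (hypothetical module, function, and trigger names; the repository's actual migrations live in `priv/repo/migrations`, and the exact column type and casts may differ):

```elixir
defmodule Sync.Repo.Migrations.AddSnapminTrigger do
  use Ecto.Migration

  def up do
    # Stamp each written row with the minimum in-progress transaction at write time.
    execute """
    CREATE OR REPLACE FUNCTION set_snapmin() RETURNS trigger AS $$
    BEGIN
      NEW._snapmin := pg_snapshot_xmin(pg_current_snapshot());
      RETURN NEW;
    END;
    $$ LANGUAGE plpgsql
    """

    execute """
    CREATE TRIGGER items_set_snapmin
    BEFORE INSERT OR UPDATE ON items
    FOR EACH ROW EXECUTE FUNCTION set_snapmin()
    """
  end

  def down do
    execute "DROP TRIGGER items_set_snapmin ON items"
    execute "DROP FUNCTION set_snapmin()"
  end
end
```
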
63 | We will also augment the catch-up query to return the value of `pg_snapshot_xmin(pg_current_snapshot())` at the beginning of the transaction. This will be our pointer: subsequent catch-up queries should only return records whose `snapmin` column is greater than or equal to our pointer. Furthermore, as we receive updates from the subscription, we will update our pointer with the latest `snapmin` from the replicated rows (per table). This overall enables us to catch up data in any table with subsequent queries.
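
A sketch of what such a catch-up query can look like, mirroring the shape of `lib/sync_web/channels/channel.ex` later in this repository (`client_snapmin` is the pointer sent by the client, or `nil` on a first sync):

```elixir
import Ecto.Query
alias Sync.Repo

{:ok, reply} =
  Repo.transaction(fn ->
    # The new pointer, read at the beginning of the transaction
    %{rows: [[server_snapmin]]} =
      Repo.query!("SELECT pg_snapshot_xmin(pg_current_snapshot())")

    data =
      if client_snapmin do
        # Subsequent catch-ups: only rows at or above the client's pointer
        Repo.all(from s in {"items", Sync.Todo.Item}, where: s._snapmin >= ^client_snapmin)
      else
        # First sync: everything that has not been soft-deleted
        Repo.all(from s in {"items", Sync.Todo.Item}, where: is_nil(s._deleted_at))
      end

    # The query-LSN used to merge with the replication stream (previous section)
    %{rows: [[lsn]]} = Repo.query!("SELECT pg_current_wal_lsn()")
    %{snapmin: server_snapmin, data: data, lsn: lsn}
  end)
```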
64 |
65 | Unfortunately, this solution may introduce two pitfalls in practice.
66 |
67 | The first one can be caused by slow replication. If replication is slow, we delay showing data in the client, because the query-LSN and the sync-LSN must match. Luckily, now that we have introduced `snapmin`, we can use it to compute data that is safe to show. The client can show any data, without waiting, as long as the resource `snapmin` retrieved from a catch-up query is less than the latest `snapmin` seen by the replication.
68 |
69 | The second issue is caused by long running transactions. Because our synchronization point is `pg_snapshot_xmin(pg_current_snapshot())`, any long transaction will cause several resources to be introduced with the same `snapmin`, forcing them to be replayed in future catch-up queries. A simple solution to the problem would be for the catch-up query to only return data where `snapmin < pg_snapshot_xmin(pg_current_snapshot())`, but that comes with the downside of potentially delaying when data is seen until the long running transaction either commits or rolls back. Instead, we can reduce the impact long running transactions have on the system by using shared locks and topics.
70 |
71 | Most applications namespace their data by a key, such as the `organization_id`, `company_id`, or `team_id`. Of all transactions happening on a database, only some of them affect a given organization, and only some of them affect sync tables. Therefore, we could use a shared advisory lock: the `classid` will partition the namespace (such as organization id) and the `objid` will store the lower 32 bits of `pg_current_xact_id`. Now, instead of storing `pg_snapshot_xmin(pg_current_snapshot())` in the `snapmin` column or reading it at the beginning of catch-up queries, we will query the shared lock and filter the snapshot to only hold the transaction IDs with matching lower 32 bits. This means that regular long running transactions will not affect our system (because they won't be using the advisory locks) and, if a transaction in a sync table needs to run for long, it will only affect a subset of the organizations (split over a 32-bit namespace). Once you have over 2 billion organizations, organization IDs may overlap, but that is safe, as this is purely an optimization.
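
A minimal write-side sketch of such a "sync transaction" (illustrative only: `org_id`, the key encoding, and the surrounding plumbing are assumptions):

```elixir
alias Sync.Repo

Repo.transaction(fn ->
  # The current transaction id as a plain integer (xid8 has no direct bigint
  # cast on older Postgres versions, hence the text round-trip).
  %{rows: [[xid]]} = Repo.query!("SELECT pg_current_xact_id()::text::bigint")

  # The two-argument advisory lock functions take signed int4 keys, so take the
  # lower 32 bits of the transaction id and reinterpret them as a signed int.
  low32 = rem(xid, 4_294_967_296)
  objid = if low32 >= 2_147_483_648, do: low32 - 4_294_967_296, else: low32

  # classid partitions by organization, objid carries the transaction's low bits.
  Repo.query!("SELECT pg_advisory_xact_lock_shared($1, $2)", [org_id, objid])

  # ... perform the writes to the sync tables here ...
end)
```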
72 |
73 | A potential downside of this approach is that we can only allow changes to sync tables if they are wrapped in these "sync transactions", although you may bypass this limitation by introducing functions that use either `pg_snapshot_xmin(pg_current_snapshot())` or the shared advisory lock, depending on local transactional variables.
74 |
75 | ## Offline-first
76 |
77 | Now that we have live queries and synchronization in place, the next challenge is to make our application offline-first.
78 |
79 | The idea is that writes on the client will first go to a transaction store, which stores events. Entries in the transaction store are sent to the server as soon as possible, but this also works while offline, [similar to Linear's](https://linear.app/blog/scaling-the-linear-sync-engine). The server will eventually accept or refute these transactions, and their changes to the underlying tables eventually make their way back to the client via replication. There are many trade-offs and design decisions that could be made in regards to how and when transactions are submitted, accepted, or refuted.
80 |
81 | It is also important that the transaction store is kept separate from the synchronized data. What the user sees on the screen is the result of the transaction store events applied to the synchronized data.
82 |
83 | I'd also recommend allowing developers to store events of different types inside the transaction store, not only synchronization ones. For each event type stored, the client needs to know how to apply that event to its in-memory data, and the server needs to know how to process it.
84 |
85 | ## To be explored
86 |
87 | There are several topics not explored here:
88 |
89 | * Since we are doing synchronization, we need to store how schemas evolve over time, so changes to the schema on the server are automatically mirrored on the client. Not all schema changes can be automatically mirrored to the client.
90 |
91 | * We have not discussed the object model for JavaScript, this is important for both reads and writes. For example, if we have to load all data for a given company on page load, that won't be a good user experience. We probably want to control which collections are synchronized and which ones are live. Figma's [LiveGraph](https://www.figma.com/blog/livegraph-real-time-data-fetching-at-figma/) may be a source of inspiration.
92 |
93 | * One important topic to discuss is authentication and authorization, which must still live on the server. My early thoughts are that at least part of the authorization layer must be based on "topics": as the user signs in, or uses the application, the client will ask the server for authorization to listen to topics, such as "organization:ID", "project:ID", etc. Our live query system will broadcast data to clients based on the topics they have subscribed to. Furthermore, I'd suggest that most tables in your database have at least an `organization_id` (or `company_id`, `subdomain_id`, etc) column, which will behave as the "root key" of all operations. This will be important to guarantee event ordering and enable several optimizations (for tables that are publicly available, having no key whatsoever is fine). Other keys within the organization, such as `post_id` or `project_id`, may also be kept as additional columns (and additional authorization topics).
94 |
95 | * Other authorization rules may be written either in regular Elixir code, an embedded Elixir DSL (such as one inspired by [Ecto Query](https://hexdocs.pm/ecto/Ecto.Query.html)), or using an external language, such as [Google CEL](https://cel.dev/). When a channel subscribes to replication events, it can do so via ETS tables. These authorization rules can be stored in the ETS table and applied on-the-fly as the replication events arrive (see the sketch after this list).
96 |
97 | * What is the programming model for Phoenix? Phoenix LiveView removed the serialization layer (REST, GraphQL, etc) and squeezed the controllers/views into a single entity, simplifying the amount of concepts we have to juggle at once. The approach here similarly allows us to squeeze some layers (albeit different ones?) by keeping the focus on the data schema (and its evolution) and on how mutations (from the transaction DB) are handled.
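
As a sketch of the ETS-based authorization idea from the bullet above, subscriptions with per-subscriber filters could reuse the (ETS-backed) `Sync.Registry` already started in `lib/sync/application.ex`; the module, function names, and filter format below are assumptions, not the repository's current API:

```elixir
defmodule Sync.Authz do
  @registry Sync.Registry

  # Called from the channel process: subscribe to a topic with an authorization
  # filter fun stored as the registry value for this subscriber.
  def subscribe(topic, filter) when is_function(filter, 1) do
    Registry.register(@registry, topic, filter)
  end

  # Called from the replication process for each commit: every subscriber only
  # receives the operations its own filter allows.
  def broadcast(topic, lsn, ops) do
    Registry.dispatch(@registry, topic, fn entries ->
      for {pid, filter} <- entries do
        case Enum.filter(ops, filter) do
          [] -> :ok
          allowed -> send(pid, {:commit, lsn, allowed})
        end
      end
    end)
  end
end
```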
98 |
99 | ## Requirements and benefits
100 |
101 | Each table you want to synchronize, aka a "sync table", needs to adhere to the following rules:
102 |
103 | * All sync tables must have a `snapmin` column.
104 |
105 | * We strongly recommend (as per the previous section) for all sync tables to have a "root key" column, such as `organization_id`, with all information necessary to broadcast its update. Tables may have additional keys, if necessary.
106 |
107 | * Information about deleted rows must be preserved somewhere. You could use [a "soft" `deleted_at` column](https://dashbit.co/blog/soft-deletes-with-ecto), which is the approach implemented in this proof of concept, but I believe for this problem, [an additional "deletions" table per "sync table"](https://brandur.org/fragments/deleted-record-insert) would be easier to operate and scale.
108 |
109 | This approach is elegant for a few reasons:
110 |
111 | * Database table semantics and features are preserved as-is (it does not enforce a new programming model)
112 |
113 | * The table has the latest version of the data, for efficient catch-ups, and the replication gives deltas
114 |
115 | * We can scale by moving all live queries and synchronization to the read replicas
116 |
117 | * Several live queries can happen in parallel, as long as we track the snapmin/snapcur per query/table
118 |
119 | * Soft deletes are a requirement, but also a free feature
120 |
121 | * Phoenix and stock PostgreSQL only: no PostgreSQL extensions required, no additional services, no copies of the data to third party services, no need for higher database isolation levels
122 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Sync
2 |
3 | A proof of concept of an Elixir/Phoenix node that runs PostgreSQL Replication to automatically synchronize data to clients, showcased at my ElixirConf US 2024 keynote.
4 |
5 | **Beware: messy code ahead!** You will find plenty of TODOs, missing abstractions, and non-optimal decisions. Our goal is only to show how to use channels, transaction snapshots, and PostgreSQL replication to synchronize data with the front-end in a way that preserves transactional consistency. See `priv/repo/migrations` for the required SQL instructions and `lib/sync/replication.ex` for the replication logic.
6 |
7 | See the [DESIGN.md](DESIGN.md) document for an extensive discussion on the design, possible optimizations, and trade-offs one might consider when implementing this. [The slides are also available](Slides.pdf).
8 |
9 | ## Setup
10 |
11 | This project builds on top of PostgreSQL replication and it requires PostgreSQL 14+. You must also enable replication in your PostgreSQL instance:
12 |
13 | ```sql
14 | ALTER SYSTEM SET wal_level='logical';
15 | ALTER SYSTEM SET max_wal_senders='64';
16 | ALTER SYSTEM SET max_replication_slots='64';
17 | ```
18 |
19 | Then **you must restart your database**.
20 |
21 | You can also set those values when starting "postgres". This is useful, for example, when running it from Docker:
22 |
23 | ```yaml
24 | services:
25 | postgres:
26 | image: postgres:14
27 | env:
28 | ...
29 | command: ["postgres", "-c", "wal_level=logical"]
30 | ```
31 |
32 | For CI, GitHub Actions does not support setting the command for service containers, so you can update the setting and restart Postgres in a step instead:
33 |
34 | ```yaml
35 | - name: "Set PG settings"
36 | run: |
37 | docker exec ${{ job.services.postgres.id }} sh -c 'echo "wal_level=logical" >> /var/lib/postgresql/data/postgresql.conf'
38 | docker restart ${{ job.services.postgres.id }}
39 | ```
40 |
41 | In production, `max_wal_senders` and `max_replication_slots` must be set roughly to twice the number of machines you are using in production (to encompass blue-green/canary deployments). 64 is a reasonable number for the huge majority of applications out there.
42 |
43 | ## Running the app
44 |
45 | To start your Phoenix server:
46 |
47 | * Run `mix setup` to install and set up dependencies
48 | * Start Phoenix endpoint with `mix phx.server` or inside IEx with `iex -S mix phx.server`
49 |
50 | Now you can visit [`localhost:4000`](http://localhost:4000) from your browser.
51 |
52 | ## Acknowledgements
53 |
54 | [Anthony Accomazzo](https://github.com/acco) for insights, review of design documents, and code reviews. [Chris McCord](https://github.com/chrismccord) for feedback, code reviews, and writing all of my JavaScript. [Steffen Deusch](https://github.com/SteffenDE) for feedback and code reviews.
55 |
56 | ## License
57 |
58 | Copyright 2024 Dashbit
59 |
60 | ```
61 | Licensed under the Apache License, Version 2.0 (the "License");
62 | you may not use this file except in compliance with the License.
63 | You may obtain a copy of the License at
64 |
65 | http://www.apache.org/licenses/LICENSE-2.0
66 |
67 | Unless required by applicable law or agreed to in writing, software
68 | distributed under the License is distributed on an "AS IS" BASIS,
69 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
70 | See the License for the specific language governing permissions and
71 | limitations under the License.
72 | ```
73 |
--------------------------------------------------------------------------------
/Slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/josevalim/sync/ecd2f67d25ff879329dd84f547ff277e2836e302/Slides.pdf
--------------------------------------------------------------------------------
/assets/README.md:
--------------------------------------------------------------------------------
1 | # Svelte + Vite
2 |
3 | This template should help get you started developing with Svelte in Vite.
4 |
5 | ## Recommended IDE Setup
6 |
7 | [VS Code](https://code.visualstudio.com/) + [Svelte](https://marketplace.visualstudio.com/items?itemName=svelte.svelte-vscode).
8 |
9 | ## Need an official Svelte framework?
10 |
11 | Check out [SvelteKit](https://github.com/sveltejs/kit#readme), which is also powered by Vite. Deploy anywhere with its serverless-first approach and adapt to various platforms, with out of the box support for TypeScript, SCSS, and Less, and easily-added support for mdsvex, GraphQL, PostCSS, Tailwind CSS, and more.
12 |
13 | ## Technical considerations
14 |
15 | **Why use this over SvelteKit?**
16 |
17 | - It brings its own routing solution which might not be preferable for some users.
18 | - It is first and foremost a framework that just happens to use Vite under the hood, not a Vite app.
19 |
20 | This template contains as little as possible to get started with Vite + Svelte, while taking into account the developer experience with regards to HMR and intellisense. It demonstrates capabilities on par with the other `create-vite` templates and is a good starting point for beginners dipping their toes into a Vite + Svelte project.
21 |
22 | Should you later need the extended capabilities and extensibility provided by SvelteKit, the template has been structured similarly to SvelteKit so that it is easy to migrate.
23 |
24 | **Why `global.d.ts` instead of `compilerOptions.types` inside `jsconfig.json` or `tsconfig.json`?**
25 |
26 | Setting `compilerOptions.types` shuts out all other types not explicitly listed in the configuration. Using triple-slash references keeps the default TypeScript setting of accepting type information from the entire workspace, while also adding `svelte` and `vite/client` type information.
27 |
28 | **Why include `.vscode/extensions.json`?**
29 |
30 | Other templates indirectly recommend extensions via the README, but this file allows VS Code to prompt the user to install the recommended extension upon opening the project.
31 |
32 | **Why enable `checkJs` in the JS template?**
33 |
34 | Most cases of changing variable types at runtime are likely to be accidental rather than deliberate. This provides advanced typechecking out of the box. Should you like to take advantage of the dynamically-typed nature of JavaScript, it is trivial to change the configuration.
35 |
36 | **Why is HMR not preserving my local component state?**
37 |
38 | HMR state preservation comes with a number of gotchas! It has been disabled by default in both `svelte-hmr` and `@sveltejs/vite-plugin-svelte` due to its often surprising behavior. You can read the details [here](https://github.com/sveltejs/svelte-hmr/tree/master/packages/svelte-hmr#preservation-of-local-state).
39 |
40 | If you have state that's important to retain within a component, consider creating an external store which would not be replaced by HMR.
41 |
42 | ```js
43 | // store.js
44 | // An extremely simple external store
45 | import { writable } from 'svelte/store'
46 | export default writable(0)
47 | ```
48 |
--------------------------------------------------------------------------------
/assets/esbuild.cjs:
--------------------------------------------------------------------------------
1 | let sveltePlugin = {
2 | name: "svelte",
3 | setup(build) {
4 | let svelte = require("svelte/compiler")
5 | let path = require("path")
6 | let fs = require("fs")
7 |
8 | build.onLoad({ filter: /\.svelte$/ }, async (args) => {
9 | // This converts a message in Svelte's format to esbuild's format
10 | let convertMessage = ({ message, start, end }) => {
11 | let location
12 | if (start && end) {
13 | let lineText = source.split(/\r\n|\r|\n/g)[start.line - 1]
14 | let lineEnd = start.line === end.line ? end.column : lineText.length
15 | location = {
16 | file: filename,
17 | line: start.line,
18 | column: start.column,
19 | length: lineEnd - start.column,
20 | lineText,
21 | }
22 | }
23 | return { text: message, location }
24 | }
25 |
26 | // Load the file from the file system
27 | let source = await fs.promises.readFile(args.path, "utf8")
28 | let filename = path.relative(process.cwd(), args.path)
29 |
30 | // Convert Svelte syntax to JavaScript
31 | try {
32 | let { js, warnings } = svelte.compile(source, { filename, customElement: true })
33 | let contents = js.code + `//# sourceMappingURL=` + js.map.toUrl()
34 | return { contents, warnings: warnings.map(convertMessage) }
35 | } catch (e) {
36 | return { errors: [convertMessage(e)] }
37 | }
38 | })
39 | }
40 | };
41 |
42 | const config = {
43 | entryPoints: ["./src/main.js"],
44 | chunkNames: "chunks/[name]-[hash]",
45 | bundle: true,
46 | format: "esm",
47 | splitting: true,
48 | target: "es2021",
49 | outdir: "../priv/static/assets",
50 | plugins: [sveltePlugin],
51 | logLevel: "info"
52 | };
53 |
54 | (async () => {
55 | if (process.argv.includes("--watch")) {
56 | const ctx = await require("esbuild").context(config);
57 | await ctx.watch();
58 | } else {
59 | await require("esbuild").build(config);
60 | }
61 | })()
62 | .then(() => {
63 | if (process.argv.includes("--watch")) {
64 | // do nothing
65 | } else {
66 | process.exit(0);
67 | }
68 | })
69 | .catch((e) => {
70 | console.warn(e);
71 | process.exit(1)
72 | });
73 |
74 |
--------------------------------------------------------------------------------
/assets/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
29 | //
30 | plugin(({addVariant}) => addVariant("phx-no-feedback", [".phx-no-feedback&", ".phx-no-feedback &"])),
31 | plugin(({addVariant}) => addVariant("phx-click-loading", [".phx-click-loading&", ".phx-click-loading &"])),
32 | plugin(({addVariant}) => addVariant("phx-submit-loading", [".phx-submit-loading&", ".phx-submit-loading &"])),
33 | plugin(({addVariant}) => addVariant("phx-change-loading", [".phx-change-loading&", ".phx-change-loading &"])),
34 | plugin(({addVariant}) => addVariant("phx-hook-loading", [".phx-hook-loading&", ".phx-hook-loading &"])),
35 | plugin(({addVariant}) => addVariant("phx-error", [".phx-error&", ".phx-error &"])),
36 | plugin(({addVariant}) => addVariant("drag-item", [".drag-item&", ".drag-item &"])),
37 | plugin(({addVariant}) => addVariant("drag-ghost", [".drag-ghost&", ".drag-ghost &"]))
38 | ]
39 | }
40 |
--------------------------------------------------------------------------------
/config/config.exs:
--------------------------------------------------------------------------------
1 | # This file is responsible for configuring your application
2 | # and its dependencies with the aid of the Config module.
3 | #
4 | # This configuration file is loaded before any dependency and
5 | # is restricted to this project.
6 |
7 | # General application configuration
8 | import Config
9 |
10 | config :sync,
11 | ecto_repos: [Sync.Repo],
12 | generators: [timestamp_type: :utc_datetime]
13 |
14 | # Configures the endpoint
15 | config :sync, SyncWeb.Endpoint,
16 | url: [host: "localhost"],
17 | adapter: Bandit.PhoenixAdapter,
18 | render_errors: [
19 | formats: [html: SyncWeb.ErrorHTML, json: SyncWeb.ErrorJSON],
20 | layout: false
21 | ],
22 | pubsub_server: Sync.PubSub,
23 | live_view: [signing_salt: "5gW26Rag"]
24 |
25 | config :tailwind,
26 | version: "3.4.10",
27 | sync: [
28 | args: ~w(
29 | --config=tailwind.config.js
30 | --input=src/app.css
31 | --output=../priv/static/assets/app.css
32 | ),
33 | cd: Path.expand("../assets", __DIR__)
34 | ]
35 |
36 | # Configures the mailer
37 | #
38 | # By default it uses the "Local" adapter which stores the emails
39 | # locally. You can see the emails in your browser, at "/dev/mailbox".
40 | #
41 | # For production it's recommended to configure a different adapter
42 | # at the `config/runtime.exs`.
43 | config :sync, Sync.Mailer, adapter: Swoosh.Adapters.Local
44 |
45 | # Configures Elixir's Logger
46 | config :logger, :console,
47 | format: "$time $metadata[$level] $message\n",
48 | metadata: [:request_id]
49 |
50 | # Use Jason for JSON parsing in Phoenix
51 | config :phoenix, :json_library, Jason
52 |
53 | # Import environment specific config. This must remain at the bottom
54 | # of this file so it overrides the configuration defined above.
55 | import_config "#{config_env()}.exs"
56 |
--------------------------------------------------------------------------------
/config/dev.exs:
--------------------------------------------------------------------------------
1 | import Config
2 |
3 | # Configure your database
4 | config :sync, Sync.Repo,
5 | username: "postgres",
6 | password: "postgres",
7 | hostname: "localhost",
8 | database: "sync_dev",
9 | stacktrace: true,
10 | show_sensitive_data_on_connection_error: true,
11 | pool_size: 10
12 |
13 | # For development, we disable any cache and enable
14 | # debugging and code reloading.
15 | #
16 | # The watchers configuration can be used to run external
17 | # watchers to your application. For example, we can use it
18 | # to bundle .js and .css sources.
19 | config :sync, SyncWeb.Endpoint,
20 | # Binding to loopback ipv4 address prevents access from other machines.
21 | # Change to `ip: {0, 0, 0, 0}` to allow access from other machines.
22 | http: [ip: {127, 0, 0, 1}, port: 4000],
23 | check_origin: false,
24 | code_reloader: true,
25 | debug_errors: true,
26 | secret_key_base: "a5jo0VhHRb/Nr+1C0cGVv94L0vqo+Zz6Nq9z4BvFfiiKhZJKVjNyR8u2GYU/nmfB",
27 | watchers: [
28 | node: ["esbuild.cjs", "--watch", cd: "assets"],
29 | tailwind: {Tailwind, :install_and_run, [:sync, ~w(--watch)]}
30 | ]
31 |
32 | # ## SSL Support
33 | #
34 | # In order to use HTTPS in development, a self-signed
35 | # certificate can be generated by running the following
36 | # Mix task:
37 | #
38 | # mix phx.gen.cert
39 | #
40 | # Run `mix help phx.gen.cert` for more information.
41 | #
42 | # The `http:` config above can be replaced with:
43 | #
44 | # https: [
45 | # port: 4001,
46 | # cipher_suite: :strong,
47 | # keyfile: "priv/cert/selfsigned_key.pem",
48 | # certfile: "priv/cert/selfsigned.pem"
49 | # ],
50 | #
51 | # If desired, both `http:` and `https:` keys can be
52 | # configured to run both http and https servers on
53 | # different ports.
54 |
55 | # Watch static and templates for browser reloading.
56 | config :sync, SyncWeb.Endpoint,
57 | live_reload: [
58 | patterns: [
59 | ~r"priv/static/(?!uploads/).*(js|css|png|jpeg|jpg|gif|svg)$",
60 | ~r"priv/gettext/.*(po)$",
61 | ~r"lib/sync_web/(controllers|live|components)/.*(ex|heex)$"
62 | ]
63 | ]
64 |
65 | # Enable dev routes for dashboard and mailbox
66 | config :sync, dev_routes: true
67 |
68 | # Do not include metadata nor timestamps in development logs
69 | config :logger, :console, format: "[$level] $message\n"
70 |
71 | # Set a higher stacktrace during development. Avoid configuring such
72 | # in production as building large stacktraces may be expensive.
73 | config :phoenix, :stacktrace_depth, 20
74 |
75 | # Initialize plugs at runtime for faster development compilation
76 | config :phoenix, :plug_init_mode, :runtime
77 |
78 | config :phoenix_live_view,
79 | # Include HEEx debug annotations as HTML comments in rendered markup
80 | debug_heex_annotations: true,
81 | # Enable helpful, but potentially expensive runtime checks
82 | enable_expensive_runtime_checks: true
83 |
84 | # Disable swoosh api client as it is only required for production adapters.
85 | config :swoosh, :api_client, false
86 |
--------------------------------------------------------------------------------
/config/prod.exs:
--------------------------------------------------------------------------------
1 | import Config
2 |
3 | # Note we also include the path to a cache manifest
4 | # containing the digested version of static files. This
5 | # manifest is generated by the `mix assets.deploy` task,
6 | # which you should run after static files are built and
7 | # before starting your production server.
8 | config :sync, SyncWeb.Endpoint, cache_static_manifest: "priv/static/cache_manifest.json"
9 |
10 | # Configures Swoosh API Client
11 | config :swoosh, api_client: Swoosh.ApiClient.Finch, finch_name: Sync.Finch
12 |
13 | # Disable Swoosh Local Memory Storage
14 | config :swoosh, local: false
15 |
16 | # Do not print debug messages in production
17 | config :logger, level: :info
18 |
19 | # Runtime production configuration, including reading
20 | # of environment variables, is done on config/runtime.exs.
21 |
--------------------------------------------------------------------------------
/config/runtime.exs:
--------------------------------------------------------------------------------
1 | import Config
2 |
3 | # config/runtime.exs is executed for all environments, including
4 | # during releases. It is executed after compilation and before the
5 | # system starts, so it is typically used to load production configuration
6 | # and secrets from environment variables or elsewhere. Do not define
7 | # any compile-time configuration in here, as it won't be applied.
8 | # The block below contains prod specific runtime configuration.
9 |
10 | # ## Using releases
11 | #
12 | # If you use `mix release`, you need to explicitly enable the server
13 | # by passing the PHX_SERVER=true when you start it:
14 | #
15 | # PHX_SERVER=true bin/sync start
16 | #
17 | # Alternatively, you can use `mix phx.gen.release` to generate a `bin/server`
18 | # script that automatically sets the env var above.
19 | if System.get_env("PHX_SERVER") do
20 | config :sync, SyncWeb.Endpoint, server: true
21 | end
22 |
23 | if config_env() == :prod do
24 | database_url =
25 | System.get_env("DATABASE_URL") ||
26 | raise """
27 | environment variable DATABASE_URL is missing.
28 | For example: ecto://USER:PASS@HOST/DATABASE
29 | """
30 |
31 | maybe_ipv6 = if System.get_env("ECTO_IPV6") in ~w(true 1), do: [:inet6], else: []
32 |
33 | config :sync, Sync.Repo,
34 | # ssl: true,
35 | url: database_url,
36 | pool_size: String.to_integer(System.get_env("POOL_SIZE") || "10"),
37 | socket_options: maybe_ipv6
38 |
39 | # The secret key base is used to sign/encrypt cookies and other secrets.
40 | # A default value is used in config/dev.exs and config/test.exs but you
41 | # want to use a different value for prod and you most likely don't want
42 | # to check this value into version control, so we use an environment
43 | # variable instead.
44 | secret_key_base =
45 | System.get_env("SECRET_KEY_BASE") ||
46 | raise """
47 | environment variable SECRET_KEY_BASE is missing.
48 | You can generate one by calling: mix phx.gen.secret
49 | """
50 |
51 | host = System.get_env("PHX_HOST") || "example.com"
52 | port = String.to_integer(System.get_env("PORT") || "4000")
53 |
54 | config :sync, :dns_cluster_query, System.get_env("DNS_CLUSTER_QUERY")
55 |
56 | config :sync, SyncWeb.Endpoint,
57 | url: [host: host, port: 443, scheme: "https"],
58 | http: [
59 | # Enable IPv6 and bind on all interfaces.
60 | # Set it to {0, 0, 0, 0, 0, 0, 0, 1} for local network only access.
61 | # See the documentation on https://hexdocs.pm/bandit/Bandit.html#t:options/0
62 | # for details about using IPv6 vs IPv4 and loopback vs public addresses.
63 | ip: {0, 0, 0, 0, 0, 0, 0, 0},
64 | port: port
65 | ],
66 | secret_key_base: secret_key_base
67 |
68 | # ## SSL Support
69 | #
70 | # To get SSL working, you will need to add the `https` key
71 | # to your endpoint configuration:
72 | #
73 | # config :sync, SyncWeb.Endpoint,
74 | # https: [
75 | # ...,
76 | # port: 443,
77 | # cipher_suite: :strong,
78 | # keyfile: System.get_env("SOME_APP_SSL_KEY_PATH"),
79 | # certfile: System.get_env("SOME_APP_SSL_CERT_PATH")
80 | # ]
81 | #
82 | # The `cipher_suite` is set to `:strong` to support only the
83 | # latest and more secure SSL ciphers. This means old browsers
84 | # and clients may not be supported. You can set it to
85 | # `:compatible` for wider support.
86 | #
87 | # `:keyfile` and `:certfile` expect an absolute path to the key
88 | # and cert in disk or a relative path inside priv, for example
89 | # "priv/ssl/server.key". For all supported SSL configuration
90 | # options, see https://hexdocs.pm/plug/Plug.SSL.html#configure/1
91 | #
92 | # We also recommend setting `force_ssl` in your config/prod.exs,
93 | # ensuring no data is ever sent via http, always redirecting to https:
94 | #
95 | # config :sync, SyncWeb.Endpoint,
96 | # force_ssl: [hsts: true]
97 | #
98 | # Check `Plug.SSL` for all available options in `force_ssl`.
99 |
100 | # ## Configuring the mailer
101 | #
102 | # In production you need to configure the mailer to use a different adapter.
103 | # Also, you may need to configure the Swoosh API client of your choice if you
104 | # are not using SMTP. Here is an example of the configuration:
105 | #
106 | # config :sync, Sync.Mailer,
107 | # adapter: Swoosh.Adapters.Mailgun,
108 | # api_key: System.get_env("MAILGUN_API_KEY"),
109 | # domain: System.get_env("MAILGUN_DOMAIN")
110 | #
111 | # For this example you need include a HTTP client required by Swoosh API client.
112 | # Swoosh supports Hackney and Finch out of the box:
113 | #
114 | # config :swoosh, :api_client, Swoosh.ApiClient.Hackney
115 | #
116 | # See https://hexdocs.pm/swoosh/Swoosh.html#module-installation for details.
117 | end
118 |
--------------------------------------------------------------------------------
/config/test.exs:
--------------------------------------------------------------------------------
1 | import Config
2 |
3 | # Configure your database
4 | #
5 | # The MIX_TEST_PARTITION environment variable can be used
6 | # to provide built-in test partitioning in CI environment.
7 | # Run `mix help test` for more information.
8 | config :sync, Sync.Repo,
9 | username: "postgres",
10 | password: "postgres",
11 | hostname: "localhost",
12 | database: "sync_test#{System.get_env("MIX_TEST_PARTITION")}",
13 | pool: Ecto.Adapters.SQL.Sandbox,
14 | pool_size: System.schedulers_online() * 2
15 |
16 | # We don't run a server during test. If one is required,
17 | # you can enable the server option below.
18 | config :sync, SyncWeb.Endpoint,
19 | http: [ip: {127, 0, 0, 1}, port: 4002],
20 | secret_key_base: "EUoE+5OTvSMvZaDD55GDCxmX6bI2HOJE4PYuae5ysuR0J39xatRPhjsBDYtLsMQg",
21 | server: false
22 |
23 | # In test we don't send emails
24 | config :sync, Sync.Mailer, adapter: Swoosh.Adapters.Test
25 |
26 | # Disable swoosh api client as it is only required for production adapters
27 | config :swoosh, :api_client, false
28 |
29 | # Print only warnings and errors during test
30 | config :logger, level: :warning
31 |
32 | # Initialize plugs at runtime for faster test compilation
33 | config :phoenix, :plug_init_mode, :runtime
34 |
35 | # Enable helpful, but potentially expensive runtime checks
36 | config :phoenix_live_view,
37 | enable_expensive_runtime_checks: true
38 |
--------------------------------------------------------------------------------
/lib/sync/application.ex:
--------------------------------------------------------------------------------
1 | defmodule Sync.Application do
2 | @moduledoc false
3 |
4 | use Application
5 |
6 | @impl true
7 | def start(_type, _args) do
8 | children = [
9 | SyncWeb.Telemetry,
10 | Sync.Repo,
11 | {DNSCluster, query: Application.get_env(:sync, :dns_cluster_query) || :ignore},
12 | {Phoenix.PubSub, name: Sync.PubSub},
13 | # Sync specific services
14 | {Registry, name: Sync.Registry, keys: :duplicate},
15 | {Sync.Replication,
16 | [name: Sync.Replication, endpoint: SyncWeb.Endpoint] ++ Sync.Repo.config()},
17 | {Task, fn -> Sync.Replication.wait_for_connection!(Sync.Replication) end},
18 | # Start the Finch HTTP client for sending emails
19 | {Finch, name: Sync.Finch},
20 | # Start a worker by calling: Sync.Worker.start_link(arg)
21 | # {Sync.Worker, arg},
22 | # Start to serve requests, typically the last entry
23 | SyncWeb.Endpoint
24 | ]
25 |
26 | opts = [strategy: :one_for_one, name: Sync.Supervisor]
27 | Supervisor.start_link(children, opts)
28 | end
29 |
30 | @impl true
31 | def config_change(changed, _new, removed) do
32 | SyncWeb.Endpoint.config_change(changed, removed)
33 | :ok
34 | end
35 | end
36 |
--------------------------------------------------------------------------------
/lib/sync/mailer.ex:
--------------------------------------------------------------------------------
1 | defmodule Sync.Mailer do
2 | use Swoosh.Mailer, otp_app: :sync
3 | end
4 |
--------------------------------------------------------------------------------
/lib/sync/replication.ex:
--------------------------------------------------------------------------------
1 | defmodule Sync.Replication do
2 | use Postgrex.ReplicationConnection
3 |
4 | require Logger
5 |
6 | # TODO: Allow the publications to be passed as parameters
7 | def start_link(opts) do
8 | name = Keyword.get(opts, :name)
9 |
10 | if not is_atom(name) or name == nil do
11 | raise ArgumentError, "an atom :name is required when starting #{inspect(__MODULE__)}"
12 | end
13 |
14 | {endpoint, opts} = Keyword.pop!(opts, :endpoint)
15 | opts = Keyword.put_new(opts, :auto_reconnect, true)
16 | Postgrex.ReplicationConnection.start_link(__MODULE__, endpoint, opts)
17 | end
18 |
19 | @doc """
20 | Subscribe to events from the replication connection.
21 |
22 | This is useful to know whenever there is a reconnection,
23 | which should force clients to resync.
24 | """
25 | def subscribe(name) do
26 | Registry.register(Sync.Registry, name, [])
27 | end
28 |
29 | @doc """
30 | Wait for connection.
31 |
32 | This is typically used by boot to make sure the replication
33 | is running to avoid unnecessary syncs. It accepts a maximum
34 | timeout.
35 |
36 | This function will exit if the server is not running.
37 | It returns `:ok` or `:timeout` otherwise.
38 | """
39 | def wait_for_connection!(name, timeout \\ 5000) do
40 | ref = :erlang.monitor(:process, name, alias: :reply_demonitor)
41 | send(name, {:wait_for_connection, ref})
42 |
43 | receive do
44 | :ok ->
45 | :ok
46 |
47 | {:DOWN, ^ref, _, _, reason} ->
48 | exit({reason, {__MODULE__, :wait_for_connection!, [name, timeout]}})
49 | after
50 | timeout -> :timeout
51 | end
52 | end
53 |
54 | @doc """
55 | Use to emulate a disconnection from the database.
56 | """
57 | def disconnect(name) do
58 | send(name, :disconnect)
59 | end
60 |
61 | ## Callbacks
62 |
63 | @impl true
64 | def init(endpoint) do
65 | state = %{
66 | endpoint: endpoint,
67 | relations: %{},
68 | # {:disconnected, []} | :connected | [operation]
69 | replication: {:disconnected, []}
70 | }
71 |
72 | {:ok, state}
73 | end
74 |
75 | @impl true
76 | def handle_disconnect(%{replication: replication} = state) do
77 | waiting =
78 | case replication do
79 | {:disconnected, waiting} -> waiting
80 | _ -> []
81 | end
82 |
83 | {:noreply, %{state | replication: {:disconnected, waiting}}}
84 | end
85 |
86 | @impl true
87 | def handle_connect(state) do
88 | slot = random_slot_name()
89 | query = "CREATE_REPLICATION_SLOT #{slot} TEMPORARY LOGICAL pgoutput NOEXPORT_SNAPSHOT"
90 | {:query, query, state}
91 | end
92 |
93 | @impl true
94 | def handle_info({:wait_for_connection, ref}, state) do
95 | case state.replication do
96 | {:disconnected, waiting} ->
97 | {:noreply, %{state | replication: {:disconnected, [ref | waiting]}}}
98 |
99 | _ ->
100 | send(ref, :ok)
101 | {:noreply, state}
102 | end
103 | end
104 |
105 | def handle_info(:disconnect, _state) do
106 | {:disconnect, "user requested"}
107 | end
108 |
109 | def handle_info(_message, state) do
110 | {:noreply, state}
111 | end
112 |
113 | @impl true
114 | def handle_result([result], %{replication: {:disconnected, waiting}} = state) do
115 | %Postgrex.Result{
116 | command: :create,
117 | columns: ["slot_name", "consistent_point", "snapshot_name", "output_plugin"],
118 | rows: [[slot, _lsn, nil, "pgoutput"]]
119 | } = result
120 |
121 | for ref <- waiting do
122 | send(ref, :ok)
123 | end
124 |
125 | {:registered_name, name} = Process.info(self(), :registered_name)
126 |
127 | Registry.dispatch(Sync.Registry, name, fn data ->
128 | Enum.each(data, fn {pid, _} ->
129 | send(pid, {__MODULE__, %{message: :connect}})
130 | end)
131 | end)
132 |
133 | query =
134 | "START_REPLICATION SLOT #{slot} LOGICAL 0/0 (proto_version '2', publication_names 'phx_sync')"
135 |
136 | {:stream, query, [], %{state | replication: :connected}}
137 | end
138 |
139 | def handle_result(%Postgrex.Error{} = error, _state) do
140 | raise Exception.message(error)
141 | end
142 |
143 | @impl true
144 | # https://www.postgresql.org/docs/14/protocol-replication.html
145 | def handle_data(<<?w, _wal_start::64, _wal_end::64, _clock::64, rest::binary>>, state) do
146 | case rest do
147 | <<?B, _lsn::64, _timestamp::64, _xid::32>> when state.replication == :connected ->
148 | handle_begin(state)
149 |
150 | <<?C, _flags::8, lsn::64, _end_lsn::64, _timestamp::64>> when is_list(state.replication) ->
151 | handle_commit(lsn, state)
152 |
153 | <<?I, oid::32, ?N, count::16, tuple_data::binary>> when is_list(state.replication) ->
154 | handle_tuple_data(:insert, oid, count, tuple_data, state)
155 |
156 | <<?U, oid::32, ?N, count::16, tuple_data::binary>> when is_list(state.replication) ->
157 | handle_tuple_data(:update, oid, count, tuple_data, state)
158 |
159 | <<?U, oid::32, _rest::binary>> when is_list(state.replication) ->
160 | %{^oid => {schema, table, _columns}} = state.relations
161 |
162 | Logger.error(
163 | "A primary key of a row has been changed or its replica identity has been set to full, " <>
164 | "those operations are not currently supported by sync on #{schema}.#{table}"
165 | )
166 |
167 | {:noreply, state}
168 |
169 | <<?R, oid::32, rest::binary>> ->
170 | handle_relation(oid, rest, state)
171 |
172 | _ ->
173 | {:noreply, state}
174 | end
175 | end
176 |
177 | def handle_data(<<?k, wal_end::64, _clock::64, reply>>, state) do
178 | messages =
179 | case reply do
180 | 1 -> [<<?r, wal_end + 1::64, wal_end + 1::64, wal_end + 1::64, current_time()::64, 0>>]
181 | 0 -> []
182 | end
183 |
184 | {:noreply, messages, state}
185 | end
186 |
187 | ## Decoding messages
188 |
189 | defp handle_begin(state) do
190 | {:noreply, %{state | replication: []}}
191 | end
192 |
193 | defp handle_relation(oid, rest, state) do
194 | [schema, rest] = :binary.split(rest, <<0>>)
195 | schema = if schema == "", do: "pg_catalog", else: schema
196 | [table, <<_replica_identity::8, count::16, rest::binary>>] = :binary.split(rest, <<0>>)
197 | columns = parse_columns(count, rest)
198 | state = put_in(state.relations[oid], {schema, table, columns})
199 | {:noreply, state}
200 | end
201 |
202 | defp handle_tuple_data(kind, oid, count, tuple_data, state) do
203 | {schema, table, columns} = Map.fetch!(state.relations, oid)
204 | data = parse_tuple_data(count, columns, tuple_data)
205 | operation = %{schema: schema, table: table, op: kind, data: Map.new(data)}
206 | {:noreply, update_in(state.replication, &[operation | &1])}
207 | end
208 |
209 | defp handle_commit(lsn, state) do
210 | # TODO: Encode this as binary data.
211 | # TODO: Potentially allow synchronizing a subset of the fields.
212 | # TODO: lsn can cause an overflow on the client, since JS integers are floats.
213 | # TODO: Broadcast will encode to JSON when fastlaning,
214 | # this can be expensive if done directly in the replication process.
215 | # We can probably partition this over several processes.
216 | # TODO: The broadcast should be per table and a commit can touch several
217 | # tables. We need an efficient mechanism to filter these down and
218 | # send to the client. Perhaps by using a custom Registry rather than
219 | # PubSub, since it is all local anyway.
220 | state.endpoint.local_broadcast("sync:todos:items", "commit", %{
221 | lsn: lsn,
222 | ops: Enum.reverse(state.replication)
223 | })
224 |
225 | {:noreply, %{state | replication: :connected}}
226 | end
227 |
228 | # TODO: if an entry has been soft-deleted, we could emit a special delete
229 | # instruction instead of sending the whole update.
230 | defp parse_tuple_data(0, [], <<>>), do: []
231 |
232 | defp parse_tuple_data(count, [{name, _oid, _modifier} | columns], data) do
233 | case data do
234 | <<?n, rest::binary>> ->
235 | [{name, nil} | parse_tuple_data(count - 1, columns, rest)]
236 |
237 | # TODO: We are using text for convenience, we must set binary on the protocol
238 | <<?t, size::32, value::binary-size(size), rest::binary>> ->
239 | [{name, value} | parse_tuple_data(count - 1, columns, rest)]
240 |
241 | <<?b, _rest::binary>> ->
242 | raise "binary values not supported by sync"
243 |
244 | <<?u, rest::binary>> ->
245 | parse_tuple_data(count - 1, columns, rest)
246 | end
247 | end
248 |
249 | defp parse_columns(0, <<>>), do: []
250 |
251 | defp parse_columns(count, <<_flags, rest::binary>>) do
252 | [name, <<oid::32, modifier::32, rest::binary>>] = :binary.split(rest, <<0>>)
253 | [{name, oid, modifier} | parse_columns(count - 1, rest)]
254 | end
255 |
256 | ## Helpers
257 |
258 | @epoch DateTime.to_unix(~U[2000-01-01 00:00:00Z], :microsecond)
259 | defp current_time(), do: System.os_time(:microsecond) - @epoch
260 |
261 | defp random_slot_name do
262 | "phx_sync_" <> Base.encode32(:crypto.strong_rand_bytes(5), case: :lower)
263 | end
264 | end
265 |
--------------------------------------------------------------------------------
/lib/sync/repo.ex:
--------------------------------------------------------------------------------
1 | defmodule Sync.Repo do
2 | use Ecto.Repo,
3 | otp_app: :sync,
4 | adapter: Ecto.Adapters.Postgres
5 | end
6 |
--------------------------------------------------------------------------------
/lib/sync/todo/item.ex:
--------------------------------------------------------------------------------
1 | defmodule Sync.Todo.Item do
2 | use Ecto.Schema
3 | import Ecto.Changeset
4 |
5 | # TODO: Introduce sync_schema that will define the snapshot columns and the scope
6 | # TODO: Figure out schema evolution
7 | @primary_key {:id, :binary_id, autogenerate: true}
8 |
9 | @derive {Jason.Encoder,
10 | only: [
11 | :id,
12 | :name,
13 | :done,
14 | :_deleted_at,
15 | :_snapmin,
16 | :_snapcur,
17 | :inserted_at,
18 | :updated_at
19 | ]}
20 | schema "visible_items" do
21 | field :name, :string
22 | field :done, :boolean, default: false
23 |
24 | # TODO: Use writable: :never on Ecto v3.12+
25 | # TODO: read_after_writes does not work with soft deletes on Postgres,
26 | # we need to address this in Ecto and add it later
27 | field :_deleted_at, :utc_datetime, read_after_writes: true
28 | field :_snapmin, :integer, read_after_writes: true
29 | field :_snapcur, :integer, read_after_writes: true
30 |
31 | timestamps(type: :utc_datetime_usec)
32 | end
33 |
34 | @doc false
35 | def changeset(item, attrs) do
36 | item
37 | |> cast(attrs, [:name, :done])
38 | |> validate_required([:name, :done])
39 | end
40 | end
41 |
--------------------------------------------------------------------------------
/lib/sync_web.ex:
--------------------------------------------------------------------------------
1 | defmodule SyncWeb do
2 | @moduledoc """
3 | The entrypoint for defining your web interface, such
4 | as controllers, components, channels, and so on.
5 |
6 | This can be used in your application as:
7 |
8 | use SyncWeb, :controller
9 | use SyncWeb, :html
10 |
11 | The definitions below will be executed for every controller,
12 | component, etc, so keep them short and clean, focused
13 | on imports, uses and aliases.
14 |
15 | Do NOT define functions inside the quoted expressions
16 | below. Instead, define additional modules and import
17 | those modules here.
18 | """
19 |
20 | def static_paths, do: ~w(assets fonts images favicon.ico robots.txt)
21 |
22 | # TODO vite suffixes the bundles for cache busting, so we need to
23 | # read the manifest and cache the lookup. A proper approach would
24 | # be to cache this into an ETS lookup in the same way the phoenix
25 | # manifest is cached.
26 | #
27 | # We need to run the build to ensure it exists before we read it
28 |
29 | def router do
30 | quote do
31 | use Phoenix.Router, helpers: false
32 |
33 | # Import common connection and controller functions to use in pipelines
34 | import Plug.Conn
35 | import Phoenix.Controller
36 | import Phoenix.LiveView.Router
37 | end
38 | end
39 |
40 | def channel do
41 | quote do
42 | use Phoenix.Channel
43 | end
44 | end
45 |
46 | def controller do
47 | quote do
48 | use Phoenix.Controller,
49 | formats: [:html, :json],
50 | layouts: [html: SyncWeb.Layouts]
51 |
52 | import Plug.Conn
53 | import SyncWeb.Gettext
54 |
55 | unquote(verified_routes())
56 | end
57 | end
58 |
59 | def live_view do
60 | quote do
61 | use Phoenix.LiveView,
62 | layout: {SyncWeb.Layouts, :app}
63 |
64 | unquote(html_helpers())
65 | end
66 | end
67 |
68 | def live_component do
69 | quote do
70 | use Phoenix.LiveComponent
71 |
72 | unquote(html_helpers())
73 | end
74 | end
75 |
76 | def html do
77 | quote do
78 | use Phoenix.Component
79 |
80 | # Import convenience functions from controllers
81 | import Phoenix.Controller,
82 | only: [get_csrf_token: 0, view_module: 1, view_template: 1]
83 |
84 | # Include general helpers for rendering HTML
85 | unquote(html_helpers())
86 | end
87 | end
88 |
89 | defp html_helpers do
90 | quote do
91 | # HTML escaping functionality
92 | import Phoenix.HTML
93 | # Core UI components and translation
94 | import SyncWeb.CoreComponents
95 | import SyncWeb.Gettext
96 |
97 | # Shortcut for generating JS commands
98 | alias Phoenix.LiveView.JS
99 |
100 | # Routes generation with the ~p sigil
101 | unquote(verified_routes())
102 | end
103 | end
104 |
105 | def verified_routes do
106 | quote do
107 | use Phoenix.VerifiedRoutes,
108 | endpoint: SyncWeb.Endpoint,
109 | router: SyncWeb.Router,
110 | statics: SyncWeb.static_paths()
111 | end
112 | end
113 |
114 | @doc """
115 | When used, dispatch to the appropriate controller/live_view/etc.
116 | """
117 | defmacro __using__(which) when is_atom(which) do
118 | apply(__MODULE__, which, [])
119 | end
120 | end
121 |
--------------------------------------------------------------------------------
/lib/sync_web/channels/channel.ex:
--------------------------------------------------------------------------------
1 | defmodule SyncWeb.Channel do
2 | use SyncWeb, :channel
3 |
4 | alias Sync.Repo
5 | import Ecto.Query
6 |
7 | @impl true
8 | def join("sync:todos", _payload, socket) do
9 | Sync.Replication.subscribe(Sync.Replication)
10 | {:ok, assign(socket, :subscriptions, MapSet.new())}
11 | end
12 |
13 | # This message is received when we lose connection to PostgreSQL,
14 | # which means we may have missed replication events. Right now,
15 | this will force a resync but in the future we should rather just
16 | mark all collections as stale, so they are force synced as they
17 | # are used on the client.
18 | @impl true
19 | def handle_info({Sync.Replication, %{message: :connect}}, socket) do
20 | {:noreply, push(socket, "resync", %{})}
21 | end
22 |
23 | # The sync happens per table/view. This proof of concept
24 | # only has a single table, so we don't need to worry about
25 | # it for now.
26 | #
27 | # In order to sync, we need to receive the "snapmin" from
28 |   # the client. The query we perform must be at or above that "snapmin"
29 |   # and below the current database snapmin, as implemented below.
30 | # We return the latest "snapmin" to the client. The "snapmin"
31 | # (and the soon to be described "lsn") are tracked per resource,
32 | # but since this proof of concept only has a single resource
33 | # (the "items" table), we don't need to worry about it right
34 | # now.
35 | #
36 | # The sync also returns a "lsn". While we are syncing, we may
37 | # receive replication "commit" events. However, those need to
38 | # be buffered until the "commit" event has a "lsn" that matches
39 |   # or surpasses the returned sync "lsn". Only then can we merge
40 | # the sync data and all replication commits into the actual
41 | # client storage. This will give us snapshot isolation/transactional
42 | # consistency on the client. As we merge these (and future) replication
43 | # events, each row has a "_snapmin" column, and we should update
44 | # the resource snapmin in the client if the row "_snapmin" is
45 | # bigger than the client one.
46 | #
47 | # TODO: Allow multiple resources to be synced in
48 | # parallel and then emit data directly to the socket
49 | # TODO: _snapmin and lsn can overflow on the client because JS
50 |   # integers are actually floats. We need to handle this carefully
51 | # in the future.
52 | # TODO: We probably want to send data as binaries and
53 | # have the client parse it, mirroring whatever happens
54 | # in the replication layer.
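  # For illustration only, the payload assembled in the transaction below has
  # this shape (the values here are made up, and the lsn is shown in its
  # textual "0/16B3748" form regardless of how the driver decodes pg_lsn):
  #
  #     %{snapmin: 1234, data: [["items", [%Sync.Todo.Item{}, ...]]], lsn: "0/16B3748"}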
55 | @impl true
56 | def handle_in("sync", payload, socket) do
57 | # Subscribe before any query
58 | # TODO: This should return the connection LSN right after the
59 | # subscription. The replication can keep the current LSN in a
60 | # counter, and store it in the Registry meta key.
61 | socket = update_subscriptions("sync:todos:items", socket)
62 |
63 | {:ok, payload} =
64 | Repo.transaction(fn ->
65 | %{rows: [[server_snapmin]]} =
66 | Repo.query!("SELECT pg_snapshot_xmin(pg_current_snapshot())")
67 |
68 | query =
69 | if client_snapmin = Map.get(payload, "snapmin") do
70 | # TODO: This also returns deleted data, because we need to tell the client
71 | # if a particular row was removed. In the future, we probably want to return
72 | # only the IDs and not the whole record.
73 | from s in {"items", Sync.Todo.Item}, where: s._snapmin >= ^client_snapmin
74 | else
75 | from s in {"items", Sync.Todo.Item}, where: is_nil(s._deleted_at)
76 | end
77 |
78 | data = Repo.all(query)
79 | %{rows: [[lsn]]} = Repo.query!("SELECT pg_current_wal_lsn()")
80 | %{snapmin: server_snapmin, data: [["items", data]], lsn: lsn}
81 | end)
82 |
83 | {:reply, {:ok, payload}, socket}
84 | end
85 |
86 |   # For writes, the client has two storages: the sync storage and
87 |   # the transaction storage. The sync storage only holds data
88 |   # received through the sync and replication layers. Whenever the
89 |   # client wants to change data, the change goes to the transaction
90 |   # storage first. The in-memory data is the result of applying all
91 |   # transactions in the transaction storage to the sync storage.
92 |   # Whenever we receive a replication event, we discard the in-memory
93 |   # data, update the sync storage, and apply the transactions on top.
94 |   # Of course, there are several ways to optimize this so that we do
95 |   # not recompute all of the data from scratch every time.
96 | #
97 | # TODO: IndexedDB is shared across tabs. It is necessary to
98 |   # provide some mechanism to enable cross-tab support.
99 | #
100 |   # TODO: Writes do not need to happen on the channel. They could
101 |   # happen over HTTP, which could have benefits too: writes need
102 |   # to go somewhere close to the primary, while channels can run on
103 |   # the edge, close to replicas.
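  # For illustration, a "write" payload carrying one op of each kind handled
  # below could look like this (op ids and the item id are made up, and the
  # item fields are whatever Sync.Todo.Item.changeset/2 accepts):
  #
  #     %{"ops" => [
  #       ["op-1", "insert", "items", %{"id" => "4f9f...", ...}],
  #       ["op-2", "update", "items", %{"id" => "4f9f...", ...}],
  #       ["op-3", "delete", "items", "4f9f..."]
  #     ]}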
104 | @impl true
105 | def handle_in("write", %{"ops" => ops}, socket) do
106 | reply =
107 | Repo.transaction(fn -> Enum.reduce_while(ops, {:ok, %{}}, &handle_write/2) end)
108 |
109 | case reply do
110 | {:ok, {:ok, _}} -> {:reply, :ok, socket}
111 | {:ok, {:halt, error}} -> {:reply, {:error, error}, socket}
112 |       # TODO: handle rollback with a meaningful client error
113 | {:error, _rollback} -> {:reply, {:error, %{op: hd(ops), errors: []}}, socket}
114 | end
115 | end
116 |
117 | defp handle_write([_op_id, "insert", "items", data] = op, acc) do
118 | %{"id" => id} = data
119 |
120 | case Repo.insert(Sync.Todo.Item.changeset(%Sync.Todo.Item{id: id}, data)) do
121 | {:ok, _} -> {:cont, acc}
122 | {:error, changeset} -> {:halt, {:error, %{op: op, errors: changeset.errors}}}
123 | end
124 | end
125 |
126 | defp handle_write([_op_id, "update", "items", %{"id" => id} = data] = op, acc) do
127 |     # TODO: conflict resolution – someone may have raced our update with a delete.
128 | case Repo.get(Sync.Todo.Item, id) do
129 | nil ->
130 | {:cont, acc}
131 |
132 | %Sync.Todo.Item{} = todo ->
133 | case Repo.update(Sync.Todo.Item.changeset(todo, data)) do
134 | {:ok, _} -> {:cont, acc}
135 | {:error, changeset} -> {:halt, {:error, %{op: op, errors: changeset.errors}}}
136 | end
137 | end
138 | end
139 |
140 | defp handle_write([_op_id, "delete", "items", id], acc) do
141 | {_, _} = Repo.delete_all(from i in Sync.Todo.Item, where: i.id == ^id)
142 | {:cont, acc}
143 | end
144 |
145 | defp update_subscriptions(topic, socket) do
146 | subscriptions = socket.assigns.subscriptions
147 |
148 | if "sync:todos:items" in subscriptions do
149 | socket
150 | else
151 |       # TODO: We should replace the usage of the endpoint in Sync.Replication
152 |       # with a Registry and our own dispatching logic anyway.
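      # The :fastlane metadata lets PubSub encode each broadcast once per
      # serializer and deliver it directly to the transport process,
      # bypassing this channel process.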
153 | socket.endpoint.subscribe(topic,
154 | metadata: {:fastlane, socket.transport_pid, socket.serializer, []}
155 | )
156 |
157 | assign(socket, :subscriptions, MapSet.put(subscriptions, topic))
158 | end
159 | end
160 | end
161 |
--------------------------------------------------------------------------------
/lib/sync_web/channels/socket.ex:
--------------------------------------------------------------------------------
1 | defmodule SyncWeb.Socket do
2 | use Phoenix.Socket
3 |
4 | # A Socket handler
5 | #
6 | # It's possible to control the websocket connection and
7 | # assign values that can be accessed by your channel topics.
8 |
9 | ## Channels
10 |
11 | channel "sync:todos", SyncWeb.Channel
12 |
13 | # Socket params are passed from the client and can
14 | # be used to verify and authenticate a user. After
15 | # verification, you can put default assigns into
16 | # the socket that will be set for all channels, ie
17 | #
18 | # {:ok, assign(socket, :user_id, verified_user_id)}
19 | #
20 | # To deny connection, return `:error` or `{:error, term}`. To control the
21 | # response the client receives in that case, [define an error handler in the
22 | # websocket
23 | # configuration](https://hexdocs.pm/phoenix/Phoenix.Endpoint.html#socket/3-websocket-configuration).
24 | #
25 | # See `Phoenix.Token` documentation for examples in
26 | # performing token verification on connect.
27 | @impl true
28 | def connect(_params, socket, _connect_info) do
29 | {:ok, socket}
30 | end
31 |
32 | # Socket IDs are topics that allow you to identify all sockets for a given user:
33 | #
34 | # def id(socket), do: "user_socket:#{socket.assigns.user_id}"
35 | #
36 | # Would allow you to broadcast a "disconnect" event and terminate
37 | # all active sockets and channels for a given user:
38 | #
39 | # Elixir.SyncWeb.Endpoint.broadcast("user_socket:#{user.id}", "disconnect", %{})
40 | #
41 | # Returning `nil` makes this socket anonymous.
42 | @impl true
43 | def id(_socket), do: nil
44 | end
45 |
--------------------------------------------------------------------------------
/lib/sync_web/components/core_components.ex:
--------------------------------------------------------------------------------
1 | defmodule SyncWeb.CoreComponents do
2 | @moduledoc """
3 | Provides core UI components.
4 |
5 | At first glance, this module may seem daunting, but its goal is to provide
6 | core building blocks for your application, such as modals, tables, and
7 | forms. The components consist mostly of markup and are well-documented
8 | with doc strings and declarative assigns. You may customize and style
9 | them in any way you want, based on your application growth and needs.
10 |
11 | The default components use Tailwind CSS, a utility-first CSS framework.
12 | See the [Tailwind CSS documentation](https://tailwindcss.com) to learn
13 | how to customize them or feel free to swap in another framework altogether.
14 |
15 | Icons are provided by [heroicons](https://heroicons.com). See `icon/1` for usage.
16 | """
17 | use Phoenix.Component
18 |
19 | alias Phoenix.LiveView.JS
20 | import SyncWeb.Gettext
21 |
22 | @doc """
23 | Renders a modal.
24 |
25 | ## Examples
26 |
27 | <.modal id="confirm-modal">
28 | This is a modal.
29 |       </.modal>
30 |
31 | JS commands may be passed to the `:on_cancel` to configure
32 | the closing/cancel event, for example:
33 |
34 | <.modal id="confirm" on_cancel={JS.navigate(~p"/posts")}>
35 | This is another modal.
36 |       </.modal>
37 |
38 | """
39 | attr :id, :string, required: true
40 | attr :show, :boolean, default: false
41 | attr :on_cancel, JS, default: %JS{}
42 | slot :inner_block, required: true
43 |
44 | def modal(assigns) do
45 | ~H"""
46 |