├── .gitignore
├── .vscode
│   └── settings.json
├── LICENSE
├── Makefile
├── README.md
├── flake.lock
├── flake.nix
├── pgwrh.control
├── src
│   ├── common.sql
│   ├── master
│   │   ├── api-management.sql
│   │   ├── api-replica.sql
│   │   ├── deps.txt
│   │   ├── ext-config-dump.sql
│   │   ├── helpers.sql
│   │   ├── implementation-views.sql
│   │   ├── monitoring.sql
│   │   ├── publication-sync.sql
│   │   ├── snapshot.sql
│   │   ├── tables.sql
│   │   └── triggers.sql
│   └── replica
│       ├── api-management.sql
│       ├── daemon.sql
│       ├── deps.txt
│       ├── ext-config-dump.sql
│       ├── fdw.sql
│       ├── helpers.sql
│       ├── status.sql
│       ├── sync.sql
│       └── tables.sql
└── test
    ├── master.sql
    ├── requirements.txt
    └── test.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .build
2 | .work
3 | /pgwrh--*.sql
4 | result
5 | /.idea/
6 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "plpgsqlLanguageServer.database": "postgres",
3 | "plpgsqlLanguageServer.user": "michal",
4 | "plpgsqlLanguageServer.password": "michal",
5 | "plpgsqlLanguageServer.definitionFiles": [
6 | "**/*.psql",
7 | "**/*.pgsql",
8 | "**/*.sql"
9 | ],
10 | "files.associations": {
11 | "*.sql": "postgres"
12 | },
13 | "[makefile]": {
14 | "editor.insertSpaces": false,
15 | "editor.detectIndentation": false
16 | },
17 | "plpgsqlLanguageServer.workspaceValidationTargetFiles": [
18 | "**/*.sql"
19 | ]
20 | }
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU AFFERO GENERAL PUBLIC LICENSE
2 | Version 3, 19 November 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU Affero General Public License is a free, copyleft license for
11 | software and other kinds of works, specifically designed to ensure
12 | cooperation with the community in the case of network server software.
13 |
14 | The licenses for most software and other practical works are designed
15 | to take away your freedom to share and change the works. By contrast,
16 | our General Public Licenses are intended to guarantee your freedom to
17 | share and change all versions of a program--to make sure it remains free
18 | software for all its users.
19 |
20 | When we speak of free software, we are referring to freedom, not
21 | price. Our General Public Licenses are designed to make sure that you
22 | have the freedom to distribute copies of free software (and charge for
23 | them if you wish), that you receive source code or can get it if you
24 | want it, that you can change the software or use pieces of it in new
25 | free programs, and that you know you can do these things.
26 |
27 | Developers that use our General Public Licenses protect your rights
28 | with two steps: (1) assert copyright on the software, and (2) offer
29 | you this License which gives you legal permission to copy, distribute
30 | and/or modify the software.
31 |
32 | A secondary benefit of defending all users' freedom is that
33 | improvements made in alternate versions of the program, if they
34 | receive widespread use, become available for other developers to
35 | incorporate. Many developers of free software are heartened and
36 | encouraged by the resulting cooperation. However, in the case of
37 | software used on network servers, this result may fail to come about.
38 | The GNU General Public License permits making a modified version and
39 | letting the public access it on a server without ever releasing its
40 | source code to the public.
41 |
42 | The GNU Affero General Public License is designed specifically to
43 | ensure that, in such cases, the modified source code becomes available
44 | to the community. It requires the operator of a network server to
45 | provide the source code of the modified version running there to the
46 | users of that server. Therefore, public use of a modified version, on
47 | a publicly accessible server, gives the public access to the source
48 | code of the modified version.
49 |
50 | An older license, called the Affero General Public License and
51 | published by Affero, was designed to accomplish similar goals. This is
52 | a different license, not a version of the Affero GPL, but Affero has
53 | released a new version of the Affero GPL which permits relicensing under
54 | this license.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | TERMS AND CONDITIONS
60 |
61 | 0. Definitions.
62 |
63 | "This License" refers to version 3 of the GNU Affero General Public License.
64 |
65 | "Copyright" also means copyright-like laws that apply to other kinds of
66 | works, such as semiconductor masks.
67 |
68 | "The Program" refers to any copyrightable work licensed under this
69 | License. Each licensee is addressed as "you". "Licensees" and
70 | "recipients" may be individuals or organizations.
71 |
72 | To "modify" a work means to copy from or adapt all or part of the work
73 | in a fashion requiring copyright permission, other than the making of an
74 | exact copy. The resulting work is called a "modified version" of the
75 | earlier work or a work "based on" the earlier work.
76 |
77 | A "covered work" means either the unmodified Program or a work based
78 | on the Program.
79 |
80 | To "propagate" a work means to do anything with it that, without
81 | permission, would make you directly or secondarily liable for
82 | infringement under applicable copyright law, except executing it on a
83 | computer or modifying a private copy. Propagation includes copying,
84 | distribution (with or without modification), making available to the
85 | public, and in some countries other activities as well.
86 |
87 | To "convey" a work means any kind of propagation that enables other
88 | parties to make or receive copies. Mere interaction with a user through
89 | a computer network, with no transfer of a copy, is not conveying.
90 |
91 | An interactive user interface displays "Appropriate Legal Notices"
92 | to the extent that it includes a convenient and prominently visible
93 | feature that (1) displays an appropriate copyright notice, and (2)
94 | tells the user that there is no warranty for the work (except to the
95 | extent that warranties are provided), that licensees may convey the
96 | work under this License, and how to view a copy of this License. If
97 | the interface presents a list of user commands or options, such as a
98 | menu, a prominent item in the list meets this criterion.
99 |
100 | 1. Source Code.
101 |
102 | The "source code" for a work means the preferred form of the work
103 | for making modifications to it. "Object code" means any non-source
104 | form of a work.
105 |
106 | A "Standard Interface" means an interface that either is an official
107 | standard defined by a recognized standards body, or, in the case of
108 | interfaces specified for a particular programming language, one that
109 | is widely used among developers working in that language.
110 |
111 | The "System Libraries" of an executable work include anything, other
112 | than the work as a whole, that (a) is included in the normal form of
113 | packaging a Major Component, but which is not part of that Major
114 | Component, and (b) serves only to enable use of the work with that
115 | Major Component, or to implement a Standard Interface for which an
116 | implementation is available to the public in source code form. A
117 | "Major Component", in this context, means a major essential component
118 | (kernel, window system, and so on) of the specific operating system
119 | (if any) on which the executable work runs, or a compiler used to
120 | produce the work, or an object code interpreter used to run it.
121 |
122 | The "Corresponding Source" for a work in object code form means all
123 | the source code needed to generate, install, and (for an executable
124 | work) run the object code and to modify the work, including scripts to
125 | control those activities. However, it does not include the work's
126 | System Libraries, or general-purpose tools or generally available free
127 | programs which are used unmodified in performing those activities but
128 | which are not part of the work. For example, Corresponding Source
129 | includes interface definition files associated with source files for
130 | the work, and the source code for shared libraries and dynamically
131 | linked subprograms that the work is specifically designed to require,
132 | such as by intimate data communication or control flow between those
133 | subprograms and other parts of the work.
134 |
135 | The Corresponding Source need not include anything that users
136 | can regenerate automatically from other parts of the Corresponding
137 | Source.
138 |
139 | The Corresponding Source for a work in source code form is that
140 | same work.
141 |
142 | 2. Basic Permissions.
143 |
144 | All rights granted under this License are granted for the term of
145 | copyright on the Program, and are irrevocable provided the stated
146 | conditions are met. This License explicitly affirms your unlimited
147 | permission to run the unmodified Program. The output from running a
148 | covered work is covered by this License only if the output, given its
149 | content, constitutes a covered work. This License acknowledges your
150 | rights of fair use or other equivalent, as provided by copyright law.
151 |
152 | You may make, run and propagate covered works that you do not
153 | convey, without conditions so long as your license otherwise remains
154 | in force. You may convey covered works to others for the sole purpose
155 | of having them make modifications exclusively for you, or provide you
156 | with facilities for running those works, provided that you comply with
157 | the terms of this License in conveying all material for which you do
158 | not control copyright. Those thus making or running the covered works
159 | for you must do so exclusively on your behalf, under your direction
160 | and control, on terms that prohibit them from making any copies of
161 | your copyrighted material outside their relationship with you.
162 |
163 | Conveying under any other circumstances is permitted solely under
164 | the conditions stated below. Sublicensing is not allowed; section 10
165 | makes it unnecessary.
166 |
167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
168 |
169 | No covered work shall be deemed part of an effective technological
170 | measure under any applicable law fulfilling obligations under article
171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
172 | similar laws prohibiting or restricting circumvention of such
173 | measures.
174 |
175 | When you convey a covered work, you waive any legal power to forbid
176 | circumvention of technological measures to the extent such circumvention
177 | is effected by exercising rights under this License with respect to
178 | the covered work, and you disclaim any intention to limit operation or
179 | modification of the work as a means of enforcing, against the work's
180 | users, your or third parties' legal rights to forbid circumvention of
181 | technological measures.
182 |
183 | 4. Conveying Verbatim Copies.
184 |
185 | You may convey verbatim copies of the Program's source code as you
186 | receive it, in any medium, provided that you conspicuously and
187 | appropriately publish on each copy an appropriate copyright notice;
188 | keep intact all notices stating that this License and any
189 | non-permissive terms added in accord with section 7 apply to the code;
190 | keep intact all notices of the absence of any warranty; and give all
191 | recipients a copy of this License along with the Program.
192 |
193 | You may charge any price or no price for each copy that you convey,
194 | and you may offer support or warranty protection for a fee.
195 |
196 | 5. Conveying Modified Source Versions.
197 |
198 | You may convey a work based on the Program, or the modifications to
199 | produce it from the Program, in the form of source code under the
200 | terms of section 4, provided that you also meet all of these conditions:
201 |
202 | a) The work must carry prominent notices stating that you modified
203 | it, and giving a relevant date.
204 |
205 | b) The work must carry prominent notices stating that it is
206 | released under this License and any conditions added under section
207 | 7. This requirement modifies the requirement in section 4 to
208 | "keep intact all notices".
209 |
210 | c) You must license the entire work, as a whole, under this
211 | License to anyone who comes into possession of a copy. This
212 | License will therefore apply, along with any applicable section 7
213 | additional terms, to the whole of the work, and all its parts,
214 | regardless of how they are packaged. This License gives no
215 | permission to license the work in any other way, but it does not
216 | invalidate such permission if you have separately received it.
217 |
218 | d) If the work has interactive user interfaces, each must display
219 | Appropriate Legal Notices; however, if the Program has interactive
220 | interfaces that do not display Appropriate Legal Notices, your
221 | work need not make them do so.
222 |
223 | A compilation of a covered work with other separate and independent
224 | works, which are not by their nature extensions of the covered work,
225 | and which are not combined with it such as to form a larger program,
226 | in or on a volume of a storage or distribution medium, is called an
227 | "aggregate" if the compilation and its resulting copyright are not
228 | used to limit the access or legal rights of the compilation's users
229 | beyond what the individual works permit. Inclusion of a covered work
230 | in an aggregate does not cause this License to apply to the other
231 | parts of the aggregate.
232 |
233 | 6. Conveying Non-Source Forms.
234 |
235 | You may convey a covered work in object code form under the terms
236 | of sections 4 and 5, provided that you also convey the
237 | machine-readable Corresponding Source under the terms of this License,
238 | in one of these ways:
239 |
240 | a) Convey the object code in, or embodied in, a physical product
241 | (including a physical distribution medium), accompanied by the
242 | Corresponding Source fixed on a durable physical medium
243 | customarily used for software interchange.
244 |
245 | b) Convey the object code in, or embodied in, a physical product
246 | (including a physical distribution medium), accompanied by a
247 | written offer, valid for at least three years and valid for as
248 | long as you offer spare parts or customer support for that product
249 | model, to give anyone who possesses the object code either (1) a
250 | copy of the Corresponding Source for all the software in the
251 | product that is covered by this License, on a durable physical
252 | medium customarily used for software interchange, for a price no
253 | more than your reasonable cost of physically performing this
254 | conveying of source, or (2) access to copy the
255 | Corresponding Source from a network server at no charge.
256 |
257 | c) Convey individual copies of the object code with a copy of the
258 | written offer to provide the Corresponding Source. This
259 | alternative is allowed only occasionally and noncommercially, and
260 | only if you received the object code with such an offer, in accord
261 | with subsection 6b.
262 |
263 | d) Convey the object code by offering access from a designated
264 | place (gratis or for a charge), and offer equivalent access to the
265 | Corresponding Source in the same way through the same place at no
266 | further charge. You need not require recipients to copy the
267 | Corresponding Source along with the object code. If the place to
268 | copy the object code is a network server, the Corresponding Source
269 | may be on a different server (operated by you or a third party)
270 | that supports equivalent copying facilities, provided you maintain
271 | clear directions next to the object code saying where to find the
272 | Corresponding Source. Regardless of what server hosts the
273 | Corresponding Source, you remain obligated to ensure that it is
274 | available for as long as needed to satisfy these requirements.
275 |
276 | e) Convey the object code using peer-to-peer transmission, provided
277 | you inform other peers where the object code and Corresponding
278 | Source of the work are being offered to the general public at no
279 | charge under subsection 6d.
280 |
281 | A separable portion of the object code, whose source code is excluded
282 | from the Corresponding Source as a System Library, need not be
283 | included in conveying the object code work.
284 |
285 | A "User Product" is either (1) a "consumer product", which means any
286 | tangible personal property which is normally used for personal, family,
287 | or household purposes, or (2) anything designed or sold for incorporation
288 | into a dwelling. In determining whether a product is a consumer product,
289 | doubtful cases shall be resolved in favor of coverage. For a particular
290 | product received by a particular user, "normally used" refers to a
291 | typical or common use of that class of product, regardless of the status
292 | of the particular user or of the way in which the particular user
293 | actually uses, or expects or is expected to use, the product. A product
294 | is a consumer product regardless of whether the product has substantial
295 | commercial, industrial or non-consumer uses, unless such uses represent
296 | the only significant mode of use of the product.
297 |
298 | "Installation Information" for a User Product means any methods,
299 | procedures, authorization keys, or other information required to install
300 | and execute modified versions of a covered work in that User Product from
301 | a modified version of its Corresponding Source. The information must
302 | suffice to ensure that the continued functioning of the modified object
303 | code is in no case prevented or interfered with solely because
304 | modification has been made.
305 |
306 | If you convey an object code work under this section in, or with, or
307 | specifically for use in, a User Product, and the conveying occurs as
308 | part of a transaction in which the right of possession and use of the
309 | User Product is transferred to the recipient in perpetuity or for a
310 | fixed term (regardless of how the transaction is characterized), the
311 | Corresponding Source conveyed under this section must be accompanied
312 | by the Installation Information. But this requirement does not apply
313 | if neither you nor any third party retains the ability to install
314 | modified object code on the User Product (for example, the work has
315 | been installed in ROM).
316 |
317 | The requirement to provide Installation Information does not include a
318 | requirement to continue to provide support service, warranty, or updates
319 | for a work that has been modified or installed by the recipient, or for
320 | the User Product in which it has been modified or installed. Access to a
321 | network may be denied when the modification itself materially and
322 | adversely affects the operation of the network or violates the rules and
323 | protocols for communication across the network.
324 |
325 | Corresponding Source conveyed, and Installation Information provided,
326 | in accord with this section must be in a format that is publicly
327 | documented (and with an implementation available to the public in
328 | source code form), and must require no special password or key for
329 | unpacking, reading or copying.
330 |
331 | 7. Additional Terms.
332 |
333 | "Additional permissions" are terms that supplement the terms of this
334 | License by making exceptions from one or more of its conditions.
335 | Additional permissions that are applicable to the entire Program shall
336 | be treated as though they were included in this License, to the extent
337 | that they are valid under applicable law. If additional permissions
338 | apply only to part of the Program, that part may be used separately
339 | under those permissions, but the entire Program remains governed by
340 | this License without regard to the additional permissions.
341 |
342 | When you convey a copy of a covered work, you may at your option
343 | remove any additional permissions from that copy, or from any part of
344 | it. (Additional permissions may be written to require their own
345 | removal in certain cases when you modify the work.) You may place
346 | additional permissions on material, added by you to a covered work,
347 | for which you have or can give appropriate copyright permission.
348 |
349 | Notwithstanding any other provision of this License, for material you
350 | add to a covered work, you may (if authorized by the copyright holders of
351 | that material) supplement the terms of this License with terms:
352 |
353 | a) Disclaiming warranty or limiting liability differently from the
354 | terms of sections 15 and 16 of this License; or
355 |
356 | b) Requiring preservation of specified reasonable legal notices or
357 | author attributions in that material or in the Appropriate Legal
358 | Notices displayed by works containing it; or
359 |
360 | c) Prohibiting misrepresentation of the origin of that material, or
361 | requiring that modified versions of such material be marked in
362 | reasonable ways as different from the original version; or
363 |
364 | d) Limiting the use for publicity purposes of names of licensors or
365 | authors of the material; or
366 |
367 | e) Declining to grant rights under trademark law for use of some
368 | trade names, trademarks, or service marks; or
369 |
370 | f) Requiring indemnification of licensors and authors of that
371 | material by anyone who conveys the material (or modified versions of
372 | it) with contractual assumptions of liability to the recipient, for
373 | any liability that these contractual assumptions directly impose on
374 | those licensors and authors.
375 |
376 | All other non-permissive additional terms are considered "further
377 | restrictions" within the meaning of section 10. If the Program as you
378 | received it, or any part of it, contains a notice stating that it is
379 | governed by this License along with a term that is a further
380 | restriction, you may remove that term. If a license document contains
381 | a further restriction but permits relicensing or conveying under this
382 | License, you may add to a covered work material governed by the terms
383 | of that license document, provided that the further restriction does
384 | not survive such relicensing or conveying.
385 |
386 | If you add terms to a covered work in accord with this section, you
387 | must place, in the relevant source files, a statement of the
388 | additional terms that apply to those files, or a notice indicating
389 | where to find the applicable terms.
390 |
391 | Additional terms, permissive or non-permissive, may be stated in the
392 | form of a separately written license, or stated as exceptions;
393 | the above requirements apply either way.
394 |
395 | 8. Termination.
396 |
397 | You may not propagate or modify a covered work except as expressly
398 | provided under this License. Any attempt otherwise to propagate or
399 | modify it is void, and will automatically terminate your rights under
400 | this License (including any patent licenses granted under the third
401 | paragraph of section 11).
402 |
403 | However, if you cease all violation of this License, then your
404 | license from a particular copyright holder is reinstated (a)
405 | provisionally, unless and until the copyright holder explicitly and
406 | finally terminates your license, and (b) permanently, if the copyright
407 | holder fails to notify you of the violation by some reasonable means
408 | prior to 60 days after the cessation.
409 |
410 | Moreover, your license from a particular copyright holder is
411 | reinstated permanently if the copyright holder notifies you of the
412 | violation by some reasonable means, this is the first time you have
413 | received notice of violation of this License (for any work) from that
414 | copyright holder, and you cure the violation prior to 30 days after
415 | your receipt of the notice.
416 |
417 | Termination of your rights under this section does not terminate the
418 | licenses of parties who have received copies or rights from you under
419 | this License. If your rights have been terminated and not permanently
420 | reinstated, you do not qualify to receive new licenses for the same
421 | material under section 10.
422 |
423 | 9. Acceptance Not Required for Having Copies.
424 |
425 | You are not required to accept this License in order to receive or
426 | run a copy of the Program. Ancillary propagation of a covered work
427 | occurring solely as a consequence of using peer-to-peer transmission
428 | to receive a copy likewise does not require acceptance. However,
429 | nothing other than this License grants you permission to propagate or
430 | modify any covered work. These actions infringe copyright if you do
431 | not accept this License. Therefore, by modifying or propagating a
432 | covered work, you indicate your acceptance of this License to do so.
433 |
434 | 10. Automatic Licensing of Downstream Recipients.
435 |
436 | Each time you convey a covered work, the recipient automatically
437 | receives a license from the original licensors, to run, modify and
438 | propagate that work, subject to this License. You are not responsible
439 | for enforcing compliance by third parties with this License.
440 |
441 | An "entity transaction" is a transaction transferring control of an
442 | organization, or substantially all assets of one, or subdividing an
443 | organization, or merging organizations. If propagation of a covered
444 | work results from an entity transaction, each party to that
445 | transaction who receives a copy of the work also receives whatever
446 | licenses to the work the party's predecessor in interest had or could
447 | give under the previous paragraph, plus a right to possession of the
448 | Corresponding Source of the work from the predecessor in interest, if
449 | the predecessor has it or can get it with reasonable efforts.
450 |
451 | You may not impose any further restrictions on the exercise of the
452 | rights granted or affirmed under this License. For example, you may
453 | not impose a license fee, royalty, or other charge for exercise of
454 | rights granted under this License, and you may not initiate litigation
455 | (including a cross-claim or counterclaim in a lawsuit) alleging that
456 | any patent claim is infringed by making, using, selling, offering for
457 | sale, or importing the Program or any portion of it.
458 |
459 | 11. Patents.
460 |
461 | A "contributor" is a copyright holder who authorizes use under this
462 | License of the Program or a work on which the Program is based. The
463 | work thus licensed is called the contributor's "contributor version".
464 |
465 | A contributor's "essential patent claims" are all patent claims
466 | owned or controlled by the contributor, whether already acquired or
467 | hereafter acquired, that would be infringed by some manner, permitted
468 | by this License, of making, using, or selling its contributor version,
469 | but do not include claims that would be infringed only as a
470 | consequence of further modification of the contributor version. For
471 | purposes of this definition, "control" includes the right to grant
472 | patent sublicenses in a manner consistent with the requirements of
473 | this License.
474 |
475 | Each contributor grants you a non-exclusive, worldwide, royalty-free
476 | patent license under the contributor's essential patent claims, to
477 | make, use, sell, offer for sale, import and otherwise run, modify and
478 | propagate the contents of its contributor version.
479 |
480 | In the following three paragraphs, a "patent license" is any express
481 | agreement or commitment, however denominated, not to enforce a patent
482 | (such as an express permission to practice a patent or covenant not to
483 | sue for patent infringement). To "grant" such a patent license to a
484 | party means to make such an agreement or commitment not to enforce a
485 | patent against the party.
486 |
487 | If you convey a covered work, knowingly relying on a patent license,
488 | and the Corresponding Source of the work is not available for anyone
489 | to copy, free of charge and under the terms of this License, through a
490 | publicly available network server or other readily accessible means,
491 | then you must either (1) cause the Corresponding Source to be so
492 | available, or (2) arrange to deprive yourself of the benefit of the
493 | patent license for this particular work, or (3) arrange, in a manner
494 | consistent with the requirements of this License, to extend the patent
495 | license to downstream recipients. "Knowingly relying" means you have
496 | actual knowledge that, but for the patent license, your conveying the
497 | covered work in a country, or your recipient's use of the covered work
498 | in a country, would infringe one or more identifiable patents in that
499 | country that you have reason to believe are valid.
500 |
501 | If, pursuant to or in connection with a single transaction or
502 | arrangement, you convey, or propagate by procuring conveyance of, a
503 | covered work, and grant a patent license to some of the parties
504 | receiving the covered work authorizing them to use, propagate, modify
505 | or convey a specific copy of the covered work, then the patent license
506 | you grant is automatically extended to all recipients of the covered
507 | work and works based on it.
508 |
509 | A patent license is "discriminatory" if it does not include within
510 | the scope of its coverage, prohibits the exercise of, or is
511 | conditioned on the non-exercise of one or more of the rights that are
512 | specifically granted under this License. You may not convey a covered
513 | work if you are a party to an arrangement with a third party that is
514 | in the business of distributing software, under which you make payment
515 | to the third party based on the extent of your activity of conveying
516 | the work, and under which the third party grants, to any of the
517 | parties who would receive the covered work from you, a discriminatory
518 | patent license (a) in connection with copies of the covered work
519 | conveyed by you (or copies made from those copies), or (b) primarily
520 | for and in connection with specific products or compilations that
521 | contain the covered work, unless you entered into that arrangement,
522 | or that patent license was granted, prior to 28 March 2007.
523 |
524 | Nothing in this License shall be construed as excluding or limiting
525 | any implied license or other defenses to infringement that may
526 | otherwise be available to you under applicable patent law.
527 |
528 | 12. No Surrender of Others' Freedom.
529 |
530 | If conditions are imposed on you (whether by court order, agreement or
531 | otherwise) that contradict the conditions of this License, they do not
532 | excuse you from the conditions of this License. If you cannot convey a
533 | covered work so as to satisfy simultaneously your obligations under this
534 | License and any other pertinent obligations, then as a consequence you may
535 | not convey it at all. For example, if you agree to terms that obligate you
536 | to collect a royalty for further conveying from those to whom you convey
537 | the Program, the only way you could satisfy both those terms and this
538 | License would be to refrain entirely from conveying the Program.
539 |
540 | 13. Remote Network Interaction; Use with the GNU General Public License.
541 |
542 | Notwithstanding any other provision of this License, if you modify the
543 | Program, your modified version must prominently offer all users
544 | interacting with it remotely through a computer network (if your version
545 | supports such interaction) an opportunity to receive the Corresponding
546 | Source of your version by providing access to the Corresponding Source
547 | from a network server at no charge, through some standard or customary
548 | means of facilitating copying of software. This Corresponding Source
549 | shall include the Corresponding Source for any work covered by version 3
550 | of the GNU General Public License that is incorporated pursuant to the
551 | following paragraph.
552 |
553 | Notwithstanding any other provision of this License, you have
554 | permission to link or combine any covered work with a work licensed
555 | under version 3 of the GNU General Public License into a single
556 | combined work, and to convey the resulting work. The terms of this
557 | License will continue to apply to the part which is the covered work,
558 | but the work with which it is combined will remain governed by version
559 | 3 of the GNU General Public License.
560 |
561 | 14. Revised Versions of this License.
562 |
563 | The Free Software Foundation may publish revised and/or new versions of
564 | the GNU Affero General Public License from time to time. Such new versions
565 | will be similar in spirit to the present version, but may differ in detail to
566 | address new problems or concerns.
567 |
568 | Each version is given a distinguishing version number. If the
569 | Program specifies that a certain numbered version of the GNU Affero General
570 | Public License "or any later version" applies to it, you have the
571 | option of following the terms and conditions either of that numbered
572 | version or of any later version published by the Free Software
573 | Foundation. If the Program does not specify a version number of the
574 | GNU Affero General Public License, you may choose any version ever published
575 | by the Free Software Foundation.
576 |
577 | If the Program specifies that a proxy can decide which future
578 | versions of the GNU Affero General Public License can be used, that proxy's
579 | public statement of acceptance of a version permanently authorizes you
580 | to choose that version for the Program.
581 |
582 | Later license versions may give you additional or different
583 | permissions. However, no additional obligations are imposed on any
584 | author or copyright holder as a result of your choosing to follow a
585 | later version.
586 |
587 | 15. Disclaimer of Warranty.
588 |
589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
597 |
598 | 16. Limitation of Liability.
599 |
600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
608 | SUCH DAMAGES.
609 |
610 | 17. Interpretation of Sections 15 and 16.
611 |
612 | If the disclaimer of warranty and limitation of liability provided
613 | above cannot be given local legal effect according to their terms,
614 | reviewing courts shall apply local law that most closely approximates
615 | an absolute waiver of all civil liability in connection with the
616 | Program, unless a warranty or assumption of liability accompanies a
617 | copy of the Program in return for a fee.
618 |
619 | END OF TERMS AND CONDITIONS
620 |
621 | How to Apply These Terms to Your New Programs
622 |
623 | If you develop a new program, and you want it to be of the greatest
624 | possible use to the public, the best way to achieve this is to make it
625 | free software which everyone can redistribute and change under these terms.
626 |
627 | To do so, attach the following notices to the program. It is safest
628 | to attach them to the start of each source file to most effectively
629 | state the exclusion of warranty; and each file should have at least
630 | the "copyright" line and a pointer to where the full notice is found.
631 |
632 | <one line to give the program's name and a brief idea of what it does.>
633 | Copyright (C) <year> <name of author>
634 |
635 | This program is free software: you can redistribute it and/or modify
636 | it under the terms of the GNU Affero General Public License as published
637 | by the Free Software Foundation, either version 3 of the License, or
638 | (at your option) any later version.
639 |
640 | This program is distributed in the hope that it will be useful,
641 | but WITHOUT ANY WARRANTY; without even the implied warranty of
642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
643 | GNU Affero General Public License for more details.
644 |
645 | You should have received a copy of the GNU Affero General Public License
646 | along with this program. If not, see <https://www.gnu.org/licenses/>.
647 |
648 | Also add information on how to contact you by electronic and paper mail.
649 |
650 | If your software can interact with users remotely through a computer
651 | network, you should also make sure that it provides a way for users to
652 | get its source. For example, if your program is a web application, its
653 | interface could display a "Source" link that leads users to an archive
654 | of the code. There are many ways you could offer source, and different
655 | solutions will be better for different programs; see section 13 for the
656 | specific requirements.
657 |
658 | You should also get your employer (if you work as a programmer) or school,
659 | if any, to sign a "copyright disclaimer" for the program, if necessary.
660 | For more information on this, and how to apply and follow the GNU AGPL, see
661 | <https://www.gnu.org/licenses/>.
662 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # pgwrh
2 | # Copyright (C) 2024 Michal Kleczek
3 |
4 | # This program is free software: you can redistribute it and/or modify
5 | # it under the terms of the GNU Affero General Public License as published by
6 | # the Free Software Foundation, either version 3 of the License, or
7 | # (at your option) any later version.
8 |
9 | # This program is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | # GNU Affero General Public License for more details.
13 |
14 | # You should have received a copy of the GNU Affero General Public License
15 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
16 |
17 | EXTENSION = pgwrh
18 | EXTVERSION = $(shell grep default_version $(EXTENSION).control | \
19 | 	sed -e "s/default_version[[:space:]]*=[[:space:]]*'\([^']*\)'/\1/")
20 | BUILD = .build
21 | DATA = $(BUILD)/pgwrh/$(EXTENSION)--$(EXTVERSION).sql
22 | EXTRA_CLEAN = $(BUILD)
23 | # Per-side source lists: tsort orders the names in deps.txt, sed maps each name to src/<side>/<name>.sql.
24 | MASTER = $(shell tsort src/master/deps.txt | sed -e 's/^/src\/master\//' -e 's/$$/\.sql/' | xargs echo)
25 | REPLICA = $(shell tsort src/replica/deps.txt | sed -e 's/^/src\/replica\//' -e 's/$$/\.sql/' | xargs echo)
26 |
27 | PG_CONFIG = pg_config
28 |
29 | ifdef NO_PGXS
30 | # Simple install for systems without pgxs
31 | # RedHat packages pgxs in postgresql-devel
32 | # which has a lot of dependencies (compilers etc.)
33 | # need to make it possible to use make to install
34 | # pgwrh on such systems
35 | EXTDIR := $(shell $(PG_CONFIG) --sharedir)/extension
36 |
37 | clean:
38 | 	rm -rf $(BUILD)
39 |
40 | install: all
41 | 	install -c -m 644 ./pgwrh.control $(EXTDIR)
42 | 	install -c -m 644 $(BUILD)/pgwrh/$(EXTENSION)--$(EXTVERSION).sql $(EXTDIR)
43 |
44 | else # NO_PGXS
45 | # Standard pgxs makefile
46 | PGXS := $(shell $(PG_CONFIG) --pgxs)
47 | include $(PGXS)
48 |
49 | endif # NO_PGXS
50 | # Concatenate all sources into the versioned extension script; the order-only "| prepare" ensures the output directory exists first, even under make -j.
51 | $(BUILD)/pgwrh/$(EXTENSION)--$(EXTVERSION).sql: src/common.sql $(MASTER) $(REPLICA) | prepare
52 | 	cat $^ > $@
53 |
54 | all: prepare $(EXTENSION).control $(BUILD)/pgwrh/$(EXTENSION)--$(EXTVERSION).sql
55 | prepare:
56 | 	mkdir -p ${BUILD}/pgwrh
57 | # all and prepare name actions, not files; the special target must be spelled .PHONY (a plain "PHONY" is just an ordinary target).
58 | .PHONY: all prepare
59 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pgwrh
2 |
3 | An extension implementing sharding for PostgreSQL based on logical replication and postgres_fdw.
4 | The goal is to scale **_read queries_**, overcoming the main limitations of traditional setups based on streaming replication and hot standbys:
5 | lack of sharding and large storage requirements.
6 |
7 | See [Architecture](https://github.com/mkleczek/pgwrh/wiki/Architecture) for more information on inner workings.
8 |
9 | :warning: **WIP**: the readme might be incomplete and contain mistakes in usage instructions (as the API is still changing)
10 |
11 | # Features
12 |
13 | ## Horizontal Scalability and High Availability
14 | ### No need for rebalancing
15 | Setting up and maintaining a highly available cluster of sharded storage servers is inherently tricky, especially during changes to cluster topology.
16 | Adding a new replica often requires rebalancing (ie. reorganizing data placement among replicas).
17 |
18 | _pgwrh_ minimizes the need to copy data by utilizing the _Weighted Rendezvous Hashing_ algorithm to distribute shards among replicas.
19 | Adding replicas never requires moving data between existing ones.
20 | ### Data redundancy
21 | _pgwrh_ maintains requested level of redundancy of shard data.
22 |
23 | Administrator can specify:
24 | * the percentage of replicas to host each shard
25 | * the minimum number of copies of any shard (regardless of the percentage setting above)
26 |
27 | So it is possible to implement policies like: _"Shards X, Y, Z should be distributed among 20% of replicas in the cluster, but in no fewer than 2 copies"_.
28 | ### Availability zones
29 | Replicas can be assigned to _availability zones_ and _pgwrh_ ensures shard copies are distributed evenly across all of them.
30 |
31 | ### Zero downtime reconfiguration of cluster topology
32 | Changing cluster topology very often requires lengthy process of data copying and indexing.
33 | Exposing replicas that do not have necessary indexes created imposes a risk of downtimes due to long queries causing exhaustion of connection pools.
34 |
35 | _pgwrh_ makes sure the cluster can operate without disruptions and that not-yet-ready replicas are isolated from query traffic.
36 |
37 | ## Sharding policy flexibility and storage tiering
38 | _pgwrh_ does not dictate how data is split into shards. It is possible to implement _any_ sharding policy by utilizing PostgreSQL partitioning.
39 | _pgwrh_ will distribute _leaves_ of partition hierarchy among replicas.
40 | It is also possible to specify different levels of redundancy for different subtrees of partitioning hierarchy.
41 |
42 | Thanks to this it is possible to have more replicas maintain _hot_ data and have _cold_ data storage requirements minimized.
43 |
44 | ## Ease of deployment and cluster administration
45 |
46 |
47 | ## Pure SQL/PGSQL
48 | This makes it easy to use _pgwrh_ in cloud environments that limit possibilities of custom extension installation.
49 |
50 | ***
51 | _Caveat_ at the moment _pgwrh_ requires _pg_background_ to operate as it needs a way to execute SQL commands
52 | outside current transaction (_CREATE/ALTER SUBSCRIPTION_ must not be executed in transaction).
53 |
54 | ## Based on built-in PostgreSQL facilities - no need for custom query parser/planner
55 | Contrary to other PostgreSQL sharding solutions that implement a query parser and interpreter to direct queries to
56 | the right replicas, _pgwrh_ reuses built-in PostgreSQL features: partitioning and postgres_fdw.
57 |
58 | PostgreSQL query planner and executor - while still somewhat limited - have capabilities to distribute computing among
59 | multiple machines by:
60 | * _pushing down_ filtering and aggregates (see https://www.postgresql.org/docs/current/runtime-config-query.html#GUC-ENABLE-PARTITIONWISE-AGGREGATE)
61 | * skip execution of unnecessary query plan nodes (see https://www.postgresql.org/docs/current/runtime-config-query.html#GUC-ENABLE-PARTITION-PRUNING)
62 |
63 | # Installation
64 |
65 | ## Prerequisites
66 |
67 | | Name | Version |
68 | | :---- | :---: |
69 | | PostgreSQL | 16+ |
70 | | pg_background | 1.2+ |
71 |
72 | ## Extension installation
73 |
74 | Clone the Git repository.
75 | ```sh
76 | git clone https://github.com/mkleczek/pgwrh.git
77 | ```
78 | Install the extension.
79 | ```sh
80 | cd pgwrh
81 | make install
82 | ```
83 | Create extension in PostgreSQL database.
84 | ```sh
85 | psql -c "CREATE EXTENSION pgwrh CASCADE"
86 | ```
87 |
88 | # Usage
89 |
90 | ## On master server
91 |
92 | ### Create your sharded table partitioning hierarchy
93 |
94 | The below example would create a two-level partition hierarchy for `test.my_data`:
95 | * First level by dates in `col3` (split by year)
96 | * Second level by hash on `col2`
97 | ```pgsql
98 | CREATE SCHEMA IF NOT EXISTS test;
99 |
100 | CREATE TABLE test.my_data (col1 text, col2 text, col3 date) PARTITION BY RANGE (col3);
101 | CREATE TABLE test.my_data_2023 PARTITION OF test.my_data FOR VALUES FROM (make_date(2023, 1, 1)) TO (make_date(2024, 1, 1));
102 | CREATE TABLE test.my_data_2024 PARTITION OF test.my_data FOR VALUES FROM (make_date(2024, 1, 1)) TO (make_date(2025, 1, 1));
103 | CREATE TABLE test.my_data_2025 PARTITION OF test.my_data FOR VALUES FROM (make_date(2025, 1, 1)) TO (make_date(2026, 1, 1));
104 |
105 | CREATE SCHEMA IF NOT EXISTS test_shards;
106 | DO$$
107 | DECLARE
108 | r record;
109 | BEGIN
110 | FOR r IN
111 | SELECT
112 | format('CREATE TABLE test_shards.my_data_%1$s_%2$s PARTITION OF test.my_data_%1$s (PRIMARY KEY (col1)) FOR VALUES WITH (MODULUS 16, REMAINDER %2$s)', year, rem) stmt
113 | FROM generate_series(2023, 2025) year, generate_series(0, 15) rem
114 | LOOP
115 | EXECUTE r.stmt;
116 | END LOOP;
117 | END$$;
118 | ```
119 |
120 | That gives 48 (16 * 3) shards in total.
121 |
122 | **Note** that there are no specific requirements for the partitioning hierarchy and any partitioned table can be sharded - the above is only for illustration purposes.
123 |
124 | ### Create a replica cluster
125 |
126 | Example:
127 | ```pgsql
128 | SELECT pgwrh.create_replica_cluster('c01');
129 | ```
130 |
131 | ### Configure roles and user accounts for replicas
132 |
133 | (Optional) Create a role for your cluster replicas and grant rights to SELECT from shards.
134 | ```pgsql
135 | CREATE ROLE c01_replica;
136 |
137 | GRANT SELECT ON ALL TABLES IN SCHEMA test_shards TO c01_replica;
138 | ```
139 |
140 | Create account for each replica.
141 | ```pgsql
142 | CREATE USER c01r01 PASSWORD 'c01r01Password' REPLICATION IN ROLE c01_replica;
143 | ```
144 |
145 | ## On every replica
146 |
147 | Make sure `pgwrh` extension is installed.
148 |
149 | ### Configure connection to master server
150 |
151 | Call `configure_controller` function providing username and password of this replica account created on master.
152 | ```pgsql
153 | SELECT configure_controller(
154 | host => 'master.myorg',
155 | port => '5432',
156 | username => 'c01r01', -- same as above
157 | password => 'c01r01Password' -- same as above
158 | );
159 | ```
160 |
161 | ## Create and deploy replica cluster configuration
162 |
163 | ### Specify what tables to replicate
164 |
165 | Example below would configure distribution of every partition of `test.my_data` to half (50%) of replicas,
166 | except partitions of `test.my_data_2024` which will be copied to all (100%) replicas.
167 | ```pgsql
168 | WITH st(schema_name, table_name, replication_factor) AS (
169 | VALUES
170 | ('test', 'my_data', 50),
171 | ('test', 'my_data_2024', 100)
172 | )
173 | INSERT INTO pgwrh.sharded_table (replication_group_id, sharded_table_schema, sharded_table_name, replication_factor)
174 | SELECT
175 | 'c01', schema_name, table_name, replication_factor
176 | FROM
177 | st;
178 | ```
179 |
180 | ### Configure replicas
181 | Add replica to configuration:
182 | ```pgsql
183 | SELECT pgwrh.add_replica('c01', 'c01r01', 'replica01.cluster01.myorg', 5432);
184 | ```
185 |
186 | ### Start deployment
187 | ```pgsql
188 | SELECT pgwrh.start_rollout('c01');
189 | ```
190 |
191 | New configuration is now visible to connected replicas which will start data replication.
192 |
193 | ### Commit configuration
194 | Once all replicas confirmed configuration changes, execute:
195 | ```pgsql
196 | SELECT pgwrh.commit_rollout('c01');
197 | ```
198 | (this will fail if some replicas are not reconfigured yet)
199 |
200 | ### Add more replicas
201 | ```pgsql
202 | CREATE USER c01r02 PASSWORD 'c01r02Password' REPLICATION IN ROLE c01_replica;
203 | CREATE USER c01r03 PASSWORD 'c01r03Password' REPLICATION IN ROLE c01_replica;
204 | CREATE USER c01r04 PASSWORD 'c01r04Password' REPLICATION IN ROLE c01_replica;
205 |
206 | select pgwrh.add_replica(
207 | _replication_group_id := 'c01',
208 | _replica_id := 'c01r02',
209 | _host_name := 'replica02.cluster01.myorg',
210 | _port := 5432);
211 | select pgwrh.add_replica(
212 | _replication_group_id := 'c01',
213 | _replica_id := 'c01r03',
214 | _host_name := 'replica03.cluster01.myorg',
215 | _port := 5432,
216 | _weight := 70);
217 | select pgwrh.add_replica(
218 | _replication_group_id := 'c01',
219 | _replica_id := 'c01r04',
220 | _host_name := 'replica04.cluster01.myorg',
221 | _port := 5432);
222 | ```
223 | It is possible to adjust the number of shards assigned to replicas by setting replica weight:
224 | ```pgsql
225 | SELECT pgwrh.set_replica_weight('c01', 'default', 'c01r04', 200);
226 | ```
227 |
228 | To deploy new configuration:
229 | ```pgsql
230 | SELECT pgwrh.start_rollout('c01');
231 | ```
232 | And then:
233 | ```pgsql
234 | SELECT pgwrh.commit_rollout('c01');
235 | ```
236 |
--------------------------------------------------------------------------------
/flake.lock:
--------------------------------------------------------------------------------
1 | {
2 | "nodes": {
3 | "flakelight": {
4 | "inputs": {
5 | "nixpkgs": "nixpkgs"
6 | },
7 | "locked": {
8 | "lastModified": 1738586370,
9 | "narHash": "sha256-oNDm2sfLm9jdfOskRq2ABn85gwXusbsHEOC181peno4=",
10 | "owner": "nix-community",
11 | "repo": "flakelight",
12 | "rev": "d05bcabfc1efb84a7d8689de6e50b84d7f23b427",
13 | "type": "github"
14 | },
15 | "original": {
16 | "owner": "nix-community",
17 | "repo": "flakelight",
18 | "type": "github"
19 | }
20 | },
21 | "nixpkgs": {
22 | "locked": {
23 | "lastModified": 1738410390,
24 | "narHash": "sha256-xvTo0Aw0+veek7hvEVLzErmJyQkEcRk6PSR4zsRQFEc=",
25 | "owner": "NixOS",
26 | "repo": "nixpkgs",
27 | "rev": "3a228057f5b619feb3186e986dbe76278d707b6e",
28 | "type": "github"
29 | },
30 | "original": {
31 | "owner": "NixOS",
32 | "ref": "nixos-unstable",
33 | "repo": "nixpkgs",
34 | "type": "github"
35 | }
36 | },
37 | "nixpkgs_2": {
38 | "locked": {
39 | "lastModified": 1738961098,
40 | "narHash": "sha256-yWNBf6VDW38tl179FEuJ0qukthVfB02kv+mRsfUsWC0=",
41 | "owner": "nixos",
42 | "repo": "nixpkgs",
43 | "rev": "a3eaf5e8eca7cab680b964138fb79073704aca75",
44 | "type": "github"
45 | },
46 | "original": {
47 | "owner": "nixos",
48 | "ref": "nixos-unstable",
49 | "repo": "nixpkgs",
50 | "type": "github"
51 | }
52 | },
53 | "root": {
54 | "inputs": {
55 | "flakelight": "flakelight",
56 | "nixpkgs": "nixpkgs_2"
57 | }
58 | }
59 | },
60 | "root": "root",
61 | "version": 7
62 | }
63 |
--------------------------------------------------------------------------------
/flake.nix:
--------------------------------------------------------------------------------
1 | {
2 |   description = "Simple flake to set up env";
3 |
4 |   inputs = {
5 |     nixpkgs.url = "github:nixos/nixpkgs?ref=nixos-unstable";
6 |     flakelight.url = "github:nix-community/flakelight";
7 |   };
8 |
9 |   outputs = { flakelight, nixpkgs, ... }:
10 |     flakelight ./. ({lib, ...}: {
11 |       inputs.nixpkgs = nixpkgs;
12 |       systems = lib.systems.flakeExposed;
13 |       package = { stdenv, defaultMeta, pkgs }:
14 |         stdenv.mkDerivation {
15 |           pname = "pgwrh";
16 |           version = "0.2.1"; # keep in sync with default_version in pgwrh.control
17 |           src = ./.;
18 |           buildInputs = [ pkgs.coreutils pkgs.postgresql ]; # coreutils provides tsort, which the Makefile calls
19 |           buildPhase = ''
20 |             USEPGXS=1 make DESTDIR=$out all
21 |           ''; # NOTE(review): the Makefile only tests NO_PGXS; USEPGXS looks unused - confirm
22 |           meta = defaultMeta;
23 |         };
24 |
25 |       devShell.packages = pkgs: with pkgs; [ coreutils postgresql ];
26 |     });
27 | }
28 |
--------------------------------------------------------------------------------
/pgwrh.control:
--------------------------------------------------------------------------------
1 | # pgwrh
2 | # Copyright (C) 2024 Michal Kleczek
3 |
4 | # This program is free software: you can redistribute it and/or modify
5 | # it under the terms of the GNU Affero General Public License as published by
6 | # the Free Software Foundation, either version 3 of the License, or
7 | # (at your option) any later version.
8 |
9 | # This program is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | # GNU Affero General Public License for more details.
13 |
14 | # You should have received a copy of the GNU Affero General Public License
15 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
16 |
17 | relocatable = false
18 | default_version = '0.2.1'
19 |
20 | schema = pgwrh
21 | requires = 'postgres_fdw,pg_background'
22 |
--------------------------------------------------------------------------------
/src/common.sql:
--------------------------------------------------------------------------------
1 | -- name: common
2 |
3 | -- pgwrh
4 | -- Copyright (C) 2024 Michal Kleczek
5 |
6 | -- This program is free software: you can redistribute it and/or modify
7 | -- it under the terms of the GNU Affero General Public License as published by
8 | -- the Free Software Foundation, either version 3 of the License, or
9 | -- (at your option) any later version.
10 |
11 | -- This program is distributed in the hope that it will be useful,
12 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | -- GNU Affero General Public License for more details.
15 |
16 | -- You should have received a copy of the GNU Affero General Public License
17 | -- along with this program. If not, see <https://www.gnu.org/licenses/>.
18 |
19 | \echo Use "CREATE EXTENSION pgwrh CASCADE" to load this file. \quit
20 |
21 | GRANT USAGE ON SCHEMA "@extschema@" TO PUBLIC;
22 | -- Name of the replica role for this database: 'pgwrh_replica_' followed by the current database name.
23 | CREATE FUNCTION pgwrh_replica_role_name() RETURNS text IMMUTABLE LANGUAGE sql AS
24 | $$
25 |     SELECT 'pgwrh_replica_' || current_database()::text;
26 | $$;
27 | -- Runs the SQL text passed in cmd as a dynamic statement (plpgsql EXECUTE).
28 | -- NOTE(review): cmd is executed verbatim; callers are presumably trusted - confirm.
29 | CREATE FUNCTION exec_dynamic(cmd text) RETURNS void LANGUAGE plpgsql AS
30 | $body$
31 | BEGIN EXECUTE cmd; END;
32 | $body$;
33 |
34 | -- Create the replica role for this database unless a role with that name already exists.
35 | DO
36 | $$
37 | DECLARE
38 |     role_name text := pgwrh_replica_role_name();
39 | BEGIN
40 |     IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = role_name) THEN
41 |         EXECUTE format('CREATE ROLE %I', role_name);
42 |     END IF;
43 | END $$;
44 |
45 | -- Records a normal ('n') pg_depend entry making object (_classid, _objid) depend on the extension named 'pgwrh'.
46 | CREATE OR REPLACE FUNCTION add_ext_dependency(_classid regclass, _objid oid) RETURNS void LANGUAGE sql AS $$
47 |     INSERT INTO pg_depend (classid, objid, objsubid, refclassid, refobjid, refobjsubid, deptype)
48 |     SELECT _classid, _objid, 0, 'pg_extension'::regclass, ext.oid, 0, 'n'
49 |     FROM pg_extension AS ext WHERE ext.extname = 'pgwrh'
50 | $$;
51 | -- Builds the text of a SELECT that calls add_ext_dependency for _classid, with oidexpr spliced in verbatim as the oid expression.
52 | CREATE OR REPLACE FUNCTION select_add_ext_dependency(_classid regclass, oidexpr text) RETURNS text LANGUAGE sql AS
53 | $$ SELECT format('SELECT "@extschema@".add_ext_dependency(%L, %s)', $1, $2) $$;
54 | -- Variant resolving the oid by name: the generated statement selects oid from catalog _classid where column name_attr equals name.
55 | CREATE OR REPLACE FUNCTION select_add_ext_dependency(_classid regclass, name_attr text, name text) RETURNS text LANGUAGE sql AS
56 | $$ SELECT format('SELECT "@extschema@".add_ext_dependency(%1$L, (SELECT oid FROM %1$s WHERE %I = %L))', $1, $2, $3) $$;
57 |
58 | -- Returns true when pg_depend holds an entry (of any deptype) linking object
59 | -- (_classid, _objid) to the extension named 'pgwrh'.
60 | CREATE OR REPLACE FUNCTION is_dependent_object(_classid regclass, _objid oid) RETURNS boolean STABLE LANGUAGE sql AS
61 | $$
62 |     SELECT EXISTS (
63 |         SELECT 1
64 |         FROM pg_extension ext
65 |         JOIN pg_depend dep ON dep.refobjid = ext.oid AND dep.refclassid = 'pg_extension'::regclass
66 |         WHERE dep.objid = _objid
67 |           AND dep.classid = _classid
68 |           AND ext.extname = 'pgwrh'
69 |     )
70 | $$;
71 |
72 | -- Catalog objects (classid, objid) registered in pg_depend with a normal ('n')
73 | -- dependency on the extension named 'pgwrh'.
74 | CREATE VIEW owned_obj AS
75 | SELECT
76 |     dep.classid,
77 |     dep.objid
78 | FROM
79 |     pg_extension ext
80 |     JOIN pg_depend dep ON dep.refobjid = ext.oid
81 | WHERE
82 |     ext.extname = 'pgwrh' AND dep.refclassid = 'pg_extension'::regclass AND dep.deptype = 'n'
83 | ;
84 |
85 | -- Foreign servers listed in owned_obj, i.e. pg_foreign_server rows whose oid is
86 | -- registered there under the pg_foreign_server catalog class.
87 | CREATE VIEW owned_server AS
88 | SELECT
89 |     srv.*
90 | FROM
91 |     owned_obj own JOIN pg_foreign_server srv ON srv.oid = own.objid
92 | WHERE own.classid = 'pg_foreign_server'::regclass;
--------------------------------------------------------------------------------
/src/master/api-management.sql:
--------------------------------------------------------------------------------
1 | -- name: master-api-management
2 | -- requires: master-tables
3 |
4 | -- pgwrh
5 | -- Copyright (C) 2024 Michal Kleczek
6 |
7 | -- This program is free software: you can redistribute it and/or modify
8 | -- it under the terms of the GNU Affero General Public License as published by
9 | -- the Free Software Foundation, either version 3 of the License, or
10 | -- (at your option) any later version.
11 |
12 | -- This program is distributed in the hope that it will be useful,
13 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | -- GNU Affero General Public License for more details.
16 |
17 | -- You should have received a copy of the GNU Affero General Public License
18 | -- along with this program. If not, see <https://www.gnu.org/licenses/>.
19 |
20 | -- Locks the group's non-current configuration versions and sets its target version to the next one.
21 | CREATE FUNCTION start_rollout(_replication_group_id text)
22 | RETURNS void
23 | LANGUAGE sql
24 | SET SEARCH_PATH FROM CURRENT
25 | AS
26 | $$
27 |     -- Step 1: lock every config version other than the current one, but only
28 |     -- while no rollout is in progress (current_version = target_version).
29 |     INSERT INTO replication_group_config_lock (replication_group_id, version)
30 |     SELECT replication_group_id, cfg.version
31 |     FROM replication_group grp
32 |     JOIN replication_group_config cfg USING (replication_group_id)
33 |     WHERE grp.target_version = grp.current_version
34 |       AND cfg.version <> grp.current_version
35 |       AND replication_group_id = $1
36 |     ON CONFLICT DO NOTHING;
37 |     -- Step 2: point target_version at the locked version that equals
38 |     -- next_version(current_version); no matching lock means no change.
39 |     UPDATE replication_group AS grp
40 |     SET target_version = lck.version
41 |     FROM replication_group_config_lock AS lck
42 |     WHERE grp.replication_group_id = $1 AND lck.replication_group_id = grp.replication_group_id
43 |       AND lck.version = next_version(grp.current_version);
44 | $$;
45 | COMMENT ON FUNCTION start_rollout(_replication_group_id text) IS
46 | $$
47 | Starts rollout of group's next configuration version.
48 |
49 | The new version is locked and marked as target version in replication_group record.
50 | If there is no new configuration version the function is a noop.
51 |
52 | # Parameters
53 | ## _replication_group_id
54 | Identifier of the replication group to start rollout.
55 | $$;
56 |
57 | -- Registers a new replication group (replica cluster) under the given identifier
58 | -- by inserting a replication_group row; only replication_group_id is supplied.
59 | CREATE FUNCTION create_replica_cluster(_replication_group_id text)
60 | RETURNS void
61 | LANGUAGE sql
62 | SET SEARCH_PATH FROM CURRENT
63 | AS
64 | $$
65 |     INSERT INTO replication_group (replication_group_id) SELECT $1;
66 | $$;
67 | COMMENT ON FUNCTION create_replica_cluster(_replication_group_id text) IS
68 | $$
69 | Creates new replica cluster.
70 | $$;
71 |
72 | -- Registers a replica in a replication group: a single statement inserts the
73 | -- group member row, the shard host row and the host's weight row.
74 | -- When _member_role is NULL the role named like _replica_id is used (resolved through a regrole cast).
75 | CREATE FUNCTION add_replica(
76 |     _replication_group_id text,
77 |     _replica_id text,
78 |     _host_name text,
79 |     _port int,
80 |     _member_role regrole DEFAULT NULL,
81 |     _availability_zone text DEFAULT 'default',
82 |     _weight int DEFAULT 100)
83 | RETURNS void LANGUAGE sql
84 | SET SEARCH_PATH FROM CURRENT AS $$
85 |     WITH m AS (
86 |         INSERT INTO replication_group_member (replication_group_id, host_id, member_role, availability_zone)
87 |         VALUES (_replication_group_id, _replica_id, coalesce(_member_role::text, _replica_id::regrole::text), _availability_zone)
88 |     ),
89 |     h AS (
90 |         INSERT INTO shard_host (replication_group_id, availability_zone, host_id, host_name, port)
91 |         VALUES (_replication_group_id, _availability_zone, _replica_id, _host_name, _port)
92 |     )
93 |     INSERT INTO shard_host_weight (replication_group_id, availability_zone, host_id, weight)
94 |     VALUES (_replication_group_id, _availability_zone, _replica_id, _weight)
95 | $$;
96 | COMMENT ON FUNCTION add_replica(_replication_group_id text, _replica_id text, _host_name text, _port int, _member_role regrole, _availability_zone text, _weight int) IS
97 | $$
98 | Adds new replica to a cluster.
99 | $$;
100 |
101 | -- Sets the weight of a replica: inserts a shard_host_weight row or, when a row
102 | -- with the same (group, zone, host, version) key already exists, overwrites its weight.
103 | CREATE FUNCTION set_replica_weight(
104 |     _replication_group_id text,
105 |     _availability_zone text,
106 |     _replica_id text,
107 |     _weight int)
108 | RETURNS void
109 | LANGUAGE sql
110 | SET SEARCH_PATH FROM CURRENT
111 | AS $$
112 |     INSERT INTO shard_host_weight (replication_group_id, availability_zone, host_id, weight)
113 |     VALUES (_replication_group_id, _availability_zone, _replica_id, _weight)
114 |     ON CONFLICT (replication_group_id, availability_zone, host_id, version) DO UPDATE SET weight = EXCLUDED.weight;
115 | $$;
116 |
117 | -- Promotes the group's target_version to current_version and, unless
118 | -- keep_old_config is true, deletes the group's other configuration versions.
119 | CREATE OR REPLACE FUNCTION commit_rollout(group_id text, keep_old_config boolean DEFAULT false)
120 | RETURNS void
121 | LANGUAGE plpgsql
122 | SET SEARCH_PATH FROM CURRENT
123 | AS
124 | $$
125 | BEGIN
126 |     UPDATE replication_group AS grp
127 |     SET current_version = grp.target_version
128 |     WHERE grp.replication_group_id = group_id;
129 |     -- The keep_old_config flag is part of the predicate, so the DELETE statement
130 |     -- itself always runs and simply matches no rows when the flag is true.
131 |     DELETE FROM replication_group_config AS old_cfg
132 |     USING replication_group AS grp
133 |     WHERE NOT keep_old_config
134 |       AND grp.replication_group_id = group_id AND old_cfg.replication_group_id = grp.replication_group_id
135 |       AND old_cfg.version <> grp.current_version;
136 | END
137 | $$;
138 | COMMENT ON FUNCTION commit_rollout(group_id text, keep_old_config boolean) IS
139 | $$
140 | Marks the version being rolled out as current.
141 | If any of the replicas did not report all remote and local shards as ready error is raised.
142 |
143 | # WARNING
144 | This is destructive operation. During rollout replicas maintain shards from both versions.
145 | After marking new version as current they will delete no longer needed shards.
146 | $$;
147 |
148 | -- Cancels a rollout: resets target_version to current_version and, when unlock
149 | -- is true, removes the group's config locks for versions other than the current one.
150 | CREATE FUNCTION rollback_rollout(_replication_group_id text, unlock boolean DEFAULT TRUE)
151 | RETURNS void
152 | LANGUAGE sql
153 | SET SEARCH_PATH FROM CURRENT
154 | AS $$
155 |     UPDATE replication_group AS grp
156 |     SET target_version = grp.current_version
157 |     WHERE grp.replication_group_id = _replication_group_id;
158 |
159 |     DELETE FROM replication_group_config_lock AS lck
160 |     USING replication_group AS grp
161 |     WHERE unlock
162 |       AND lck.replication_group_id = _replication_group_id AND grp.replication_group_id = lck.replication_group_id
163 |       AND lck.version <> grp.current_version;
164 | $$;
165 | COMMENT ON FUNCTION rollback_rollout(_replication_group_id text, unlock boolean) IS
166 | $$
167 | Rolls back any changes that are effects of roll out of new configuration version.
168 | Unlocks configuration version being rolled out.
169 | $$;
--------------------------------------------------------------------------------
/src/master/api-replica.sql:
--------------------------------------------------------------------------------
1 | -- name: api-replica
2 | -- requires: master-implementation-views
3 |
4 | -- pgwrh
5 | -- Copyright (C) 2024 Michal Kleczek
6 |
7 | -- This program is free software: you can redistribute it and/or modify
8 | -- it under the terms of the GNU Affero General Public License as published by
9 | -- the Free Software Foundation, either version 3 of the License, or
10 | -- (at your option) any later version.
11 |
12 | -- This program is distributed in the hope that it will be useful,
13 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | -- GNU Affero General Public License for more details.
16 |
17 | -- You should have received a copy of the GNU Affero General Public License
-- along with this program. If not, see <https://www.gnu.org/licenses/>.
19 |
-- For the replication group member identified by CURRENT_ROLE, lists the
-- partition tree of every sharded table together with a CREATE TABLE statement
-- that recreates each node:
--   * level 0 (tree root): explicit column list plus locally defined constraints
--   * deeper levels: PARTITION OF <parent> with the partition bound
--   * non-leaf nodes additionally get their PARTITION BY clause
-- Partitioned tables (relkind 'p') are always listed; plain tables (relkind 'r')
-- only when they are assigned to some host in the current or target version.
CREATE OR REPLACE VIEW shard_structure AS
WITH stc AS (
    -- sharded tables resolved to their catalog entries
    SELECT
        st.replication_group_id,
        c.oid::regclass
    FROM
        pg_class c
            JOIN pg_namespace n ON relnamespace = n.oid
            JOIN sharded_table st ON (nspname, relname) = (sharded_table_schema, sharded_table_name)
),
roots AS (
    -- sharded tables that have no other sharded table of the same group among their ancestors
    SELECT *
    FROM stc r
    WHERE NOT EXISTS (
        SELECT 1
        FROM stc
        WHERE replication_group_id = r.replication_group_id
          AND oid <> r.oid
          AND oid = ANY (SELECT * FROM pg_partition_ancestors(r.oid))
    )
)
SELECT
    n.nspname AS schema_name,
    c.relname AS table_name,
    level,
    format('CREATE TABLE IF NOT EXISTS %I.%I %s%s',
        n.nspname, c.relname,
        CASE WHEN level = 0
            -- root of the partition tree - need to define attributes
            THEN
                '(' ||
                    (
                        -- Skip dropped columns (their catalog rows remain with a placeholder
                        -- name), keep type modifiers such as varchar(10) / numeric(10,2),
                        -- and emit columns in their declared order.
                        SELECT string_agg(
                                   format('%I %s', attname, format_type(atttypid, atttypmod)),
                                   ',' ORDER BY attnum)
                        FROM pg_attribute
                        WHERE attrelid = t.relid AND attnum >= 1 AND NOT attisdropped
                    ) ||
                    coalesce(
                        ', ' || (SELECT string_agg(pg_get_constraintdef(con.oid), ', ') FROM pg_constraint con WHERE conrelid = t.relid AND conislocal),
                        ''
                    ) ||
                ')'
            -- partition - no attributes necessary
            ELSE
                format('PARTITION OF %I.%I%s %s',
                    pn.nspname, p.relname,
                    coalesce(
                        ' (' || (SELECT string_agg(pg_get_constraintdef(con.oid), ', ') FROM pg_constraint con WHERE conrelid = t.relid AND conislocal) || ')',
                        ''
                    ),
                    pg_get_expr(c.relpartbound, c.oid))
        END,
        CASE WHEN t.isleaf
            THEN
                ''
            ELSE
                ' PARTITION BY ' || pg_get_partkeydef(t.relid)
        END
    ) AS create_table
FROM
    roots r
        JOIN replication_group_member m USING (replication_group_id)
        JOIN replication_group USING (replication_group_id),
    pg_partition_tree(oid) t
        JOIN pg_class c ON t.relid = c.oid JOIN pg_namespace n ON c.relnamespace = n.oid
        LEFT JOIN pg_class p ON t.parentrelid = p.oid LEFT JOIN pg_namespace pn ON p.relnamespace = pn.oid
WHERE
    (
        c.relkind = 'p'
        OR
        c.relkind = 'r'
        AND
        EXISTS (SELECT 1 FROM
                    shard_assigned_host
                WHERE
                    replication_group_id = m.replication_group_id
                    AND
                    schema_name = n.nspname AND table_name = c.relname
                    AND
                    version IN (current_version, target_version)
        )
    )
    AND
    member_role = CURRENT_ROLE;

GRANT SELECT ON shard_structure TO PUBLIC;
98 |
99 |
-- Replica-facing projection of shard_assignment_per_member, restricted to the
-- rows of the member whose role is the one running the query.
CREATE OR REPLACE VIEW shard_assignment AS
SELECT
    mine.schema_name,
    mine.table_name,
    mine.local,
    mine.shard_server_name,
    mine.host,
    mine.port,
    mine.dbname,
    mine.shard_server_user,
    mine.pubname,
    mine.connect_remote,
    mine.retained_shard_server_name
FROM
    shard_assignment_per_member AS mine
WHERE
    mine.member_role = CURRENT_ROLE
;
GRANT SELECT ON shard_assignment TO PUBLIC;

COMMENT ON VIEW shard_assignment IS
'Main view implementing shard assignment logic.

Presents a particular replication_group_member (as identified by member_role) view of the cluster (replicaton_group).
Each member sees all shards with the following information for each shard:
* "local" flag saying if this shard should be replicated to this member
* information on how to connect to remote replicas for this shard: host, port, dbname, user, password';
127 |
-- Replica-facing projection of shard_index_per_member: index definitions that
-- apply to the member whose role is the one running the query.
CREATE OR REPLACE VIEW shard_index AS
SELECT
    mine.schema_name,
    mine.table_name,
    mine.index_name,
    mine.index_template,
    mine.optional
FROM
    shard_index_per_member AS mine
WHERE
    mine.member_role = CURRENT_ROLE
;
GRANT SELECT ON shard_index TO PUBLIC;
141 |
-- State columns of replication_group_member for the member whose role is the
-- one running the query. INSERT and UPDATE are granted on the view below.
CREATE VIEW replica_state AS
SELECT
    me.subscribed_local_shards,
    me.indexes,
    me.connected_local_shards,
    me.connected_remote_shards,
    me.users
FROM replication_group_member AS me
WHERE
    me.member_role = CURRENT_ROLE
;

-- Disabled INSTEAD OF trigger implementation, kept for reference.
-- NOTE(review): it refers to REJECTED.*, but the pseudo-table available in
-- ON CONFLICT ... DO UPDATE is EXCLUDED; fix that before re-enabling.
-- CREATE FUNCTION update_replica_state() RETURNS trigger LANGUAGE plpgsql AS
-- $$
-- BEGIN
-- INSERT INTO replica_state_per_member (member_role, subscribed_local_shards, indexes, connected_local_shards, connected_remote_shards)
-- VALUES (CURRENT_ROLE, NEW.subscribed_local_shards, NEW.indexes, NEW.connected_local_shards, NEW.connected_remote_shards)
-- ON CONFLICT (member_role) DO UPDATE SET
-- subscribed_local_shards = REJECTED.subscribed_local_shards,
-- indexes = REJECTED.indexes,
-- connected_local_shards = REJECTED.connected_local_shards,
-- connected_remote_shards = REJECTED.connected_remote_shards;
-- RETURN NEW;
-- END
-- $$;
-- CREATE TRIGGER update_replica_state_trigger INSTEAD OF INSERT OR UPDATE ON replica_state FOR EACH ROW EXECUTE FUNCTION update_replica_state();
GRANT SELECT, INSERT, UPDATE ON replica_state TO PUBLIC;
169 |
-- Generated username/password pairs of the replication group that the member
-- running the query belongs to (one row per row of replication_group_credentials
-- for that group).
CREATE VIEW credentials AS
SELECT
    creds.username,
    creds.password
FROM
    replication_group_credentials AS creds
        JOIN replication_group_member AS me USING (replication_group_id)
WHERE
    me.member_role = CURRENT_ROLE;
GRANT SELECT ON credentials TO PUBLIC;
180 |
--------------------------------------------------------------------------------
/src/master/deps.txt:
--------------------------------------------------------------------------------
1 | implementation-views api-replica
2 | tables ext-config-dump
3 | tables helpers
4 | helpers implementation-views
5 | helpers publication-sync
6 | publication-sync triggers
7 | snapshot triggers
8 | tables api-management
9 | implementation-views snapshot
10 | tables monitoring
11 |
--------------------------------------------------------------------------------
/src/master/ext-config-dump.sql:
--------------------------------------------------------------------------------
1 | -- name: ext-config-dump
2 | -- requires: tables
3 |
-- Register every configuration table of the extension with
-- pg_extension_config_dump (empty filter condition for each).
-- Tables are registered in the order they are listed.
SELECT pg_catalog.pg_extension_config_dump(cfg.table_name::regclass, '')
FROM unnest(ARRAY[
         'replication_group_config_lock',
         'replication_group_config_clone',
         'replication_group_config',
         'replication_group',
         'replication_group_member',
         'shard_host',
         'shard_host_weight',
         'sharded_table',
         'shard_index_template',
         'shard',
         'shard_assigned_host',
         'shard_assigned_index'
     ]) WITH ORDINALITY AS cfg(table_name, ord)
ORDER BY cfg.ord;
16 |
--------------------------------------------------------------------------------
/src/master/helpers.sql:
--------------------------------------------------------------------------------
1 | -- name: master-helpers
2 | -- requires: tables
3 |
-- Derives the publication name for a shard table: the fixed prefix 'pgwrh_'
-- followed by the md5 of schema name and table name concatenated without a
-- separator. Returns NULL when either argument is NULL.
CREATE FUNCTION pubname(schema_name text, table_name text) RETURNS text IMMUTABLE LANGUAGE sql AS
$$
    SELECT 'pgwrh_' || md5($1 || $2);
$$;
GRANT EXECUTE ON FUNCTION pubname(schema_name text, table_name text) TO PUBLIC;
9 |
-- Builds a user name of the form
--   pgwrh_<current database>_<replication group id>_<5 hex chars>
-- where the suffix is the last five characters of md5(version || seed).
-- NOTE(review): declared IMMUTABLE although it reads current_database().
CREATE FUNCTION usernamegen(replication_group_id text, version config_version, seed uuid)
RETURNS text
IMMUTABLE
LANGUAGE sql
AS
$$
    SELECT 'pgwrh_'
        || current_database()
        || '_'
        || usernamegen.replication_group_id
        || '_'
        || right(md5(usernamegen.version || usernamegen.seed::text), 5);
$$;
GRANT EXECUTE ON FUNCTION usernamegen(replication_group_id text, version config_version, seed uuid) TO PUBLIC;
-- Builds a password as the hex-encoded SHA-256 digest of
-- replication_group_id || version || seed (UTF8 encoded).
CREATE FUNCTION passgen(replication_group_id text, version config_version, seed uuid)
RETURNS text
IMMUTABLE
LANGUAGE sql
AS
$$
    SELECT encode(
        sha256(
            convert_to(
                passgen.replication_group_id || passgen.version || passgen.seed::text,
                'UTF8')),
        'hex');
$$;
GRANT EXECUTE ON FUNCTION passgen(replication_group_id text, version config_version, seed uuid) TO PUBLIC;
28 |
-- Toggles between the two configuration versions: 'FLIP' maps to 'FLOP',
-- anything else (including NULL) maps to 'FLIP'.
CREATE OR REPLACE FUNCTION next_version(version config_version) RETURNS config_version
IMMUTABLE
LANGUAGE sql AS
$$
    SELECT (
        CASE
            WHEN version = 'FLIP' THEN 'FLOP'
            ELSE 'FLIP'
        END
    )::"@extschema@".config_version
$$;
35 |
36 |
-- With only two versions toggled by next_version, the previous version is
-- obtained by the same call.
CREATE OR REPLACE FUNCTION prev_version(version config_version) RETURNS config_version
IMMUTABLE
LANGUAGE sql AS
$$
    SELECT "@extschema@".next_version($1)
$$;
43 |
-- Returns true when replication_group_config_lock contains a row for the given
-- replication group and configuration version.
-- Parameters are referenced through the function name because a bare "version"
-- would resolve to the table column rather than the parameter.
CREATE OR REPLACE FUNCTION is_locked(group_id text, version config_version) RETURNS boolean
LANGUAGE sql STABLE AS
$$
    SELECT EXISTS (
        SELECT 1
        FROM "@extschema@".replication_group_config_lock AS l
        WHERE l.replication_group_id = is_locked.group_id
          AND l.version = is_locked.version
    )
$$;
52 |
53 |
-- Ensures a replication_group_config row exists for the version following the
-- group's current version, then returns that version.
-- The INSERT relies on the table's column order (no column list) and silently
-- does nothing when the row is already present.
CREATE OR REPLACE FUNCTION next_pending_version(group_id text) RETURNS config_version
LANGUAGE sql AS
$$
    INSERT INTO "@extschema@".replication_group_config
    SELECT
        rg.replication_group_id,
        "@extschema@".next_version(rg.current_version)
    FROM
        "@extschema@".replication_group AS rg
    WHERE
        rg.replication_group_id = next_pending_version.group_id
    ON CONFLICT DO NOTHING;

    SELECT
        "@extschema@".next_version(rg.current_version)
    FROM
        "@extschema@".replication_group AS rg
    WHERE
        rg.replication_group_id = next_pending_version.group_id
$$;
COMMENT ON FUNCTION next_pending_version(group_id text) IS
'Inserts next pending version into replication_group_config and returns it.';
75 |
76 |
-- Hashes the given texts into a signed 32-bit integer: the arguments are
-- concatenated without separator (NULL elements count as empty strings), and
-- the first 8 hex digits of the md5 are reinterpreted as a bit(32) value.
CREATE OR REPLACE FUNCTION stable_hash(VARIADIC text[]) RETURNS int IMMUTABLE LANGUAGE sql AS
$$
    SELECT ('x' || left(md5(array_to_string($1, '', '')), 8))::bit(32)::int
$$;
81 |
-- Weighted score of a key: weight / -ln(u), where u is the stable hash of the
-- key texts scaled by 4294967297 and shifted by 0.5.
-- 4294967297 is the value of 2147483649 - (-2147483648) used by the original
-- expression; with a 32-bit signed hash it keeps u strictly between 0 and 1,
-- so ln(u) is defined and never zero.
CREATE OR REPLACE FUNCTION score(weight int, VARIADIC text[]) RETURNS double precision IMMUTABLE LANGUAGE sql AS
$$
    SELECT weight / (
        -ln(
            "@extschema@".stable_hash(VARIADIC $2)::double precision / 4294967297::bigint
            + 0.5::double precision
        )
    )
$$;
86 |
-- Evaluates sharding_key_expression as a SQL statement, with the shard's schema
-- name bound to $1 and its table name bound to $2, and returns the (first
-- column of the first row of the) result as text.
-- NOTE(review): the expression text is executed verbatim as dynamic SQL;
-- it must only ever come from trusted configuration.
CREATE OR REPLACE FUNCTION extract_sharding_key_value(schema_name text, table_name text, sharding_key_expression text) RETURNS text IMMUTABLE LANGUAGE plpgsql AS
$$
DECLARE
    key_value text;
BEGIN
    EXECUTE sharding_key_expression
        INTO key_value
        USING schema_name, table_name;
    RETURN key_value;
END
$$;
96 |
-- Resolves a sharded_table row to the regclass of the table it names, or NULL
-- when no such relation exists.
-- Schema and table names are stored as raw catalog names, so each part is
-- passed through quote_ident before being parsed: without quoting, names that
-- contain upper-case letters, dots or other special characters would be
-- case-folded or mis-split and fail to resolve.
-- quote_ident is strict, so a NULL name still yields NULL.
CREATE OR REPLACE FUNCTION to_regclass(st sharded_table) RETURNS regclass STABLE LANGUAGE sql AS
$$
    SELECT pg_catalog.to_regclass(
        quote_ident(st.sharded_table_schema) || '.' || quote_ident(st.sharded_table_name)
    )
$$;
101 |
--------------------------------------------------------------------------------
/src/master/implementation-views.sql:
--------------------------------------------------------------------------------
1 | -- name: master-implementation-views
2 | -- requires: core
3 |
-- One row per index assigned to a shard, with the generated index name
--   <shard table>_<template name>_<first 5 hex chars of md5(template schema || template table || template)>
-- and the template the index is created from.
CREATE VIEW shard_index_definition AS
SELECT
    replication_group_id,
    version,
    sai.schema_name,
    sai.table_name,
    sai.table_name
        || '_'
        || index_template_name
        || '_'
        || left(md5(index_template_schema || index_template_table_name || index_template), 5) AS index_name,
    index_template
FROM
    shard_assigned_index AS sai
        JOIN shard_index_template
            USING (replication_group_id, version, index_template_schema, index_template_table_name, index_template_name)
;
20 |
-- For every member, the index definitions that apply to the shards assigned to
-- it. A definition is listed when member and index are both part of the current
-- version, or both part of a target version that differs from the current one.
-- "optional" carries the member-shard's is_current flag.
CREATE OR REPLACE VIEW shard_index_per_member AS
WITH shard_class_index AS (
    -- index definitions per shard, flagged by the version(s) they belong to
    SELECT
        replication_group_id,
        schema_name,
        table_name,
        index_name,
        index_template,
        bool_or(version = current_version) AS is_current,
        bool_or(version = target_version AND current_version <> target_version) AS is_target
    FROM
        shard_index_definition
            JOIN replication_group USING (replication_group_id)
    GROUP BY
        replication_group_id, schema_name, table_name, index_name, index_template
),
member_shard AS (
    -- shards assigned to each member, flagged by the version(s) of the assignment
    SELECT
        replication_group_id,
        member_role,
        schema_name,
        table_name,
        bool_or(version = current_version) AS is_current,
        bool_or(version = target_version AND current_version <> target_version) AS is_target
    FROM
        shard_assigned_host
            JOIN replication_group USING (replication_group_id)
            JOIN replication_group_member USING (replication_group_id, availability_zone, host_id)
    GROUP BY
        replication_group_id, member_role, schema_name, table_name
)
SELECT
    replication_group_id,
    member_role,
    schema_name,
    table_name,
    index_name,
    index_template,
    s.is_current AS optional
FROM
    member_shard s
        JOIN shard_class_index i USING (replication_group_id, schema_name, table_name)
WHERE
    (s.is_current AND i.is_current)
    OR
    (s.is_target AND i.is_target)
;
COMMENT ON VIEW shard_index_per_member IS
'Provides definitions of indexes that should be created for each shard.';
82 |
-- Username and password generated for every locked configuration version,
-- derived from the group id, the version and the seed stored with the lock.
CREATE VIEW replication_group_credentials AS
SELECT
    lck.replication_group_id,
    lck.version,
    usernamegen(lck.replication_group_id, lck.version, lck.seed) AS username,
    passgen(lck.replication_group_id, lck.version, lck.seed) AS password
FROM
    replication_group_config_lock AS lck
;
92 |
-- Per-member view of every shard assigned in the current or target version of
-- the member's replication group: whether the member hosts the shard itself,
-- and which foreign server (name, hosts, ports, user) it should use to reach
-- the other hosts of that shard. While a rollout is in progress
-- (current_version <> target_version) the target-version server is exposed only
-- once the other target hosts are subscribed, online and have the target user.
--
-- NOTE(review): target_indexed is computed in the aggregate subquery but is not
-- referenced by the outer SELECT.
CREATE OR REPLACE VIEW shard_assignment_per_member AS
SELECT
    replication_group_id,
    availability_zone,
    host_id,
    member_role,
    schema_name,
    table_name,
    local,
    -- foreign server hosting shard
    -- use target configuration server only when transitioning and all remote replicas
    -- subscribed to the shard (ie. we can run ANALYZE)
    CASE WHEN current_version <> target_version AND target_subscribed AND target_online AND target_user_created
        THEN target_server_name
        ELSE current_server_name
    END AS shard_server_name,
    CASE WHEN current_version <> target_version AND target_subscribed AND target_online AND target_user_created
        THEN target_host
        ELSE coalesce(current_host, '')
    END AS host,
    CASE WHEN current_version <> target_version AND target_subscribed AND target_online AND target_user_created
        THEN target_port
        ELSE coalesce(current_port, '')
    END AS port,
    current_database() AS dbname,
    CASE WHEN current_version <> target_version AND target_subscribed AND target_online AND target_user_created
        THEN target_credentials.username
        ELSE current_username
    END AS shard_server_user,
    -- If shard is remote in target version, and it is ready, connect it to slot instead of the local one
    -- (but keep the local one if it is still marked as "local" above)
    CASE WHEN current_version <> target_version
        THEN target_remote AND target_subscribed AND target_online AND target_user_created
        ELSE NOT local
    END AS connect_remote,
    pubname(schema_name, table_name) AS pubname,
    -- do not drop foreign tables with this server name (to keep current tables during transition)
    current_server_name AS retained_shard_server_name,
    --local AND hosted_shard_subscribed_confirmation IS NULL AS subscription_confirmation_required -- whether confirmation from this member is required
    m AS replication_group_member
FROM
    replication_group_member m
        JOIN replication_group g USING (replication_group_id)
        JOIN replication_group_credentials current_credentials USING (replication_group_id)
        JOIN replication_group_credentials target_credentials USING (replication_group_id)
        -- one row per shard (schema_name, table_name), aggregated over all assigned hosts
        CROSS JOIN LATERAL (
            SELECT
                schema_name,
                table_name,
                -- is m among assigned hosts regardless of version
                -- every host has to retain shards from both current and target version
                bool_or(member_role = m.member_role) AS local,
                bool_and(member_role <> m.member_role)
                    FILTER (WHERE version = target_version) AS target_remote,
                -- server names are independent of shard
                md5(string_agg(sah.availability_zone || sah.host_id, ',' ORDER BY sah.availability_zone, sah.host_id)
                    FILTER (WHERE member_role <> m.member_role AND version = current_version)) AS current_server_name,
                md5(string_agg(sah.availability_zone || sah.host_id, ',' ORDER BY sah.availability_zone, sah.host_id)
                    FILTER (WHERE member_role <> m.member_role AND version = target_version)) AS target_server_name,
                -- is any of target version hosts online?
                bool_or(online)
                    FILTER (WHERE member_role <> m.member_role AND version = target_version) AS target_online,
                -- status of this particular shard
                -- did all target hosts confirm subscription (so that clients can execute analyze)
                bool_and(subscribes_local_shard)
                    FILTER (WHERE member_role <> m.member_role AND version = target_version) AS target_subscribed,
                -- did all target version hosts confirm target version indexes (so that clients can expose them as foreign tables)
                -- we want to avoid situation when clients issue queries to hosts that don't have required indexes
                -- as that might disrupt whole cluster due to slow queries, that in turn cause
                -- a) high resource usage and cache thrashing
                -- b) exhausted connection pools
                bool_and(has_all_indexes)
                    FILTER (WHERE member_role <> m.member_role AND version = target_version) AS target_indexed,
                -- If all current hosts confirmed creation of target version user
                -- then we rotate credentials
                CASE WHEN bool_and(target_user_created) FILTER (WHERE member_role <> m.member_role AND version = current_version)
                    THEN target_credentials.username
                    ELSE current_credentials.username
                END AS current_username,
                bool_and(target_user_created)
                    FILTER (WHERE member_role <> m.member_role AND version = target_version) AS target_user_created
            FROM
                shard_assigned_host sah
                    JOIN shard_host USING (replication_group_id, availability_zone, host_id)
                    JOIN replication_group_member USING (replication_group_id, availability_zone, host_id)
                    -- check if all required indexes are created
                    CROSS JOIN LATERAL (SELECT NOT EXISTS (SELECT 1 FROM
                                shard_index_definition i
                            WHERE
                                (  i.replication_group_id,   i.version,   i.schema_name,   i.table_name) =
                                (sah.replication_group_id, sah.version, sah.schema_name, sah.table_name)
                                AND
                                NOT EXISTS (SELECT 1 FROM
                                        json_to_recordset(indexes) AS mi(schema_name text, index_name text)
                                    WHERE
                                        (  schema_name,   index_name) =
                                        (i.schema_name, i.index_name)
                                )
                    )) i(has_all_indexes)
                    -- check if shard is subscribed
                    CROSS JOIN LATERAL (
                        SELECT EXISTS (SELECT 1 FROM
                                json_to_recordset(subscribed_local_shards) AS t(schema_name text, table_name text)
                            WHERE
                                (    schema_name,     table_name) =
                                (sah.schema_name, sah.table_name)
                    )) s(subscribes_local_shard)
                    -- check if the host reported the target version user among its users
                    CROSS JOIN LATERAL (
                        SELECT EXISTS (SELECT 1 FROM
                                json_array_elements_text(users) AS t(username)
                            WHERE username = target_credentials.username)
                    ) u(target_user_created)
            WHERE
                sah.replication_group_id = m.replication_group_id
                AND
                version IN (current_version, target_version)
            GROUP BY
                1, 2
        ) s
        -- calculate current version foreign server host and port based on _online_ assigned hosts
        -- and this member availability zone
        LEFT JOIN LATERAL (
            SELECT
                schema_name,
                table_name,
                string_agg(host_name, ',' ORDER BY sah.host_id) AS current_host,
                string_agg(port::text, ',' ORDER BY sah.host_id) AS current_port
            FROM
                shard_assigned_host sah
                    JOIN shard_host USING (replication_group_id, availability_zone, host_id)
                    JOIN replication_group_member shm USING (replication_group_id, availability_zone, host_id),
                -- multiply hosts in the same availability zone by same_zone_multiplier
                generate_series(1, CASE WHEN m.availability_zone = sah.availability_zone THEN m.same_zone_multiplier ELSE 1 END)
            WHERE
                sah.replication_group_id = m.replication_group_id
                AND
                version = current_version
                AND
                (availability_zone, host_id) <> (m.availability_zone, m.host_id)
                AND
                online
                AND
                -- isolate hosts that for some reason are missing current version indexes
                -- condition is:
                -- there are no current version indexes that this host did not report
                -- ideally we could use a function, but it is problematic due to permissions
                NOT EXISTS (SELECT 1 FROM
                        shard_assigned_host
                            JOIN shard_index_definition i USING (replication_group_id, version, schema_name, table_name)
                    WHERE
                        (availability_zone, host_id) = (sah.availability_zone, sah.host_id)
                        AND version = g.current_version
                        AND NOT EXISTS (SELECT 1 FROM
                                json_to_recordset(shm.indexes) AS mi(schema_name text, index_name text)
                            WHERE
                                (  schema_name,   index_name) =
                                (i.schema_name, i.index_name)
                        )
                )
            GROUP BY
                1, 2
        ) current_host_port USING (schema_name, table_name)
        -- same for the target version
        -- NOTE(review): "online" is unqualified here and this subquery joins only
        -- shard_assigned_host and shard_host (the current-version subquery above also joins
        -- replication_group_member); confirm it resolves to the intended column.
        LEFT JOIN LATERAL (
            SELECT
                schema_name,
                table_name,
                string_agg(host_name, ',' ORDER BY sah.host_id) AS target_host,
                string_agg(port::text, ',' ORDER BY sah.host_id) AS target_port
            FROM
                shard_assigned_host sah
                    JOIN shard_host USING (replication_group_id, availability_zone, host_id),
                -- multiply hosts in the same availability zone by same_zone_multiplier
                generate_series(1, CASE WHEN m.availability_zone = sah.availability_zone THEN m.same_zone_multiplier ELSE 1 END)
            WHERE
                sah.replication_group_id = m.replication_group_id
                AND
                version = target_version
                AND
                (availability_zone, host_id) <> (m.availability_zone, m.host_id)
                AND
                online
            GROUP BY
                1, 2
        ) target_host_port USING (schema_name, table_name)
WHERE
    current_credentials.version = current_version
    AND target_credentials.version = target_version
;
277 |
-- Shard assignments whose host has not (yet) reported the shard in its
-- subscribed_local_shards JSON array.
CREATE VIEW missing_subscribed_shard AS
SELECT
    replication_group_id,
    a.version,
    availability_zone,
    host_id,
    a.schema_name,
    a.table_name
FROM
    shard_assigned_host a
        JOIN replication_group_member USING (replication_group_id, availability_zone, host_id)
WHERE
    NOT EXISTS (
        SELECT 1
        FROM json_to_recordset(subscribed_local_shards) AS c(schema_name text, table_name text)
        WHERE c.schema_name = a.schema_name
          AND c.table_name = a.table_name
    )
;
290 |
-- Shard assignments whose host has not (yet) reported the shard in its
-- connected_local_shards JSON array.
CREATE VIEW missing_connected_local_shard AS
SELECT
    replication_group_id,
    a.version,
    availability_zone,
    host_id,
    a.schema_name,
    a.table_name
FROM
    shard_assigned_host a
        JOIN replication_group_member USING (replication_group_id, availability_zone, host_id)
WHERE
    NOT EXISTS (
        SELECT 1
        FROM json_to_recordset(connected_local_shards) AS c(schema_name text, table_name text)
        WHERE c.schema_name = a.schema_name
          AND c.table_name = a.table_name
    )
;
303 |
-- For every member, the shards of its replication group that are NOT assigned
-- to it (i.e. remote shards) and that it has not (yet) reported in its
-- connected_remote_shards JSON array.
CREATE VIEW missing_connected_remote_shard AS
SELECT
    m.replication_group_id,
    ms.version,
    m.availability_zone,
    m.host_id,
    ms.schema_name,
    ms.table_name
FROM
    replication_group_member m
        JOIN shard ms USING (replication_group_id)
WHERE
    -- the shard is remote: no assignment of this shard version to this member
    NOT EXISTS (
        SELECT 1
        FROM shard_assigned_host sah
        WHERE (sah.replication_group_id, sah.version, sah.availability_zone, sah.host_id, sah.schema_name, sah.table_name) =
              (  m.replication_group_id,  ms.version,   m.availability_zone,   m.host_id,  ms.schema_name,  ms.table_name)
    )
    -- and the member did not report it as connected
    AND NOT EXISTS (
        SELECT 1
        FROM json_to_recordset(m.connected_remote_shards) AS c(schema_name text, table_name text)
        WHERE c.schema_name = ms.schema_name
          AND c.table_name = ms.table_name
    )
;
329 |
--------------------------------------------------------------------------------
/src/master/monitoring.sql:
--------------------------------------------------------------------------------
1 | -- name: master-monitoring
2 | -- requires: tables
3 |
-- Per member: replication lag of its slot(s) and number of sessions opened
-- under its role.
-- A replication slot is matched to a member when the slot name with its last
-- '_'-separated segment removed equals the member role.
CREATE VIEW replication_status AS
SELECT
    replication_group_id,
    availability_zone,
    host_id,
    pg_size_pretty(pg_current_wal_lsn() - confirmed_flush_lsn) AS lag,
    coalesce(num_sessions, 0) AS num_sessions
FROM
    replication_group_member m
        LEFT JOIN pg_replication_slots ON
            array_to_string(trim_array(regexp_split_to_array(slot_name, '_'), 1), '_') = m.member_role
        LEFT JOIN (
            -- sessions currently open per user
            SELECT
                usename AS member_role,
                count(*) AS num_sessions
            FROM
                pg_stat_activity
            GROUP BY
                usename
        ) sessions USING (member_role)
;
COMMENT ON VIEW replication_status IS
$$
Shows replication status of all replicas.
$$;
28 |
--------------------------------------------------------------------------------
/src/master/publication-sync.sql:
--------------------------------------------------------------------------------
1 | -- name: publication-sync
2 | -- requires: core
3 | -- requires: master-helpers
4 |
-- Publication carrying inserts into the ping table, registered as an object
-- the extension depends on.
CREATE PUBLICATION pgwrh_controller_ping
    FOR TABLE ping
    WITH (PUBLISH = 'insert');

SELECT add_ext_dependency(
    'pg_publication',
    (
        SELECT p.oid
        FROM pg_publication AS p
        WHERE p.pubname = 'pgwrh_controller_ping'
    )
);
7 |
-- Brings publications in line with the shards of the current and target
-- versions of all replication groups:
--   1. creates a publication (insert, update, delete) for every shard table
--      that is not yet part of an extension-owned publication and registers it
--      as an extension dependency;
--   2. drops (CASCADE) every extension-owned publication, other than
--      pgwrh_controller_ping, that no longer corresponds to such a shard.
CREATE OR REPLACE FUNCTION sync_publications() RETURNS void
SET SEARCH_PATH FROM CURRENT
LANGUAGE plpgsql AS
$$
DECLARE
    todo record;
BEGIN
    -- 1. publications to create
    FOR todo IN
        SELECT
            pubname,
            c.oid::regclass AS shard_table
        FROM
            pg_class c
                JOIN pg_namespace n ON c.relnamespace = n.oid,
            pubname(nspname, relname) AS pubname
        WHERE
            EXISTS (SELECT 1 FROM
                    shard
                        JOIN replication_group USING (replication_group_id)
                WHERE
                    (schema_name, table_name) = (nspname, relname)
                    AND
                    version IN (current_version, target_version)
            )
            AND
            NOT EXISTS (SELECT 1 FROM
                    pg_publication_rel
                WHERE
                    prrelid = c.oid
                    AND
                    is_dependent_object('pg_publication', prpubid)
            )
    LOOP
        EXECUTE format('CREATE PUBLICATION %I FOR TABLE %s WITH ( publish = %L )',
                       todo.pubname,
                       todo.shard_table,
                       'insert,update,delete');
        PERFORM add_ext_dependency(
            'pg_publication',
            (SELECT oid FROM pg_publication WHERE pubname = todo.pubname::text));
    END LOOP;

    -- 2. publications to drop
    FOR todo IN
        SELECT
            p.pubname
        FROM
            pg_publication p
        WHERE
            is_dependent_object('pg_publication', oid)
            AND
            pubname NOT IN ('pgwrh_controller_ping')
            AND
            NOT EXISTS (SELECT 1 FROM
                    shard s
                        JOIN replication_group USING (replication_group_id)
                WHERE
                    version IN (current_version, target_version)
                    AND pubname(schema_name, table_name) = p.pubname
            )
    LOOP
        EXECUTE format('DROP PUBLICATION %I CASCADE', todo.pubname);
    END LOOP;

    RETURN;
END
$$;
68 |
-- Statement-level trigger wrapper: re-synchronises publications after any
-- change to replication_group. Returns NULL (the return value of an AFTER
-- statement trigger is ignored).
CREATE OR REPLACE FUNCTION sync_publications_trigger() RETURNS TRIGGER
SET SEARCH_PATH FROM CURRENT
LANGUAGE plpgsql AS
$$
BEGIN
    PERFORM sync_publications();
    RETURN NULL;
END
$$;

CREATE OR REPLACE TRIGGER sync_publications
    AFTER INSERT OR UPDATE OR DELETE OR TRUNCATE
    ON replication_group
    FOR EACH STATEMENT
    EXECUTE FUNCTION sync_publications_trigger();
79 |
--------------------------------------------------------------------------------
/src/master/snapshot.sql:
--------------------------------------------------------------------------------
1 | -- name: master-snapshot
2 | -- requires: master-implementation-views
3 |
4 | -- pgwrh
5 | -- Copyright (C) 2024 Michal Kleczek
6 |
7 | -- This program is free software: you can redistribute it and/or modify
8 | -- it under the terms of the GNU Affero General Public License as published by
9 | -- the Free Software Foundation, either version 3 of the License, or
10 | -- (at your option) any later version.
11 |
12 | -- This program is distributed in the hope that it will be useful,
13 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | -- GNU Affero General Public License for more details.
16 |
17 | -- You should have received a copy of the GNU Affero General Public License
-- along with this program. If not, see <https://www.gnu.org/licenses/>.
19 |
20 | CREATE FUNCTION replication_group_config_snapshot(_replication_group_id text, _version config_version)
21 | RETURNS void
22 | SET SEARCH_PATH FROM CURRENT
23 | LANGUAGE sql
24 | AS
25 | $$
26 | WITH sharded_pg_class AS (
27 | SELECT
28 | c.oid::regclass,
29 | st.replication_group_id,
30 | version,
31 | sharded_table_schema,
32 | sharded_table_name,
33 | replication_factor,
34 | sharding_key_expression
35 | FROM
36 | pg_class c
37 | JOIN pg_namespace n ON relnamespace = n.oid
38 | JOIN sharded_table st ON (nspname, relname) = (sharded_table_schema, sharded_table_name)
39 | WHERE
40 | (replication_group_id, version) = (_replication_group_id, _version)
41 | ),
42 | shard_snapshot AS (
43 | SELECT
44 | c.oid,
45 | st.replication_group_id,
46 | version,
47 | nspname AS schema_name,
48 | relname AS table_name,
49 | st.sharded_table_schema,
50 | st.sharded_table_name,
51 | replication_factor,
52 | "@extschema@".extract_sharding_key_value(
53 | nspname,
54 | relname,
55 | sharding_key_expression) AS sharding_key_value
56 | FROM
57 | pg_class c
58 | JOIN pg_namespace n ON n.oid = relnamespace
59 | JOIN sharded_pg_class st ON
60 | st.oid = ANY (
61 | SELECT * FROM pg_partition_ancestors(c.oid)
62 | )
63 | AND
64 | NOT EXISTS (SELECT 1 FROM
65 | sharded_pg_class des
66 | WHERE
67 | (des.replication_group_id, des.version) = (st.replication_group_id, st.version)
68 | AND des.oid = ANY (SELECT * FROM pg_partition_ancestors(c.oid))
69 | AND des.oid <> st.oid
70 | AND st.oid = ANY (SELECT * FROM pg_partition_ancestors(des.oid))
71 | )
72 | WHERE
73 | c.relkind = 'r'
74 | ),
75 | saved_shard AS (
76 | INSERT INTO shard
77 | (replication_group_id, version, schema_name, table_name, sharded_table_schema, sharded_table_name)
78 | SELECT
79 | replication_group_id,
80 | version,
81 | schema_name,
82 | table_name,
83 | sharded_table_schema,
84 | sharded_table_name
85 | FROM
86 | shard_snapshot
87 | ),
88 | saved_index AS (
89 | INSERT INTO shard_assigned_index
90 | (replication_group_id, version, schema_name, table_name, index_template_schema, index_template_table_name, index_template_name)
91 | SELECT
92 | replication_group_id, version, ss.schema_name, ss.table_name, t.index_template_schema, t.index_template_table_name, t.index_template_name
93 | FROM
94 | shard_snapshot ss
95 | JOIN shard_index_template t USING (replication_group_id, version)
96 | JOIN pg_namespace itn ON itn.nspname = t.index_template_schema
97 | JOIN pg_class itc ON itc.relnamespace = itn.oid AND itc.relname = t.index_template_table_name
98 | WHERE
99 | itc.oid = ANY (SELECT * FROM pg_partition_ancestors(ss.oid))
100 | ),
101 | group_counts AS (
102 | SELECT
103 | replication_group_id,
104 | version,
105 | count(DISTINCT availability_zone) AS az_count,
106 | count(*) AS host_count
107 | FROM
108 | shard_host_weight
109 | GROUP BY
110 | 1, 2
111 | ),
112 | replicated_shard AS (
113 | SELECT
114 | replication_group_id,
115 | version,
116 | schema_name,
117 | table_name,
118 | greatest(
119 | ceil((replication_factor * host_count) / 100),
120 | least(min_replica_count, host_count),
121 | least(min_replica_count_per_availability_zone * az_count, host_count)) AS replica_count,
122 | sharding_key_value
123 | FROM
124 | shard_snapshot sc
125 | JOIN replication_group_config USING (replication_group_id, version)
126 | JOIN group_counts USING (replication_group_id, version)
127 | )
128 | INSERT INTO shard_assigned_host (replication_group_id, version, schema_name, table_name, availability_zone, host_id)
129 | SELECT
130 | replication_group_id,
131 | version,
132 | schema_name,
133 | table_name,
134 | availability_zone,
135 | host_id
136 | FROM
137 | replicated_shard s
138 | CROSS JOIN LATERAL (
139 | SELECT
140 | availability_zone,
141 | host_id,
142 | row_number() OVER (
143 | PARTITION BY availability_zone
144 | ORDER BY "@extschema@".score(weight, sharding_key_value, host_id) DESC) AS group_rank
145 | FROM
146 | shard_host_weight
147 | JOIN shard_host USING (replication_group_id, availability_zone, host_id)
148 | JOIN replication_group_member m USING (replication_group_id, availability_zone, host_id)
149 | WHERE
150 | (replication_group_id, version) = (s.replication_group_id, s.version)
151 | ORDER BY
152 | group_rank, "@extschema@".score(100, sharding_key_value, availability_zone) DESC
153 | LIMIT
154 | s.replica_count
155 | ) h
156 | $$;
157 |
--------------------------------------------------------------------------------
/src/master/tables.sql:
--------------------------------------------------------------------------------
1 | -- name: tables
2 | -- requires: common
3 |
4 | -- pgwrh
5 | -- Copyright (C) 2024 Michal Kleczek
6 |
7 | -- This program is free software: you can redistribute it and/or modify
8 | -- it under the terms of the GNU Affero General Public License as published by
9 | -- the Free Software Foundation, either version 3 of the License, or
10 | -- (at your option) any later version.
11 |
12 | -- This program is distributed in the hope that it will be useful,
13 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | -- GNU Affero General Public License for more details.
16 |
17 | -- You should have received a copy of the GNU Affero General Public License
18 | -- along with this program. If not, see <https://www.gnu.org/licenses/>.
19 |
20 | CREATE TYPE config_version AS ENUM ('FLIP', 'FLOP'); -- two-valued tag naming a configuration version; used by the version columns of the tables below
21 | COMMENT ON TYPE config_version IS
22 | 'A FLIP/FLOP enum to use as configuration version identifier.';
23 |
24 | CREATE TABLE replication_group ( -- one row per replication group; records which configuration version is deployed and which one is being rolled out
25 | replication_group_id text NOT NULL PRIMARY KEY,
26 | current_version config_version NOT NULL DEFAULT 'FLIP',
27 | target_version config_version NOT NULL DEFAULT 'FLIP' -- equal to current_version while no configuration change is in progress
28 | );
29 | COMMENT ON TABLE replication_group IS
30 | 'Represents a specific cluster (replica group) configuration.
31 | A single server may be a source of data for multiple groups of replicas.
32 | Each group may have different configuration, in particular:
33 | * what tables should be sharded
34 | * number of desired copies per shard
35 | * member servers and shard hosts topology
36 | ';
37 | COMMENT ON COLUMN replication_group.replication_group_id IS
38 | 'Unique identifier of a replication group.';
39 | COMMENT ON COLUMN replication_group.current_version IS
40 | 'Identifier of currently deployed configuration version.';
41 | COMMENT ON COLUMN replication_group.target_version IS
42 | 'Identifier of pending configuration version that is currently being deployed.';
43 |
44 | CREATE TABLE replication_group_lock ( -- guard row: while it exists, the foreign key below blocks DELETE of the referenced replication_group
45 | replication_group_id text NOT NULL PRIMARY KEY REFERENCES replication_group(replication_group_id) -- no ON DELETE action, so the lock row has to be removed first
46 | );
47 | COMMENT ON TABLE replication_group_lock IS
48 | $$
49 | Having a lock on replication_group ensures accidental DELETE on a group cannot happen.
50 |
51 | To delete a replication group it is necessary to delete replication_group_lock first.
52 | $$;
53 |
54 | CREATE TABLE replication_group_config ( -- one row per (replication group, configuration version)
55 | replication_group_id text NOT NULL REFERENCES replication_group(replication_group_id) ON DELETE CASCADE,
56 | version config_version NOT NULL,
57 |
58 | min_replica_count int NOT NULL CHECK ( min_replica_count >= 0 ) DEFAULT 1, -- lower bound on the number of copies of each shard; the shard-assignment snapshot caps it at the number of hosts
59 | min_replica_count_per_availability_zone int NOT NULL CHECK ( min_replica_count_per_availability_zone >= 0 ) DEFAULT 1, -- multiplied by the number of availability zones (and capped at the number of hosts) by the same snapshot
60 |
61 | PRIMARY KEY (replication_group_id, version)
62 | );
63 | COMMENT ON TABLE replication_group_config IS
64 | 'Represents a version of configuration of a replication group.
65 |
66 | Each cluster (replication group) configuration is versioned to make sure
67 | changes in cluster topology and shards configuration do not cause any downtime.
68 |
69 | There may be two versions of configuration present at the same time.
70 | A configuration version might be "pending" or "ready".
71 |
72 | Version marked as "ready" (pending = false) is a configuration version that all
73 | replicas installed and configured successfully. The shards assigned to replicas in that version are copied, indexed and available to use.
74 |
75 | Version marked as "pending" (pending = true) is a configuration version that is under installation/configuration by the replicas.
76 |
77 | A replica keeps all shards from "ready" configuration even if a shard might be no longer assigned to it in "pending" configuration version.
78 | ';
79 |
80 | CREATE TABLE replication_group_config_clone ( -- records that target_version of a group's configuration is cloned from source_version; the copying is done by the insert triggers defined on this table
81 | replication_group_id text NOT NULL,
82 | source_version config_version NOT NULL,
83 | target_version config_version NOT NULL,
84 |
85 | PRIMARY KEY (replication_group_id, target_version), -- at most one clone source per target version
86 | CHECK ( source_version <> target_version ),
87 | FOREIGN KEY (replication_group_id, source_version)
88 | REFERENCES replication_group_config(replication_group_id, version) ON DELETE CASCADE,
89 | FOREIGN KEY (replication_group_id, target_version)
90 | REFERENCES replication_group_config(replication_group_id, version) ON DELETE CASCADE -- not deferrable: the target configuration row has to exist before the clone row is inserted
91 | );
92 |
93 | CREATE TABLE replication_group_config_lock ( -- a row here marks a configuration version as locked (presumably what is_locked() consults; TODO confirm); inserting a row fires the shard snapshot trigger
94 | replication_group_id text NOT NULL,
95 | version config_version NOT NULL,
96 | -- most probably it should be separate
97 | -- but for now it is simpler here
98 | seed uuid NOT NULL DEFAULT gen_random_uuid(), -- NOTE(review): no reader of seed is visible in this part of the sources; confirm where it is consumed
99 |
100 | PRIMARY KEY (replication_group_id, version),
101 | FOREIGN KEY (replication_group_id, version)
102 | REFERENCES replication_group_config(replication_group_id, version)
103 | ON DELETE CASCADE
104 | );
105 |
106 | ALTER TABLE replication_group ADD FOREIGN KEY (replication_group_id, current_version) -- current_version must name a configuration version that has a lock row
107 | REFERENCES replication_group_config_lock(replication_group_id, version) DEFERRABLE INITIALLY DEFERRED; -- deferred because the lock row is only created by the AFTER INSERT trigger replication_group_prepare, i.e. after the referencing row
108 |
109 | ALTER TABLE replication_group ADD FOREIGN KEY (replication_group_id, target_version) -- same requirement for target_version, also checked at transaction end
110 | REFERENCES replication_group_config_lock(replication_group_id, version) DEFERRABLE INITIALLY DEFERRED;
111 |
112 | CREATE TABLE replication_group_member ( -- one row per cluster node, identified by (replication group, availability zone, host id)
113 | replication_group_id text NOT NULL REFERENCES replication_group(replication_group_id),
114 | availability_zone text NOT NULL,
115 | host_id text NOT NULL,
116 | member_role text NOT NULL UNIQUE, -- presumably the database role this member connects with; unique across all groups. TODO confirm
117 | same_zone_multiplier smallint NOT NULL CHECK ( same_zone_multiplier BETWEEN 1 AND 5 ) DEFAULT 2,
118 |
119 | subscribed_local_shards json NOT NULL DEFAULT '[]', -- NOTE(review): this and the four json columns below look like state reported back by the member; confirm against the replica-facing API
120 | indexes json NOT NULL DEFAULT '[]',
121 | connected_local_shards json NOT NULL DEFAULT '[]',
122 | connected_remote_shards json NOT NULL DEFAULT '[]',
123 | users json NOT NULL DEFAULT '[]',
124 |
125 | PRIMARY KEY (replication_group_id, availability_zone, host_id)
126 | );
127 | COMMENT ON TABLE replication_group_member IS
128 | 'Represents a node in a cluster (replication group).
129 |
130 | A cluster consists of two types of nodes:
131 |
132 | * shard hosts - nodes that replicate and serve data
133 | * non replicating members - nodes that act only as proxies (ie. not hosting any shards)';
134 |
135 | CREATE TABLE shard_host ( -- network address and online flag of a replication_group_member that hosts shards
136 | replication_group_id text NOT NULL,
137 | availability_zone text NOT NULL,
138 | host_id text NOT NULL,
139 | host_name text NOT NULL,
140 | port int NOT NULL CHECK ( port > 0 ),
141 |
142 | online boolean NOT NULL DEFAULT true,
143 |
144 | PRIMARY KEY (replication_group_id, availability_zone, host_id),
145 | FOREIGN KEY (replication_group_id, availability_zone, host_id)
146 | REFERENCES replication_group_member(replication_group_id, availability_zone, host_id)
147 | ON DELETE CASCADE, -- removing the member removes its shard_host row
148 | UNIQUE (host_name, port) -- an address can be registered only once, across all replication groups
149 | );
150 | COMMENT ON TABLE shard_host IS
151 | 'Represents a data replicating node in a cluster (replication group).';
152 | COMMENT ON COLUMN shard_host.online IS
153 | 'Shard host marked as offline is not going to receive any requests for data from other nodes.
154 | It is still replicating shards assigned to it.
155 |
156 | This flag is supposed to be used in situation when a particular node must be
157 | temporarily disconnected from a cluster for maintenance purposes.';
158 |
159 | CREATE TABLE shard_host_weight ( -- weight of a shard host within one configuration version
160 | replication_group_id text NOT NULL,
161 | availability_zone text NOT NULL,
162 | host_id text NOT NULL,
163 | version config_version NOT NULL,
164 | weight int NOT NULL CHECK ( weight > 0 ), -- passed to score() when the shard-assignment snapshot ranks hosts for a shard
165 |
166 | PRIMARY KEY (replication_group_id, availability_zone, host_id, version),
167 | FOREIGN KEY (replication_group_id, availability_zone, host_id)
168 | REFERENCES shard_host(replication_group_id, availability_zone, host_id)
169 | ON DELETE CASCADE,
170 | FOREIGN KEY (replication_group_id, version)
171 | REFERENCES replication_group_config(replication_group_id, version)
172 | ON DELETE CASCADE
173 | );
174 | COMMENT ON TABLE shard_host_weight IS
175 | 'Weight of a shard host in a specific configuration version';
176 |
177 | CREATE TABLE sharded_table ( -- a table registered for sharding in one configuration version; the snapshot treats ordinary tables (relkind 'r') found via pg_partition_ancestors of it as its shards
178 | replication_group_id text NOT NULL,
179 | sharded_table_schema text NOT NULL,
180 | sharded_table_name text NOT NULL,
181 | version config_version NOT NULL,
182 | replication_factor decimal(5, 2) NOT NULL CHECK ( replication_factor BETWEEN 0 AND 100 ), -- percentage of hosts that should hold each shard: the snapshot computes ceil(replication_factor * host_count / 100)
183 | sharding_key_expression text NOT NULL DEFAULT 'SELECT $1 || $2', -- handed to extract_sharding_key_value() together with the shard's schema and table name
184 |
185 | PRIMARY KEY (replication_group_id, sharded_table_schema, sharded_table_name, version),
186 | FOREIGN KEY (replication_group_id, version)
187 | REFERENCES replication_group_config(replication_group_id, version)
188 | ON DELETE CASCADE
189 | );
190 |
191 | CREATE TABLE shard_index_template ( -- a named index template attached to a table within one configuration version
192 | replication_group_id text NOT NULL,
193 | version config_version NOT NULL,
194 | index_template_schema text NOT NULL, -- schema and name of the table the template is attached to; the snapshot assigns it to shards that have this table among their partition ancestors
195 | index_template_table_name text NOT NULL,
196 | index_template_name name NOT NULL,
197 | index_template text NOT NULL, -- NOTE(review): the template format is not visible in this part of the sources; confirm where it is expanded
198 |
199 | PRIMARY KEY (replication_group_id, version, index_template_schema, index_template_table_name, index_template_name),
200 | FOREIGN KEY (replication_group_id, version) REFERENCES replication_group_config(replication_group_id, version) ON DELETE CASCADE
201 | );
202 |
203 | -- SNAPSHOT
204 |
205 | CREATE TABLE shard ( -- snapshot of the shards of a locked configuration version; each shard is a table (schema_name.table_name) tied to its sharded table
206 | replication_group_id text NOT NULL,
207 | version config_version NOT NULL,
208 | schema_name text NOT NULL,
209 | table_name text NOT NULL,
210 | sharded_table_schema text NOT NULL,
211 | sharded_table_name text NOT NULL,
212 |
213 | PRIMARY KEY (replication_group_id, version, schema_name, table_name),
214 | FOREIGN KEY (replication_group_id, version, sharded_table_schema, sharded_table_name)
215 | REFERENCES sharded_table (replication_group_id, version, sharded_table_schema, sharded_table_name), -- no ON DELETE action: a sharded_table row that still has shards cannot be deleted directly
216 | FOREIGN KEY (replication_group_id, version)
217 | REFERENCES replication_group_config_lock(replication_group_id, version)
218 | ON DELETE CASCADE -- removing the version's lock row discards its shard snapshot
219 | );
220 |
221 | CREATE TABLE shard_assigned_host ( -- snapshot of which hosts are assigned to hold each shard in a configuration version
222 | replication_group_id text NOT NULL,
223 | version config_version NOT NULL,
224 | schema_name text NOT NULL,
225 | table_name text NOT NULL,
226 | availability_zone text NOT NULL,
227 | host_id text NOT NULL,
228 |
229 | PRIMARY KEY (replication_group_id, version, schema_name, table_name, availability_zone, host_id),
230 | FOREIGN KEY (replication_group_id, version, schema_name, table_name)
231 | REFERENCES shard(replication_group_id, version, schema_name, table_name)
232 | ON DELETE CASCADE,
233 | FOREIGN KEY (replication_group_id, version, availability_zone, host_id) -- an assigned host must have a weight in the same configuration version
234 | REFERENCES shard_host_weight(replication_group_id, version, availability_zone, host_id)
235 | DEFERRABLE INITIALLY DEFERRED -- checked at the end of the transaction instead of per statement
236 | );
237 |
238 | CREATE TABLE shard_assigned_index ( -- snapshot of which index templates apply to each shard in a configuration version
239 | replication_group_id text NOT NULL,
240 | version config_version NOT NULL,
241 | schema_name text NOT NULL,
242 | table_name text NOT NULL,
243 | index_template_schema text NOT NULL,
244 | index_template_table_name text NOT NULL,
245 | index_template_name name NOT NULL,
246 |
247 | PRIMARY KEY (replication_group_id, version, schema_name, table_name, index_template_schema, index_template_table_name, index_template_name),
248 | FOREIGN KEY (replication_group_id, version, index_template_schema, index_template_table_name, index_template_name)
249 | REFERENCES shard_index_template(replication_group_id, version, index_template_schema, index_template_table_name, index_template_name)
250 | DEFERRABLE INITIALLY DEFERRED, -- checked at the end of the transaction instead of per statement
251 | FOREIGN KEY (replication_group_id, version, schema_name, table_name)
252 | REFERENCES shard(replication_group_id, version, schema_name, table_name)
253 | ON DELETE CASCADE -- assignments disappear together with their shard row
254 | );
255 |
256 | --------------------
257 | --------------------
258 | CREATE TABLE ping ( -- timestamps of pings; a row is added by ping_on_version_change_trigger() when a group's current or target version changes
259 | last_time timestamptz NOT NULL PRIMARY KEY DEFAULT clock_timestamp()
260 | );
261 | SELECT exec_dynamic(format('GRANT SELECT ON ping TO %I', pgwrh_replica_role_name())); -- grants read access on ping to the role named by pgwrh_replica_role_name()
262 |
--------------------------------------------------------------------------------
/src/master/triggers.sql:
--------------------------------------------------------------------------------
1 | -- name: master-triggers
2 | -- requires: core
3 | -- requires: publication-sync
4 | -- requires: master-snapshot
5 |
6 | -- pgwrh
7 | -- Copyright (C) 2024 Michal Kleczek
8 |
9 | -- This program is free software: you can redistribute it and/or modify
10 | -- it under the terms of the GNU Affero General Public License as published by
11 | -- the Free Software Foundation, either version 3 of the License, or
12 | -- (at your option) any later version.
13 |
14 | -- This program is distributed in the hope that it will be useful,
15 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | -- GNU Affero General Public License for more details.
18 |
19 | -- You should have received a copy of the GNU Affero General Public License
20 | -- along with this program. If not, see <https://www.gnu.org/licenses/>.
21 |
22 |
23 | CREATE OR REPLACE FUNCTION replication_group_prepare() RETURNS trigger LANGUAGE plpgsql AS
24 | $$
25 | BEGIN
26 |     -- Give the new group a configuration row and a lock row for its current version
27 |     -- (rows that already exist are left alone), then add the delete guard for the group.
28 |     INSERT INTO "@extschema@".replication_group_config (replication_group_id, version) VALUES (NEW.replication_group_id, NEW.current_version) ON CONFLICT DO NOTHING;
29 |     INSERT INTO "@extschema@".replication_group_config_lock (replication_group_id, version) VALUES (NEW.replication_group_id, NEW.current_version) ON CONFLICT DO NOTHING;
30 |     INSERT INTO "@extschema@".replication_group_lock (replication_group_id) VALUES (NEW.replication_group_id);
31 |     RETURN NEW;
32 | END
33 | $$;
34 | CREATE OR REPLACE TRIGGER replication_group_prepare
35 |     AFTER INSERT ON replication_group
36 |     FOR EACH ROW EXECUTE FUNCTION replication_group_prepare();
37 | COMMENT ON TRIGGER replication_group_prepare ON replication_group IS
38 | $$
39 | * Creates a default, empty configuration that is locked and marked as current.
40 | * Inserts a replication_group_lock to prevent accidental deletes of newly created replication_group.
41 | $$;
42 |
43 | CREATE FUNCTION version_lifecycle_check() RETURNS trigger LANGUAGE plpgsql AS
44 | $$
45 | BEGIN
46 |     -- Only a change of current_version can violate the lifecycle; which rule applies
47 |     -- depends on whether a configuration change was already in progress before the update.
48 |     IF NEW.current_version <> OLD.current_version THEN
49 |         IF OLD.current_version = OLD.target_version THEN
50 |             RAISE 'Cannot switch current version directly. Please update target version first.';
51 |         ELSIF NEW.target_version <> NEW.current_version THEN
52 |             RAISE 'Cannot swap version. Please rollback target version first.';
53 |         END IF;
54 |     END IF;
55 |     RETURN NEW;
56 | END
57 | $$;
58 | CREATE TRIGGER version_lifecycle_check
59 |     BEFORE UPDATE ON replication_group
60 |     FOR EACH ROW EXECUTE FUNCTION version_lifecycle_check();
61 | COMMENT ON TRIGGER version_lifecycle_check ON replication_group IS
62 | $comment$
63 | Makes sure it is only possible to change current_version and target_version according to the following rules:
64 | * If there is no configuration change in progress (ie. current_version = target_version) then it is only possible to change target_version
65 | * If there is configuration change in progress it is possible to either
66 | ** commit configuration change (ie. set current_version to target_version)
67 | ** or rollback configuration change (ie. set target_version to current_version)
68 |
69 | It must not be possible to change current_version without changing target_version first.
70 | $comment$;
71 |
72 | CREATE FUNCTION check_replication_group_rollout_done() RETURNS trigger LANGUAGE plpgsql AS
73 | $$
74 | BEGIN -- rejects the update while either "missing shard" view still lists rows for the version being made current
75 | IF EXISTS (SELECT 1 FROM
76 | "@extschema@".missing_connected_local_shard
77 | WHERE
78 | version = NEW.current_version -- NOTE(review): filters on version only; if the view also exposes replication_group_id, rows of other groups with the same FLIP/FLOP value would block this group as well. Confirm against the view definition
79 | ) THEN
80 | RAISE 'Not all hosts confirmed configuration of required local shards'
81 | USING HINT = 'Check missing_connected_local_shard view for details';
82 | END IF;
83 | IF EXISTS (SELECT 1 FROM
84 | "@extschema@".missing_connected_remote_shard
85 | WHERE
86 | version = NEW.current_version -- NOTE(review): same version-only filter as above
87 | ) THEN
88 | RAISE 'Not all hosts confirmed configuration of required remote shards'
89 | USING HINT = 'Check missing_connected_remote_shard view for details';
90 | END IF;
91 | RETURN NEW;
92 | END;
93 | $$;
94 | CREATE TRIGGER check_replication_group_rollout_done
95 | BEFORE UPDATE ON replication_group
96 | FOR EACH ROW
97 | WHEN ( NEW.current_version <> OLD.current_version ) -- the check runs only when current_version actually changes
98 | EXECUTE FUNCTION check_replication_group_rollout_done();
99 | COMMENT ON TRIGGER check_replication_group_rollout_done ON replication_group IS
100 | $comment$
101 | # Generated by DeepSeek-R1
102 | This trigger ensures that current_version of a replication group can only be changed once all hosts have confirmed their configuration for the new version.
103 | It checks both local and remote shards before allowing the update.
104 | When an update changes current_version, the trigger function looks for rows with the new current_version in the missing_connected_local_shard and missing_connected_remote_shard views. If either view contains such a row, it raises an error saying that not all hosts have confirmed the required shards, and the update is rejected. This prevents switching to a configuration version that has not been fully rolled out.
105 |
106 | Because it is a BEFORE UPDATE trigger, the error aborts the update before the row is changed, so the replication group stays on its previous current_version.
107 | $comment$;
108 |
109 | CREATE FUNCTION ping_on_version_change_trigger() RETURNS trigger LANGUAGE plpgsql AS
110 | $$
111 | BEGIN
112 | INSERT INTO "@extschema@".ping VALUES (now()) ON CONFLICT DO NOTHING; -- now() is the transaction start time and last_time is the primary key, so repeated firings within one transaction leave a single ping row
113 | RETURN NEW;
114 | END;
115 | $$;
116 | CREATE TRIGGER ping_on_version_change AFTER UPDATE ON replication_group
117 | FOR EACH ROW
118 | WHEN ( NEW.current_version <> OLD.current_version OR NEW.target_version <> OLD.target_version ) -- fire only when one of the two version columns changed
119 | EXECUTE FUNCTION ping_on_version_change_trigger();
120 | COMMENT ON TRIGGER ping_on_version_change ON replication_group IS
121 | $$
122 | # Generated by DeepSeek-R1
123 | * This trigger inserts a row into the ping table whenever the current or target version of a replication group changes.
124 | * A new ping row lets replicas that subscribe to the ping table notice the configuration change and react to it.
125 | $$;
126 |
127 | CREATE OR REPLACE FUNCTION next_pending_version_trigger() RETURNS TRIGGER
128 | LANGUAGE plpgsql AS
129 | $$BEGIN
130 | NEW.version := "@extschema@".next_pending_version(NEW.replication_group_id); -- replaces whatever version the inserting statement supplied with the result of next_pending_version() for the row's group
131 | RETURN NEW;
132 | END$$;
133 |
134 | CREATE OR REPLACE FUNCTION forbid_locked_version_modifications() RETURNS TRIGGER
135 | LANGUAGE plpgsql AS
136 | $$
137 | BEGIN -- unconditionally rejects the row change; the triggers using it decide via their WHEN clause when it runs
138 | RAISE 'This config version is locked. Modifications in % are forbidden.', TG_RELID::regclass; -- TG_RELID::regclass renders the name of the table whose trigger fired
139 | RETURN NULL; -- not reached: RAISE without a level throws an exception
140 | END
141 | $$;
142 |
143 | CREATE OR REPLACE FUNCTION clone_config_trigger() RETURNS TRIGGER
144 | LANGUAGE plpgsql AS
145 | $$BEGIN
146 | INSERT INTO "@extschema@".replication_group_config_clone (replication_group_id, source_version, target_version) -- registers the row's version as a clone target; the clone table's own triggers then copy the configuration
147 | VALUES (NEW.replication_group_id, "@extschema@".prev_version(NEW.version), NEW.version) -- prev_version() is defined elsewhere; presumably it yields the other FLIP/FLOP value. TODO confirm
148 | ON CONFLICT DO NOTHING; -- a clone row for this target version already exists: nothing to do
149 | RETURN NEW;
150 | END$$;
151 |
152 | CREATE OR REPLACE TRIGGER forbid_not_pending_version_update BEFORE UPDATE ON replication_group_config -- blocks updates of configuration rows whose version is locked
153 | FOR EACH ROW
154 | WHEN (is_locked(OLD.replication_group_id, OLD.version) OR is_locked(NEW.replication_group_id, NEW.version)) -- both the old and the new key are tested, so a row can be moved neither out of nor into a locked version
155 | EXECUTE FUNCTION forbid_locked_version_modifications();
156 |
157 | CREATE OR REPLACE TRIGGER "00_next_pending_version" BEFORE INSERT ON shard_host_weight -- triggers for the same event fire in name order, so the "00_" prefix makes this one run before the lock check below
158 | FOR EACH ROW EXECUTE FUNCTION next_pending_version_trigger();
159 |
160 | CREATE OR REPLACE TRIGGER forbid_not_pending_version_insert BEFORE INSERT ON shard_host_weight
161 | FOR EACH ROW
162 | WHEN (is_locked(NEW.replication_group_id, NEW.version)) -- evaluated on NEW as already modified by the earlier BEFORE trigger
163 | EXECUTE FUNCTION forbid_locked_version_modifications();
164 | CREATE OR REPLACE TRIGGER forbid_not_pending_version_update BEFORE UPDATE ON shard_host_weight
165 | FOR EACH ROW
166 | WHEN (is_locked(OLD.replication_group_id, OLD.version) OR is_locked(NEW.replication_group_id, NEW.version)) -- a row can be moved neither out of nor into a locked version
167 | EXECUTE FUNCTION forbid_locked_version_modifications();
168 | CREATE OR REPLACE TRIGGER forbid_not_pending_version_delete BEFORE DELETE ON shard_host_weight
169 | FOR EACH ROW
170 | WHEN (is_locked(OLD.replication_group_id, OLD.version))
171 | EXECUTE FUNCTION forbid_locked_version_modifications();
172 |
173 | CREATE OR REPLACE TRIGGER clone_config AFTER INSERT ON shard_host_weight -- after a successful insert, registers the row's version in replication_group_config_clone
174 | FOR EACH ROW EXECUTE FUNCTION clone_config_trigger();
175 |
176 | CREATE OR REPLACE TRIGGER "00_next_pending_version" BEFORE INSERT ON sharded_table -- triggers for the same event fire in name order, so the "00_" prefix makes this one run before the lock check below
177 | FOR EACH ROW EXECUTE FUNCTION next_pending_version_trigger();
178 |
179 | CREATE OR REPLACE TRIGGER forbid_not_pending_version_insert BEFORE INSERT ON sharded_table
180 | FOR EACH ROW
181 | WHEN (is_locked(NEW.replication_group_id, NEW.version)) -- evaluated on NEW as already modified by the earlier BEFORE trigger
182 | EXECUTE FUNCTION forbid_locked_version_modifications();
183 | CREATE OR REPLACE TRIGGER forbid_not_pending_version_update BEFORE UPDATE ON sharded_table
184 | FOR EACH ROW
185 | WHEN (is_locked(OLD.replication_group_id, OLD.version) OR is_locked(NEW.replication_group_id, NEW.version)) -- a row can be moved neither out of nor into a locked version
186 | EXECUTE FUNCTION forbid_locked_version_modifications();
187 | CREATE OR REPLACE TRIGGER forbid_not_pending_version_delete BEFORE DELETE ON sharded_table
188 | FOR EACH ROW
189 | WHEN (is_locked(OLD.replication_group_id, OLD.version))
190 | EXECUTE FUNCTION forbid_locked_version_modifications();
191 |
192 | CREATE OR REPLACE TRIGGER clone_config AFTER INSERT ON sharded_table -- after a successful insert, registers the row's version in replication_group_config_clone
193 | FOR EACH ROW EXECUTE FUNCTION clone_config_trigger();
194 |
195 | CREATE OR REPLACE TRIGGER "00_next_pending_version" BEFORE INSERT ON shard_index_template -- triggers for the same event fire in name order, so the "00_" prefix makes this one run before the lock check below
196 | FOR EACH ROW EXECUTE FUNCTION next_pending_version_trigger();
197 |
198 | CREATE OR REPLACE TRIGGER forbid_not_pending_version_insert BEFORE INSERT ON shard_index_template
199 | FOR EACH ROW
200 | WHEN (is_locked(NEW.replication_group_id, NEW.version)) -- evaluated on NEW as already modified by the earlier BEFORE trigger
201 | EXECUTE FUNCTION forbid_locked_version_modifications();
202 | CREATE OR REPLACE TRIGGER forbid_not_pending_version_update BEFORE UPDATE ON shard_index_template
203 | FOR EACH ROW
204 | WHEN (is_locked(OLD.replication_group_id, OLD.version) OR is_locked(NEW.replication_group_id, NEW.version)) -- a row can be moved neither out of nor into a locked version
205 | EXECUTE FUNCTION forbid_locked_version_modifications();
206 | CREATE OR REPLACE TRIGGER forbid_not_pending_version_delete BEFORE DELETE ON shard_index_template
207 | FOR EACH ROW
208 | WHEN (is_locked(OLD.replication_group_id, OLD.version))
209 | EXECUTE FUNCTION forbid_locked_version_modifications();
210 |
211 | CREATE OR REPLACE TRIGGER clone_config AFTER INSERT ON shard_index_template -- after a successful insert, registers the row's version in replication_group_config_clone
212 | FOR EACH ROW EXECUTE FUNCTION clone_config_trigger();
213 |
214 | CREATE OR REPLACE FUNCTION replication_group_config_snapshot_trigger() RETURNS trigger LANGUAGE plpgsql AS
215 | $$
216 | BEGIN
217 | PERFORM "@extschema@".replication_group_config_snapshot(NEW.replication_group_id, NEW.version); -- runs the snapshot for the (group, version) of the newly inserted lock row; its result is discarded
218 | RETURN NEW;
219 | END
220 | $$;
221 | CREATE OR REPLACE TRIGGER replication_group_config_snapshot AFTER INSERT ON replication_group_config_lock -- locking a configuration version takes its snapshot
222 | FOR EACH ROW EXECUTE FUNCTION replication_group_config_snapshot_trigger();
223 | --------------------
224 | --------------------
225 | CREATE FUNCTION before_clone_insert_trigger() RETURNS trigger LANGUAGE plpgsql AS
226 | $$
227 | BEGIN -- creates the target version's replication_group_config row before the clone row is stored, so the clone row's foreign key on target_version can be satisfied
228 | INSERT INTO "@extschema@".replication_group_config (replication_group_id, version, min_replica_count, min_replica_count_per_availability_zone)
229 | SELECT replication_group_id, NEW.target_version, min_replica_count, min_replica_count_per_availability_zone FROM -- same settings as the source version, stored under the target version
230 | "@extschema@".replication_group_config
231 | WHERE
232 | replication_group_id = NEW.replication_group_id
233 | AND version = NEW.source_version
234 | ON CONFLICT DO NOTHING; -- an existing target configuration row is kept as it is
235 | RETURN NEW;
236 | END
237 | $$;
238 | --------------------
239 | --------------------
240 | CREATE FUNCTION after_clone_insert_trigger()
241 | RETURNS trigger
242 | SET SEARCH_PATH FROM CURRENT
243 | LANGUAGE plpgsql AS
244 | $$
245 | -- Row trigger function for replication_group_config_clone: copies the host weights, sharded tables
246 | -- and index templates of NEW.source_version into NEW.target_version of the same replication group.
247 | -- Rows that already exist in the target version are kept (ON CONFLICT DO NOTHING). Returns NEW.
248 | BEGIN
249 |     INSERT INTO shard_host_weight (replication_group_id, availability_zone, host_id, version, weight)
250 |     SELECT replication_group_id, availability_zone, host_id, NEW.target_version, weight
251 |     FROM shard_host_weight
252 |     WHERE (replication_group_id, version) = (NEW.replication_group_id, NEW.source_version)
253 |     ON CONFLICT DO NOTHING;
254 |
255 |     -- sharding_key_expression is copied explicitly: leaving it out would reset a customised
256 |     -- expression to the column default in the cloned version.
257 |     INSERT INTO sharded_table (replication_group_id, sharded_table_schema, sharded_table_name, version, replication_factor, sharding_key_expression)
258 |     SELECT replication_group_id, sharded_table_schema, sharded_table_name, NEW.target_version, replication_factor, sharding_key_expression
259 |     FROM sharded_table
260 |     WHERE (replication_group_id, version) = (NEW.replication_group_id, NEW.source_version)
261 |     ON CONFLICT DO NOTHING;
262 |
263 |     INSERT INTO shard_index_template (replication_group_id, version, index_template_schema, index_template_table_name, index_template_name, index_template)
264 |     SELECT replication_group_id, NEW.target_version, index_template_schema, index_template_table_name, index_template_name, index_template
265 |     FROM shard_index_template
266 |     WHERE (replication_group_id, version) = (NEW.replication_group_id, NEW.source_version)
267 |     ON CONFLICT DO NOTHING;
268 |
269 |     RETURN NEW;
270 | END
271 | $$;
272 | COMMENT ON FUNCTION after_clone_insert_trigger() IS
273 | 'Copies configuration from one version to another. Ignores already existing items.';
274 |
275 | CREATE TRIGGER before_insert BEFORE INSERT ON replication_group_config_clone -- creates the target version's configuration row first
276 | FOR EACH ROW EXECUTE FUNCTION before_clone_insert_trigger();
277 | CREATE TRIGGER after_insert AFTER INSERT ON replication_group_config_clone -- then copies the per-version configuration tables into the target version
278 | FOR EACH ROW EXECUTE FUNCTION after_clone_insert_trigger();
279 |
280 | CREATE FUNCTION make_sure_daemon_started_on_ping_trigger() RETURNS TRIGGER LANGUAGE plpgsql AS
281 | $$
282 | BEGIN
283 | PERFORM "@extschema@".start_sync_daemon(tg_argv[0]::real); -- the first trigger argument is cast to real and passed on; configure_controller() creates the trigger with its refresh_seconds value as that argument
284 | RETURN NEW;
285 | END
286 | $$;
287 | COMMENT ON FUNCTION make_sure_daemon_started_on_ping_trigger() IS
288 | 'Starts sync daemon if it is not running.';
289 |
--------------------------------------------------------------------------------
/src/replica/api-management.sql:
--------------------------------------------------------------------------------
1 | -- name: replica-api-management
2 | -- requires: replica-daemon
3 | -- requires: replica-helpers
4 |
5 | -- pgwrh
6 | -- Copyright (C) 2024 Michal Kleczek
7 |
8 | -- This program is free software: you can redistribute it and/or modify
9 | -- it under the terms of the GNU Affero General Public License as published by
10 | -- the Free Software Foundation, either version 3 of the License, or
11 | -- (at your option) any later version.
12 |
13 | -- This program is distributed in the hope that it will be useful,
14 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | -- GNU Affero General Public License for more details.
17 |
18 | -- You should have received a copy of the GNU Affero General Public License
19 | -- along with this program. If not, see <https://www.gnu.org/licenses/>.
20 |
21 | CREATE OR REPLACE FUNCTION configure_controller(host text, port text, username text, password text, start_daemon boolean DEFAULT true, refresh_seconds real DEFAULT 20)
22 | RETURNS void
23 | SET SEARCH_PATH FROM CURRENT
24 | SECURITY DEFINER
25 | LANGUAGE plpgsql AS
26 | $$
27 | -- Connects this replica to its controller: updates the replica_controller foreign server and user mapping, installs the
28 | -- ping trigger, records and creates the pgwrh_replica_subscription subscription and, if start_daemon is true, starts the sync daemon.
29 | DECLARE
30 |     r record;
31 |     conninfo text; -- libpq connection string used by the subscription
32 | BEGIN
33 |     FOR r IN SELECT * FROM "@extschema@".update_server_options('replica_controller', host, port) AS u(cmd) LOOP
34 |         EXECUTE r.cmd;
35 |     END LOOP;
36 |     FOR r IN SELECT * FROM "@extschema@".update_user_mapping('replica_controller', username, password) AS u(cmd) LOOP
37 |         EXECUTE r.cmd;
38 |     END LOOP;
39 |     PERFORM exec_dynamic(format('CREATE TRIGGER make_sure_daemon_started_on_ping AFTER INSERT ON ping
40 | FOR ROW EXECUTE FUNCTION make_sure_daemon_started_on_ping_trigger(%s)', refresh_seconds));
41 |     ALTER TABLE ping ENABLE REPLICA TRIGGER make_sure_daemon_started_on_ping;
42 |     INSERT INTO shard_subscription (subname) VALUES ('pgwrh_replica_subscription');
43 |     -- Each conninfo value is single-quoted with \ and ' backslash-escaped (libpq rules), so spaces, quotes or backslashes in the credentials cannot break or alter the statement.
44 |     conninfo := (
45 |         SELECT string_agg(format('%s=''%s''', k, replace(replace(v, E'\\', E'\\\\'), '''', E'\\''')), ' ')
46 |         FROM (VALUES ('host', host), ('port', port), ('user', username), ('password', password), ('dbname', current_database()::text)) AS kv(k, v)
47 |     ) || ' target_session_attrs=primary';
48 |     PERFORM * FROM "@extschema:pg_background@".pg_background_result(
49 |         "@extschema:pg_background@".pg_background_launch(
50 |             format('CREATE SUBSCRIPTION pgwrh_replica_subscription CONNECTION %L PUBLICATION %I WITH (copy_data = false, %s)',
51 |                 conninfo, 'pgwrh_controller_ping',
52 |                 (SELECT string_agg(format('%s = %L', key, val), ', ') FROM (
53 |                     SELECT 'slot_name' AS key, username || '_' || (random() * 10000000)::bigint::text AS val -- random slot_name
54 |                     UNION ALL
55 |                     -- add failover = 'true' option for PostgreSQL >= 17
56 |                     SELECT 'failover' AS key, 'true' AS val
57 |                     WHERE current_setting('server_version_num')::int >= 170000 -- numeric version; no parsing of the display string
58 |                 ) opts)
59 |             )
60 |         )
61 |     ) AS discarded(result text);
62 |     IF start_daemon THEN
63 |         PERFORM "@extschema@".start_sync_daemon(refresh_seconds);
64 |     END IF;
65 | END
66 | $$;
67 |
--------------------------------------------------------------------------------
/src/replica/daemon.sql:
--------------------------------------------------------------------------------
1 | -- name: replica-daemon
2 | -- requires: replica-sync
3 | -- requires: replica-status
4 |
5 | -- pgwrh
6 | -- Copyright (C) 2024 Michal Kleczek
7 |
8 | -- This program is free software: you can redistribute it and/or modify
9 | -- it under the terms of the GNU Affero General Public License as published by
10 | -- the Free Software Foundation, either version 3 of the License, or
11 | -- (at your option) any later version.
12 |
13 | -- This program is distributed in the hope that it will be useful,
14 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | -- GNU Affero General Public License for more details.
17 |
18 | -- You should have received a copy of the GNU Affero General Public License
19 | -- along with this program. If not, see <https://www.gnu.org/licenses/>.
20 |
21 | -- Starts `commands` in a pg_background worker and detaches from it right away,
22 | -- so the caller never waits for, or reads, the worker's result.
23 | CREATE OR REPLACE FUNCTION launch_in_background(commands text)
24 |     RETURNS void
25 |     LANGUAGE plpgsql AS
26 | $$
27 | DECLARE
28 |     worker_pid int;
29 | BEGIN
30 |     worker_pid := "@extschema:pg_background@".pg_background_launch(commands);
31 |     -- short pause between launch and detach
32 |     PERFORM pg_sleep(0.1);
33 |     PERFORM "@extschema:pg_background@".pg_background_detach(worker_pid);
34 | END
35 | $$;
31 |
32 | -- Fire-and-forget: runs one sync_replica_worker() pass in a detached background worker.
33 | CREATE OR REPLACE FUNCTION launch_sync()
34 |     RETURNS void
35 |     LANGUAGE sql AS
36 | $$
37 |     SELECT "@extschema@".launch_in_background('CAll "@extschema@".sync_replica_worker();');
38 | $$;
36 |
37 | -- Endless synchronisation loop.
38 | --
39 | -- Parameters:
40 | --   seconds            pause between two sync_replica_worker() passes
41 | --   _application_name  value assigned to application_name for this session
42 | --
43 | -- Only one daemon runs at a time: the procedure returns immediately when the
44 | -- session-level advisory lock is already held elsewhere. Errors raised by a pass
45 | -- are reported as WARNING and the loop continues. The loop ends once extension
46 | -- pgwrh is no longer present in pg_extension; the advisory lock is then released
47 | -- explicitly so that a long-lived calling session does not keep it.
48 | CREATE OR REPLACE PROCEDURE sync_daemon(seconds real, _application_name text DEFAULT 'pgwrh_sync_daemon') LANGUAGE plpgsql AS
49 | $$
50 | DECLARE
51 |     daemon_lock_key CONSTANT bigint := 517384732;
52 | BEGIN
53 |     IF NOT pg_try_advisory_lock(daemon_lock_key) THEN
54 |         RETURN;
55 |     END IF;
56 |     PERFORM set_config('application_name', _application_name, FALSE);
57 |     LOOP
58 |         BEGIN
59 |             CALL "@extschema@".sync_replica_worker();
60 |         EXCEPTION
61 |             WHEN OTHERS THEN
62 |                 RAISE WARNING '%', SQLERRM;
63 |         END;
64 |         -- COMMIT stays outside of the exception block above
65 |         COMMIT;
66 |         PERFORM pg_sleep(seconds);
67 |         EXIT WHEN NOT EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'pgwrh');
68 |     END LOOP;
69 |     -- session-level advisory locks survive COMMIT; release it when the daemon stops
70 |     PERFORM pg_advisory_unlock(daemon_lock_key);
71 | END
72 | $$;
59 |
60 | -- Launches sync_daemon(seconds, application_name) in a detached background worker.
61 | CREATE OR REPLACE FUNCTION start_sync_daemon(seconds real, application_name text DEFAULT 'pgwrh_sync_daemon')
62 |     RETURNS void
63 |     LANGUAGE sql AS
64 | $$
65 |     SELECT "@extschema@".launch_in_background(
66 |         format('
67 | CALL "@extschema@".sync_daemon(%s, %L);
68 | ', $1, $2)
69 |     );
70 | $$;
66 |
67 | -- Runs `script` in a pg_background worker and waits for it to finish.
68 | -- Returns TRUE on success; on any error the message is re-raised as a WARNING
69 | -- and FALSE is returned instead of propagating the error.
70 | CREATE OR REPLACE FUNCTION exec_script(script text)
71 |     RETURNS boolean
72 |     LANGUAGE plpgsql AS
73 | $$
74 | BEGIN
75 |     PERFORM *
76 |     FROM "@extschema:pg_background@".pg_background_result(
77 |              "@extschema:pg_background@".pg_background_launch(script)
78 |          ) AS discarded(result text);
79 |     RETURN TRUE;
80 | EXCEPTION
81 |     WHEN OTHERS THEN
82 |         RAISE WARNING '%', SQLERRM;
83 |         RETURN FALSE;
84 | END
85 | $$;
81 |
82 | -- Runs every element of `scripts` in its own pg_background worker, one after
83 | -- another, waiting for each to finish.
84 | -- Returns TRUE when all of them succeeded. The first error stops the sequence,
85 | -- is reported as a WARNING (same severity as exec_script, so that it is not
86 | -- filtered out by the default log level) and FALSE is returned.
87 | CREATE OR REPLACE FUNCTION exec_non_tx_scripts(scripts text[]) RETURNS boolean LANGUAGE plpgsql AS
88 | $$
89 | DECLARE
90 |     cmd text;
91 |     err text;
92 | BEGIN
93 |     FOREACH cmd IN ARRAY scripts LOOP
94 |         PERFORM * FROM "@extschema:pg_background@".pg_background_result("@extschema:pg_background@".pg_background_launch(cmd)) AS discarded(result text);
95 |     END LOOP;
96 |     RETURN TRUE;
97 | EXCEPTION
98 |     WHEN OTHERS THEN
99 |         GET STACKED DIAGNOSTICS err = MESSAGE_TEXT;
100 |         RAISE WARNING '%', err;
101 |         RETURN FALSE;
102 | END
103 | $$;
99 |
100 | -- Executes one round of pending synchronisation work.
101 | --
102 | -- Reads rows (async, transactional, description, commands) from view sync and
103 | -- executes each row's commands:
104 | --   transactional + async      all commands joined with ';' in one detached worker
105 | --   transactional + sync       all commands joined with ';' through exec_script
106 | --   non-transactional + async  detached; several commands go through exec_non_tx_scripts
107 | --   non-transactional + sync   one exec_script call per command
108 | --
109 | -- Returns FALSE when the transaction-level advisory lock is not available or
110 | -- when view sync returned no rows, TRUE when at least one row was processed.
111 | -- On error the message is raised as a WARNING, the function sleeps for one
112 | -- second and returns TRUE.
113 | CREATE OR REPLACE FUNCTION sync_step() RETURNS boolean LANGUAGE plpgsql AS
114 | $$
115 | DECLARE
116 |     step record;
117 |     single_cmd text;
118 | BEGIN
119 |     IF NOT pg_try_advisory_xact_lock(2895359559) THEN
120 |         RETURN FALSE;
121 |     END IF;
122 | 
123 |     -- Select commands to execute in a separate transaction so that we don't keep any locks here
124 |     FOR step IN
125 |         SELECT * FROM "@extschema:pg_background@".pg_background_result("@extschema:pg_background@".pg_background_launch('select async, transactional, description, commands from "@extschema@".sync')) AS (async boolean, transactional boolean, description text, commands text[])
126 |     LOOP
127 |         RAISE NOTICE '%', step.description;
128 |         IF step.transactional AND step.async THEN
129 |             PERFORM "@extschema@".launch_in_background(array_to_string(step.commands, ';'));
130 |         ELSIF step.transactional THEN
131 |             -- trailing SELECT gives the script a text result for exec_script to read
132 |             PERFORM "@extschema@".exec_script(array_to_string(step.commands || 'SELECT '''''::text, ';'));
133 |         ELSIF step.async THEN
134 |             IF array_length(step.commands, 1) > 1 THEN
135 |                 PERFORM "@extschema@".launch_in_background(format('SELECT "@extschema@".exec_non_tx_scripts(ARRAY[%s])', (SELECT string_agg(format('%L', c), ',') FROM unnest(step.commands) AS c)));
136 |             ELSE
137 |                 PERFORM "@extschema@".launch_in_background(step.commands[1]);
138 |             END IF;
139 |         ELSE
140 |             FOREACH single_cmd IN ARRAY step.commands LOOP
141 |                 PERFORM "@extschema@".exec_script(single_cmd);
142 |             END LOOP;
143 |         END IF;
144 |     END LOOP;
145 |     -- FOUND is set by the FOR loop on exit: true when it iterated at least once
146 |     RETURN FOUND;
147 | EXCEPTION
148 |     WHEN OTHERS THEN
149 |         RAISE WARNING '%', SQLERRM;
150 |         PERFORM pg_sleep(1);
151 |         RETURN TRUE;
152 | END
153 | $$;
143 |
144 | -- One full synchronisation pass: repeats sync_step() (each call in its own
145 | -- background worker) for as long as it returns true, then runs report_state()
146 | -- and cleanup_analyzed_pg_class(), each in a background worker as well.
147 | CREATE OR REPLACE PROCEDURE sync_replica_worker() LANGUAGE plpgsql AS
148 | $$
149 | DECLARE
150 |     more_work boolean;
151 | BEGIN
152 |     LOOP
153 |         more_work := (
154 |             SELECT step.more
155 |             FROM "@extschema:pg_background@".pg_background_result("@extschema:pg_background@".pg_background_launch('SELECT "@extschema@".sync_step()')) AS step(more boolean)
156 |         );
157 |         -- stop on false as well as on NULL / no row
158 |         EXIT WHEN more_work IS NOT TRUE;
159 |     END LOOP;
160 |     PERFORM * FROM "@extschema:pg_background@".pg_background_result("@extschema:pg_background@".pg_background_launch('SELECT ''ignored'' FROM "@extschema@".report_state()')) AS r(ignored text);
161 |     PERFORM * FROM "@extschema:pg_background@".pg_background_result("@extschema:pg_background@".pg_background_launch('SELECT ''ignored'' FROM "@extschema@".cleanup_analyzed_pg_class()')) AS r(ignored text);
162 | END
163 | $$;
153 |
154 |
155 | -- -- CREATE OR REPLACE FUNCTION sync_trigger() RETURNS trigger LANGUAGE plpgsql AS
156 | -- -- $$BEGIN
157 | -- -- PERFORM @extschema@.launch_sync();
158 | -- -- RETURN NULL;
159 | -- -- END$$;
160 | -- -- CREATE OR REPLACE TRIGGER sync_trigger AFTER INSERT ON config_change FOR EACH ROW EXECUTE FUNCTION sync_trigger();
161 | -- -- ALTER TABLE config_change ENABLE REPLICA TRIGGER sync_trigger;
162 |
--------------------------------------------------------------------------------
/src/replica/deps.txt:
--------------------------------------------------------------------------------
1 | tables helpers
2 | helpers sync
3 | fdw sync
4 | fdw status
5 | helpers status
6 | tables ext-config-dump
7 | sync daemon
8 | status daemon
9 | daemon api-management
10 | helpers api-management
11 |
--------------------------------------------------------------------------------
/src/replica/ext-config-dump.sql:
--------------------------------------------------------------------------------
1 | -- name: replica-ext-config-dump
2 | -- requires: replica-tables
3 |
4 | -- pgwrh
5 | -- Copyright (C) 2024 Michal Kleczek
6 |
7 | -- This program is free software: you can redistribute it and/or modify
8 | -- it under the terms of the GNU Affero General Public License as published by
9 | -- the Free Software Foundation, either version 3 of the License, or
10 | -- (at your option) any later version.
11 |
12 | -- This program is distributed in the hope that it will be useful,
13 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | -- GNU Affero General Public License for more details.
16 |
17 | -- You should have received a copy of the GNU Affero General Public License
18 | -- along with this program. If not, see <https://www.gnu.org/licenses/>.
19 |
20 | SELECT pg_catalog.pg_extension_config_dump('shard_subscription', ''); -- register shard_subscription as an extension configuration table; the empty filter condition selects all of its rows
21 |
--------------------------------------------------------------------------------
/src/replica/fdw.sql:
--------------------------------------------------------------------------------
1 | -- name: replica-fdw
2 |
3 | -- pgwrh
4 | -- Copyright (C) 2024 Michal Kleczek
5 |
6 | -- This program is free software: you can redistribute it and/or modify
7 | -- it under the terms of the GNU Affero General Public License as published by
8 | -- the Free Software Foundation, either version 3 of the License, or
9 | -- (at your option) any later version.
10 |
11 | -- This program is distributed in the hope that it will be useful,
12 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | -- GNU Affero General Public License for more details.
15 |
16 | -- You should have received a copy of the GNU Affero General Public License
17 | -- along with this program. If not, see <https://www.gnu.org/licenses/>.
18 |
19 | CREATE SERVER IF NOT EXISTS replica_controller FOREIGN DATA WRAPPER postgres_fdw OPTIONS (load_balance_hosts 'random'); -- postgres_fdw server used by all fdw_* tables in this file; created without host/port/dbname options
20 | CREATE USER MAPPING FOR PUBLIC SERVER replica_controller; -- PUBLIC mapping, created without user/password options
21 |
22 | CREATE FOREIGN TABLE IF NOT EXISTS fdw_shard_assignment ( -- local handle for remote relation "shard_assignment" on server replica_controller
23 | schema_name text,
24 | table_name text,
25 | local boolean,
26 | shard_server_name text,
27 | host text,
28 | port text,
29 | dbname text,
30 | shard_server_user text,
31 | pubname text,
32 | connect_remote boolean,
33 | retained_shard_server_name text
34 | )
35 | SERVER replica_controller
36 | OPTIONS (table_name 'shard_assignment'); -- remote relation name differs from the local one (no fdw_ prefix)
37 |
38 | CREATE FOREIGN TABLE IF NOT EXISTS fdw_shard_index ( -- local handle for remote relation "shard_index" on server replica_controller
39 | schema_name text,
40 | table_name text,
41 | index_name text,
42 | index_template text,
43 | optional boolean
44 | )
45 | SERVER replica_controller
46 | OPTIONS (table_name 'shard_index');
47 |
48 | CREATE FOREIGN TABLE IF NOT EXISTS fdw_shard_structure ( -- local handle for remote relation "shard_structure" on server replica_controller
49 | schema_name text,
50 | table_name text,
51 | level int,
52 | create_table text
53 | )
54 | SERVER replica_controller
55 | OPTIONS (table_name 'shard_structure');
56 |
57 | CREATE FOREIGN TABLE fdw_replica_state ( -- local handle for remote relation "replica_state" on server replica_controller; all columns are JSON documents
58 | subscribed_local_shards json,
59 | indexes json,
60 | connected_local_shards json,
61 | connected_remote_shards json,
62 | users json
63 | ) SERVER replica_controller
64 | OPTIONS (table_name 'replica_state');
65 |
66 | CREATE FOREIGN TABLE fdw_credentials ( -- local handle for remote relation "credentials" on server replica_controller
67 | username text,
68 | password text
69 | ) SERVER replica_controller
70 | OPTIONS (table_name 'credentials');
71 |
--------------------------------------------------------------------------------
/src/replica/helpers.sql:
--------------------------------------------------------------------------------
1 | -- name: replica-helpers
2 |
3 | -- pgwrh
4 | -- Copyright (C) 2024 Michal Kleczek
5 |
6 | -- This program is free software: you can redistribute it and/or modify
7 | -- it under the terms of the GNU Affero General Public License as published by
8 | -- the Free Software Foundation, either version 3 of the License, or
9 | -- (at your option) any later version.
10 |
11 | -- This program is distributed in the hope that it will be useful,
12 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | -- GNU Affero General Public License for more details.
15 |
16 | -- You should have received a copy of the GNU Affero General Public License
17 | -- along with this program. If not, see <https://www.gnu.org/licenses/>.
18 |
19 | -- Options parsing.
20 | --
21 | -- Expands an array of 'key=value' strings into one row per element:
22 | --   key    text before the first '=' (the whole element when it has no '=')
23 | --   value  everything after the first '='; NULL when the element has no '='
24 | --   vals   value split on ','
25 | -- Only the FIRST '=' separates key from value, so values that themselves
26 | -- contain '=' (passwords, nested settings) are returned intact.
27 | CREATE OR REPLACE FUNCTION opts(arr text[]) RETURNS TABLE(key text, value text, vals text[]) LANGUAGE sql AS
28 | $$
29 |     SELECT
30 |         CASE WHEN o.val <> '' THEN split_part(o.val, '=', 1) END AS key,
31 |         kv.value,
32 |         string_to_array(kv.value, ',') AS vals
33 |     FROM
34 |         unnest(arr) AS o(val),
35 |         LATERAL (
36 |             SELECT
37 |                 CASE
38 |                     WHEN position('=' IN o.val) > 0 THEN substr(o.val, position('=' IN o.val) + 1)
39 |                 END AS value
40 |         ) AS kv
41 | $$;
24 |
25 | -- Returns at most one ALTER SERVER statement that brings the host, port and
26 | -- dbname options of server `_srvname` in line with the given values.
27 | -- `srvoptions` is the server's current option array. An option missing from it
28 | -- is ADDed, an option with a different value is SET, unchanged options are
29 | -- left out. No row is returned when nothing has to change.
30 | CREATE OR REPLACE FUNCTION update_server_options(_srvname text, srvoptions text[], host text, port text, dbname text DEFAULT current_database())
31 |     RETURNS SETOF text
32 |     STABLE
33 |     LANGUAGE sql AS
34 | $$
35 |     WITH diff(cmd) AS (
36 |         SELECT
37 |             format('%s %s %L',
38 |                 CASE WHEN existing.key IS NULL THEN 'ADD' ELSE 'SET' END,
39 |                 wanted.key,
40 |                 wanted.val
41 |             )
42 |         FROM
43 |             unnest(ARRAY['host', 'port', 'dbname'], ARRAY[host, port, dbname]) AS wanted(key, val)
44 |                 LEFT JOIN "@extschema@".opts(srvoptions) AS existing USING (key)
45 |         WHERE
46 |             existing.key IS NULL OR wanted.val <> existing.value
47 |     )
48 |     SELECT format('ALTER SERVER %I OPTIONS (%s)', _srvname, string_agg(diff.cmd, ', '))
49 |     FROM diff
50 |     HAVING count(*) > 0 -- produce an empty set when there is nothing to change
51 | $$;
48 | -- Convenience overload: looks up the current options of server `_srvname` in
49 | -- pg_foreign_server and delegates to the five-argument variant.
50 | -- Returns no rows when the server does not exist.
51 | CREATE OR REPLACE FUNCTION update_server_options(_srvname text, host text, port text, dbname text DEFAULT current_database())
52 |     RETURNS SETOF text
53 |     STABLE
54 |     LANGUAGE sql AS
55 | $$
56 |     SELECT "@extschema@".update_server_options(fs.srvname, fs.srvoptions, host, port, dbname)
57 |     FROM pg_foreign_server AS fs
58 |     WHERE fs.srvname = _srvname;
59 | $$;
60 |
61 | -- Returns at most one ALTER USER MAPPING FOR PUBLIC statement that brings the
62 | -- user and password options of the mapping on server `server_name` in line
63 | -- with the given values.
64 | -- `umoptions` is the mapping's current option array. A missing option is ADDed,
65 | -- a differing one is SET, unchanged options are left out. No row is returned
66 | -- when nothing has to change.
67 | CREATE OR REPLACE FUNCTION update_user_mapping(server_name text, umoptions text[], username text, password text)
68 |     RETURNS SETOF text
69 |     STABLE
70 |     LANGUAGE sql AS
71 | $$
72 |     WITH diff(cmd) AS (
73 |         SELECT
74 |             format('%s %s %L',
75 |                 CASE WHEN existing.key IS NULL THEN 'ADD' ELSE 'SET' END,
76 |                 wanted.key,
77 |                 wanted.val
78 |             )
79 |         FROM
80 |             unnest(ARRAY['user', 'password'], ARRAY[username, password]) AS wanted(key, val)
81 |                 LEFT JOIN "@extschema@".opts(umoptions) AS existing USING (key)
82 |         WHERE
83 |             existing.key IS NULL OR wanted.val <> existing.value
84 |     )
85 |     SELECT format('ALTER USER MAPPING FOR PUBLIC SERVER %I OPTIONS (%s)', server_name, string_agg(diff.cmd, ', '))
86 |     FROM diff
87 |     HAVING count(*) > 0 -- produce an empty set when there is nothing to change
88 | $$;
84 | -- Convenience overload: looks up the current options of the PUBLIC user
85 | -- mapping on server `_srvname` in pg_user_mappings and delegates to the
86 | -- four-argument variant.
87 | -- Only the PUBLIC mapping (umuser = 0) is considered, because the generated
88 | -- statement always targets FOR PUBLIC; options of per-role mappings on the
89 | -- same server must not take part in the comparison.
90 | -- Returns no rows when the server has no PUBLIC mapping.
91 | CREATE OR REPLACE FUNCTION update_user_mapping(_srvname text, username text, password text)
92 |     RETURNS SETOF text
93 |     STABLE
94 |     LANGUAGE sql
95 | AS
96 | $$
97 |     SELECT "@extschema@".update_user_mapping(um.srvname, um.umoptions, username, password)
98 |     FROM
99 |         pg_user_mappings AS um
100 |     WHERE
101 |         um.srvname = _srvname
102 |         AND um.umuser = 0;
103 | $$;
96 |
97 | CREATE TYPE rel_id AS (schema_name text, table_name text); -- relation identified by schema name and relation name, both unquoted
98 |
99 | -- Schema-qualified name of a relation, each part quoted as an identifier where needed.
100 | CREATE FUNCTION fqn(rel_id)
101 |     RETURNS text
102 |     LANGUAGE sql AS
103 | $$
104 |     SELECT format('%I.%I', ($1).schema_name, ($1).table_name);
105 | $$;
103 | -- Result of select_add_ext_dependency() for the given relation, which is
104 | -- passed as pg_class plus a '<quoted name>'::regclass expression.
105 | CREATE FUNCTION add_ext_dependency(rel_id)
106 |     RETURNS text
107 |     LANGUAGE sql AS
108 | $$
109 |     SELECT "@extschema@".select_add_ext_dependency(
110 |         'pg_class'::regclass,
111 |         format('%L::regclass', "@extschema@".fqn($1))
112 |     );
113 | $$;
107 |
108 | -- rel_id functions
109 | -- Every pg_class entry together with its namespace: the two catalog rows as
110 | -- composite columns (pc, pn) plus name, rel_id and regclass shortcuts.
111 | CREATE VIEW rel AS
112 |     SELECT
113 |         pc,
114 |         pn,
115 |         pn.nspname AS schema_name,
116 |         pc.relname AS table_name,
117 |         ROW(pn.nspname, pc.relname)::rel_id AS rel_id,
118 |         CAST(pc.oid AS regclass) AS reg_class
119 |     FROM
120 |         pg_namespace AS pn
121 |             INNER JOIN pg_class AS pc ON pc.relnamespace = pn.oid;
119 | -- rel extended with partitioning information:
120 | --   bound        partition bound expression of the relation
121 | --   parent       rel row of the inheritance parent (NULL when there is none)
122 | --   slot_rel_id  same table name in schema "<schema_name>_slot"
123 | CREATE OR REPLACE VIEW local_rel AS
124 |     SELECT
125 |         r.*,
126 |         pg_get_expr((r.pc).relpartbound, (r.pc).oid) AS bound,
127 |         parent,
128 |         ROW((r.rel_id).schema_name || '_slot', (r.rel_id).table_name)::rel_id AS slot_rel_id
129 |     FROM
130 |         rel AS r
131 |             LEFT JOIN pg_inherits AS pi ON pi.inhrelid = (r.pc).oid
132 |             LEFT JOIN rel AS parent ON (parent.pc).oid = pi.inhparent;
129 |
130 | CREATE OR REPLACE VIEW shard_assignment_r AS -- controller shard assignments (fdw_shard_assignment) matched to local relations by name, plus derived schema and relation names
131 | SELECT
132 | lr.rel_id AS rel_id,
133 | lr.slot_rel_id AS slot_rel_id,
134 | (lr).slot_rel_id.schema_name AS slot_schema_name,
135 | remote_rel_id,
136 | shard_server_name,
137 | shard_server_schema AS shard_server_schema_name,
138 | template_rel_id,
139 | shard_template_schema AS template_schema_name,
140 | sa.local,
141 | CASE WHEN local THEN rel_id ELSE remote_rel_id END AS shard_rel_id, -- the local relation for local shards, otherwise the relation in the shard server schema
142 | 'pgwrh_replica_subscription' AS subname, -- constant subscription name
143 | sa.pubname,
144 | sa.shard_server_user,
145 | sa.dbname,
146 | host,
147 | port,
148 | connect_remote,
149 | retained_shard_server_name,
150 | retained_shard_server_schema,
151 | retained_remote_rel_id,
152 | view_schema AS view_schema_name,
153 | view_rel_id,
154 | lr.reg_class,
155 | lr.parent,
156 | lr,
157 | parent IS NOT NULL AND (parent).rel_id = slot_rel_id AS connected -- true when the relation's parent is its slot relation
158 | FROM
159 | fdw_shard_assignment sa
160 | JOIN local_rel lr ON (sa.schema_name, sa.table_name) = ((lr).rel_id.schema_name, (lr).rel_id.table_name),
161 | format('%s_%s', sa.schema_name, shard_server_name) AS shard_server_schema, -- "<schema>_<shard server name>"
162 | format('%s_%s', sa.schema_name, retained_shard_server_name) AS retained_shard_server_schema, -- "<schema>_<retained shard server name>"
163 | format('%s_template', sa.schema_name) AS shard_template_schema, -- "<schema>_template"
164 | format('%s_shield', sa.schema_name) AS view_schema -- "<schema>_shield"
165 | CROSS JOIN LATERAL ( -- same table name placed in each of the derived schemas
166 | SELECT
167 | (shard_server_schema, (rel_id).table_name)::rel_id AS remote_rel_id,
168 | (shard_template_schema, (rel_id).table_name)::rel_id AS template_rel_id,
169 | (retained_shard_server_schema, (rel_id).table_name)::rel_id AS retained_remote_rel_id,
170 | (view_schema, (rel_id).table_name)::rel_id AS view_rel_id
171 | ) AS rels;
172 |
173 | -- Local relations that belong to a subscription listed in shard_subscription
174 | -- and whose subscription state is 'r'.
175 | CREATE VIEW subscribed_local_shard AS
176 |     SELECT
177 |         lr.*
178 |     FROM
179 |         local_rel AS lr
180 |     WHERE
181 |         EXISTS (
182 |             SELECT 1
183 |             FROM
184 |                 pg_subscription AS s
185 |                     INNER JOIN shard_subscription USING (subname)
186 |                     INNER JOIN pg_subscription_rel AS sr ON sr.srsubid = s.oid
187 |             WHERE
188 |                 sr.srsubstate = 'r'
189 |                 AND sr.srrelid = lr.reg_class
190 |         )
191 | ;
186 |
187 | -- Indexes outside of the extension schema for which is_dependent_object()
188 | -- returns true.
189 | CREATE VIEW created_index AS
190 |     SELECT
191 |         r.schema_name,
192 |         r.table_name AS index_name
193 |     FROM
194 |         rel AS r
195 |             INNER JOIN pg_index AS i ON r.reg_class = i.indexrelid
196 |     WHERE
197 |         is_dependent_object('pg_class'::regclass, i.indexrelid)
198 |         AND r.schema_name <> '@extschema@'
199 | ;
198 |
199 | -- Foreign tables that live on a server listed in owned_server, with all
200 | -- local_rel columns plus the server name.
201 | CREATE VIEW remote_shard AS
202 |     SELECT
203 |         lr.*,
204 |         s.srvname
205 |     FROM
206 |         owned_server AS s
207 |             INNER JOIN pg_foreign_table AS ft ON ft.ftserver = s.oid
208 |             INNER JOIN local_rel AS lr ON lr.reg_class = ft.ftrelid
209 | ;
209 |
--------------------------------------------------------------------------------
/src/replica/status.sql:
--------------------------------------------------------------------------------
1 | -- name: replica-status
2 | -- requires: replica-fdw
3 | -- requires: replica-helpers
4 |
5 | -- pgwrh
6 | -- Copyright (C) 2024 Michal Kleczek
7 |
8 | -- This program is free software: you can redistribute it and/or modify
9 | -- it under the terms of the GNU Affero General Public License as published by
10 | -- the Free Software Foundation, either version 3 of the License, or
11 | -- (at your option) any later version.
12 |
13 | -- This program is distributed in the hope that it will be useful,
14 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | -- GNU Affero General Public License for more details.
17 |
18 | -- You should have received a copy of the GNU Affero General Public License
19 | -- along with this program. If not, see <https://www.gnu.org/licenses/>.
20 |
21 | -- Subscribed local shards whose current parent is their slot relation.
22 | CREATE VIEW connected_local_shard AS
23 |     SELECT
24 |         ls.rel_id
25 |     FROM
26 |         subscribed_local_shard AS ls
27 |             INNER JOIN rel AS slot
28 |                 ON slot.rel_id = ls.slot_rel_id
29 |                 AND slot.reg_class = (ls.parent).reg_class
30 | ;
31 | COMMENT ON VIEW connected_local_shard IS
32 | $$
33 | Local shards ready and connected to slots.
34 | 
35 | Local shard is considered ready if
36 | * it is subscribed and its subscription state is 'r'
37 | * all non-optional indexes are created
38 | $$;
36 |
37 | -- Local relations whose slot relation currently has a remote shard (foreign table) attached.
38 | -- TODO maybe better would be to use pg_depend to link local and foreign tables for the same shard
39 | CREATE VIEW connected_remote_shard AS
40 |     SELECT
41 |         ls.rel_id
42 |     FROM
43 |         remote_shard AS rs
44 |             INNER JOIN rel AS slot ON (rs.parent).reg_class = slot.reg_class
45 |             INNER JOIN local_rel AS ls ON slot.rel_id = ls.slot_rel_id
46 | ;
47 | COMMENT ON VIEW connected_remote_shard IS
48 | $$
49 | Remote shards ready to use and connected to slots.
50 | 
51 | Remote shard is considered ready if ANALYZE was performed on corresponding foreign table.
52 | $$;
52 |
53 | -- Schema and name of every index on a subscribed local shard that does not
54 | -- back a constraint.
55 | CREATE VIEW local_shard_index AS
56 |     SELECT
57 |         (ic).schema_name,
58 |         (ic).table_name AS index_name
59 |     FROM
60 |         subscribed_local_shard AS ls
61 |             INNER JOIN pg_index AS i ON ls.reg_class = i.indrelid
62 |             INNER JOIN rel AS ic ON i.indexrelid = ic.reg_class
63 |     WHERE
64 |         NOT EXISTS (
65 |             SELECT 1
66 |             FROM pg_constraint AS con
67 |             WHERE con.conindid = i.indexrelid
68 |         )
69 | ;
70 | COMMENT ON VIEW local_shard_index IS
71 | $$
72 | Indexes on local shards except constraint indexes.
73 | $$;
71 |
72 | -- Pushes the replica's current state to the controller by updating
73 | -- fdw_replica_state. Every column receives a JSON array; an empty result
74 | -- is sent as '[]' rather than NULL.
75 | CREATE FUNCTION report_state()
76 |     RETURNS void
77 |     LANGUAGE sql AS
78 | $$
79 |     UPDATE "@extschema@".fdw_replica_state
80 |     SET
81 |         subscribed_local_shards = coalesce(
82 |             (SELECT json_agg(rel_id) FROM "@extschema@".subscribed_local_shard),
83 |             '[]'::json),
84 |         connected_local_shards = coalesce(
85 |             (SELECT json_agg(rel_id) FROM "@extschema@".connected_local_shard),
86 |             '[]'::json),
87 |         connected_remote_shards = coalesce(
88 |             (SELECT json_agg(rel_id) FROM "@extschema@".connected_remote_shard),
89 |             '[]'::json),
90 |         indexes = coalesce(
91 |             (SELECT json_agg(i) FROM "@extschema@".local_shard_index i),
92 |             '[]'::json),
93 |         -- members of role pgwrh_replica_<current database>
94 |         users = coalesce(
95 |             (
96 |                 SELECT json_agg(u.rolname)
97 |                 FROM
98 |                     pg_roles AS gr
99 |                         INNER JOIN pg_auth_members AS m ON m.roleid = gr.oid
100 |                         INNER JOIN pg_roles AS u ON u.oid = m.member
101 |                 WHERE
102 |                     gr.rolname = format('pgwrh_replica_%s', current_database())
103 |             ),
104 |             '[]'::json);
105 | $$;
106 | COMMENT ON FUNCTION report_state() IS
107 | $$
108 | Updates controller with information about current state of a replica.
109 | # Details
110 | Function performs UPDATE on controller replica_state view setting
111 | subscribed_local_shards, connected_local_shards, connected_remote_shards, indexes
112 | columns to JSON arrays containing lists of tables and indexes having
113 | corresponding state.
114 | $$;
--------------------------------------------------------------------------------
/src/replica/sync.sql:
--------------------------------------------------------------------------------
1 | -- name: replica-sync
2 | -- requires: replica-tables
3 | -- requires: replica-helpers
4 | -- requires: replica-fdw
5 |
6 | -- pgwrh
7 | -- Copyright (C) 2024 Michal Kleczek
8 |
9 | -- This program is free software: you can redistribute it and/or modify
10 | -- it under the terms of the GNU Affero General Public License as published by
11 | -- the Free Software Foundation, either version 3 of the License, or
12 | -- (at your option) any later version.
13 |
14 | -- This program is distributed in the hope that it will be useful,
15 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
16 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 | -- GNU Affero General Public License for more details.
18 |
19 | -- You should have received a copy of the GNU Affero General Public License
20 | -- along with this program. If not, see <https://www.gnu.org/licenses/>.
21 |
22 | CREATE OR REPLACE VIEW sync(async, transactional, description, commands) AS
23 | WITH shard_assignment AS MATERIALIZED (
24 | SELECT * FROM shard_assignment_r
25 | ),
26 | local_shard AS (
27 | SELECT * FROM shard_assignment WHERE local
28 | ),
29 | slot_schema AS (
30 | SELECT DISTINCT slot_schema_name FROM shard_assignment
31 | ),
32 | template_schema AS (
33 | SELECT DISTINCT template_schema_name FROM shard_assignment
34 | ),
35 | view_schema AS (
36 | SELECT DISTINCT view_schema_name FROM shard_assignment
37 | ),
38 | shard_structure AS MATERIALIZED (
39 | SELECT * FROM fdw_shard_structure
40 | ),
41 | shard_schema AS (
42 | SELECT DISTINCT schema_name FROM shard_structure
43 | ),
44 | shard_server AS (
45 | SELECT DISTINCT
46 | shard_server_name,
47 | shard_server_schema_name,
48 | host,
49 | port,
50 | dbname,
51 | shard_server_user
52 | FROM
53 | shard_assignment
54 | WHERE
55 | shard_server_name IS NOT NULL
56 | ),
57 | shard_server_schema AS (
58 | SELECT DISTINCT shard_server_schema_name
59 | FROM shard_assignment
60 | WHERE shard_server_name IS NOT NULL
61 | ),
62 | server_host_port AS (
63 | SELECT
64 | s.*,
65 | host,
66 | port
67 | FROM
68 | pg_foreign_server s,
69 | LATERAL (
70 | SELECT h.value AS host, p.value AS port
71 | FROM opts(srvoptions) AS h, opts(srvoptions) AS p
72 | WHERE h.key = 'host' AND p.key = 'port'
73 | ) AS opts
74 | ),
75 | owned_namespace AS (
76 | SELECT
77 | n.*
78 | FROM
79 | pg_namespace n JOIN owned_obj ON classid = 'pg_namespace'::regclass AND objid = n.oid
80 | ),
81 | owned_subscription AS (
82 | SELECT * FROM pg_subscription s JOIN shard_subscription USING (subname)
83 | ),
84 | shard_index AS (
85 | SELECT
86 | reg_class,
87 | rel_id,
88 | si.*
89 | FROM
90 | fdw_shard_index si
91 | JOIN local_rel lr ON (si.schema_name, si.table_name) = ((rel_id).schema_name, (rel_id).table_name)
92 | ),
93 | missing_index AS (
94 | SELECT
95 | *
96 | FROM
97 | shard_index si
98 | WHERE
99 | NOT EXISTS (
100 | SELECT 1 FROM pg_index i JOIN pg_class ic ON i.indexrelid = ic.oid
101 | WHERE
102 | i.indrelid = si.reg_class AND
103 | ic.relname = si.index_name
104 | )
105 | ),
106 | missing_required_index AS (
107 | SELECT
108 | *
109 | FROM
110 | missing_index
111 | WHERE
112 | NOT optional
113 | ),
114 | ready_remote_shard AS (
115 | SELECT
116 | *
117 | FROM
118 | remote_shard
119 | WHERE
120 | EXISTS (SELECT 1 FROM
121 | pg_statistic s
122 | WHERE s.starelid = reg_class
123 | )
124 | OR
125 | EXISTS (SELECT 1 FROM
126 | analyzed_remote_pg_class
127 | WHERE oid = reg_class
128 | )
129 | ),
130 | ready_local_shard AS (
131 | SELECT
132 | *
133 | FROM
134 | subscribed_local_shard s
135 | WHERE
136 | NOT EXISTS (
137 | SELECT 1 FROM missing_required_index
138 | WHERE
139 | reg_class = s.reg_class
140 | )
141 | ),
142 | roles AS (
143 | SELECT * FROM fdw_credentials
144 | ),
145 | scripts (async, transactional, description, commands) AS (
146 | SELECT
147 | FALSE,
148 | TRUE,
149 | format('Found schemas [%s] to create.',
150 | string_agg(format('%I', schema_name), ', ')),
151 | array_agg(format('CREATE SCHEMA IF NOT EXISTS %I', schema_name))
152 | ||
153 | array_agg(select_add_ext_dependency('pg_namespace', format('%L::regnamespace', schema_name)))
154 | FROM
155 | (
156 | SELECT schema_name FROM shard_schema
157 | UNION ALL
158 | SELECT slot_schema_name FROM slot_schema
159 | UNION ALL
160 | SELECT template_schema_name FROM template_schema
161 | UNION ALL
162 | SELECT view_schema_name FROM view_schema
163 | ) s(schema_name)
164 | WHERE NOT EXISTS (SELECT 1 FROM
165 | pg_namespace
166 | WHERE nspname = schema_name
167 | )
168 | GROUP BY 1, 2
169 |
170 | UNION ALL
171 | SELECT
172 | FALSE,
173 | TRUE,
174 | format('Found tables [%s] to create.',
175 | string_agg(format('%I.%I', schema_name, table_name), ', ')),
176 | array_agg(create_table ORDER BY level)
177 | ||
178 | array_agg(add_ext_dependency((schema_name, table_name)))
179 | FROM
180 | shard_structure s JOIN pg_namespace n ON nspname = s.schema_name
181 | WHERE
182 | NOT EXISTS (SELECT 1 FROM local_rel WHERE (schema_name, table_name) = (s.schema_name, s.table_name))
183 | GROUP BY 1, 2 -- make sure we produce empty set when no results
184 |
185 | UNION ALL
186 | -- CLEANUP: DROP unnecessary slot and remote (per shard server) schemas
187 | SELECT
188 | FALSE,
189 | TRUE,
190 | format('Removing unused schemas [%s]', string_agg(nspname, ', ')),
191 | ARRAY[
192 | format('DROP SCHEMA IF EXISTS %s CASCADE', string_agg(quote_ident(nspname), ','))
193 | ]
194 | FROM
195 | owned_namespace n
196 | WHERE
197 | n.nspname <> '@extschema@'
198 | AND NOT EXISTS (
199 | SELECT 1 FROM shard_schema WHERE n.nspname = schema_name
200 | )
201 | AND NOT EXISTS (
202 | SELECT 1 FROM slot_schema WHERE n.nspname = slot_schema_name
203 | )
204 | AND NOT EXISTS (
205 | SELECT 1 FROM template_schema WHERE n.nspname = template_schema_name
206 | )
207 | AND NOT EXISTS (
208 | SELECT 1 FROM view_schema WHERE n.nspname = view_schema_name
209 | )
210 | AND NOT EXISTS (
211 | SELECT 1 FROM shard_assignment WHERE n.nspname IN (shard_server_schema_name, retained_shard_server_schema)
212 | )
213 | -- Make sure not to drop schemas that contain subscribed tables
214 | -- This can happen because dropping publications from subscription
215 | -- is done in separate transaction so there is a race condition.
216 | -- Adding this condition resolves that by postponing dropping
217 | -- schemas until after publications drop.
218 | AND NOT EXISTS (SELECT 1 FROM
219 | pg_subscription_rel JOIN pg_class c ON srrelid = c.oid
220 | WHERE
221 | c.relnamespace = n.oid
222 | )
223 | GROUP BY 1, 2
224 |
225 | UNION ALL
226 | -- Make sure user accounts for local shards are created
227 | SELECT
228 | FALSE,
229 | TRUE,
230 | format('User accounts [%s] to access local shards need to be created.', string_agg(username, ', ')),
231 | array_agg(format('CREATE USER %I PASSWORD %L IN ROLE %I', username, password, "@extschema@".pgwrh_replica_role_name()))
232 | FROM
233 | roles
234 | WHERE
235 | NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = username)
236 | AND EXISTS (SELECT 1 FROM pg_roles WHERE rolname = "@extschema@".pgwrh_replica_role_name())
237 | GROUP BY 1, 2 -- make sure we produce empty set when no results
238 |
239 | UNION ALL
240 | -- Clean up
241 | SELECT
242 | FALSE,
243 | TRUE,
244 | format('Dropping no longer needed roles [%s]', string_agg(u.rolname, ', ')),
245 | array_agg(format('DROP ROLE %I', u.rolname))
246 | FROM
247 | pg_roles u
248 | JOIN pg_auth_members ON member = u.oid
249 | JOIN pg_roles gr ON gr.oid = roleid AND gr.rolname = "@extschema@".pgwrh_replica_role_name()
250 | WHERE
251 | NOT EXISTS (SELECT 1 FROM roles WHERE u.rolname = username)
252 | GROUP BY 1, 2 -- make sure we produce empty set when no results
253 |
254 | UNION ALL
255 | -- Grant USAGE on local shards view schemas
256 | SELECT
257 | FALSE,
258 | TRUE,
259 | format('Found view shard schemas [%s] without proper access rights for other replicas', string_agg(view_schema_name, ', ')),
260 | ARRAY[
261 | format('GRANT USAGE ON SCHEMA %s TO %I', string_agg(quote_ident(view_schema_name), ', '), pgwrh_replica_role_name())
262 | ]
263 | FROM
264 | view_schema s
265 | JOIN pg_namespace n ON n.nspname = s.view_schema_name
266 | JOIN pg_roles ON
267 | rolname = pgwrh_replica_role_name()
268 | AND NOT has_schema_privilege(rolname, n.oid, 'USAGE')
269 |
270 | GROUP BY 1, 2
271 |
272 | UNION ALL
273 | -- Create single table infrastructure: slot, template tables and views
274 | SELECT
275 | FALSE,
276 | TRUE,
277 | format('Found new shards [%s]. Preparing slot tables.', string_agg(reg_class::text, ', ')),
278 | array_agg(
279 | format('ALTER TABLE %s DETACH PARTITION %s',
280 | (parent).reg_class,
281 | reg_class
282 | )
283 | )
284 | ||
285 | array_agg(
286 | format('CREATE TABLE %s PARTITION OF %s %s PARTITION BY %s',
287 | fqn(slot_rel_id),
288 | (parent).reg_class,
289 | (lr).bound,
290 | pg_get_partkeydef((parent).pc.oid)
291 | )
292 | )
293 | ||
294 | array_agg(add_ext_dependency(slot_rel_id))
295 | ||
296 | array_agg(
297 | format('CREATE TABLE %s PARTITION OF %s %s PARTITION BY %s',
298 | fqn(template_rel_id),
299 | fqn(slot_rel_id),
300 | (lr).bound,
301 | pg_get_partkeydef((parent).pc.oid)
302 | )
303 | )
304 | ||
305 | array_agg(add_ext_dependency(template_rel_id))
306 | ||
307 | array_agg(
308 | format('CREATE VIEW %s AS SELECT * FROM %s', fqn(view_rel_id), (lr).reg_class)
309 | )
310 | ||
311 | array_agg(
312 | format('GRANT SELECT ON %s TO %I', fqn(view_rel_id), pgwrh_replica_role_name())
313 | )
314 | ||
315 | array_agg(add_ext_dependency(view_rel_id))
316 | FROM
317 | shard_assignment sc
318 | JOIN pg_namespace sns ON sns.nspname = slot_schema_name
319 | JOIN pg_namespace tns ON tns.nspname = template_schema_name
320 | JOIN pg_namespace vns ON vns.nspname = view_schema_name
321 | WHERE
322 | parent IS NOT NULL
323 | AND (parent).pn.oid <> sns.oid
324 | GROUP BY
325 | 1, 2
326 |
327 | UNION ALL
328 | -- Attach ready local shards to slots replacing existing attachments if necessary
329 | -- TODO partition check constraints handling to speed up attaching local shards
330 | SELECT
331 | FALSE,
332 | TRUE,
333 | format('Attaching local shards [%s] to slots', string_agg(format('%s', ready_shard.reg_class), ', ')),
334 | array_agg(format('ALTER TABLE %s DETACH PARTITION %s',
335 | slot.reg_class,
336 | i.inhrelid::regclass
337 | )
338 | ) FILTER (WHERE i IS NOT NULL)
339 | ||
340 | array_agg(format('ALTER TABLE %s ATTACH PARTITION %s %s',
341 | slot.reg_class,
342 | ready_shard.reg_class,
343 | slot.bound
344 | )
345 | )
346 | FROM
347 | shard_assignment sa
348 | JOIN local_rel slot ON slot.rel_id = sa.slot_rel_id
349 | JOIN ready_local_shard ready_shard ON sa.rel_id = ready_shard.rel_id
350 | LEFT JOIN pg_inherits i ON i.inhparent = slot.reg_class
351 | WHERE
352 | ready_shard.reg_class IS DISTINCT FROM i.inhrelid
353 | AND sa.local
354 | AND NOT sa.connect_remote
355 | GROUP BY 1, 2
356 |
357 | UNION ALL
358 | -- Attach ready remote shards to slots replacing
359 | -- existing attachments if necessary
360 | SELECT
361 | FALSE,
362 | TRUE,
363 | format('Attaching remote shards [%s] to slots', string_agg(format('%s', ready_shard.reg_class), ', ')),
364 | array_agg(format('ALTER TABLE %s DETACH PARTITION %s',
365 | slot.reg_class,
366 | i.inhrelid::regclass
367 | )
368 | ) FILTER (WHERE i IS NOT NULL)
369 | ||
370 | array_agg(format('ALTER TABLE %s ATTACH PARTITION %s %s',
371 | slot.reg_class,
372 | ready_shard.reg_class,
373 | slot.bound
374 | )
375 | )
376 | FROM
377 | shard_assignment sa
378 | JOIN local_rel slot ON slot.rel_id = sa.slot_rel_id
379 | JOIN ready_remote_shard ready_shard ON sa.remote_rel_id = ready_shard.rel_id
380 | LEFT JOIN pg_inherits i ON i.inhparent = slot.reg_class
381 | WHERE
382 | ready_shard.reg_class IS DISTINCT FROM i.inhrelid
383 | AND
384 | sa.connect_remote
385 | GROUP BY 1, 2
386 |
387 | UNION ALL
388 | -- Subscriptions
389 | SELECT
390 | FALSE,
391 | FALSE,
392 | format('Adding missing shards [%s] to subscription [%s]', string_agg((sc).reg_class::text, ', '), s.subname),
393 | ARRAY[
394 | format('TRUNCATE %s',
395 | string_agg((sc).reg_class::text, ', ')
396 | ),
397 | format('ALTER SUBSCRIPTION %I ADD PUBLICATION %s WITH (copy_data = true)',
398 | s.subname,
399 | string_agg(quote_ident(sc.pubname), ', ')
400 | )
401 | ]
402 | FROM
403 | local_shard sc JOIN owned_subscription s USING (subname)
404 | WHERE
405 | NOT EXISTS (
406 | SELECT 1 FROM unnest(s.subpublications) AS pub(name)
407 | WHERE pub.name = sc.pubname
408 | )
409 | GROUP BY
410 | s.subname
411 |
412 | UNION ALL
413 | -- create missing indexes
414 | SELECT * FROM
415 | (
416 | SELECT
417 | TRUE,
418 | TRUE,
419 | format('Creating missing index [%s] ON [%s]', index_name, reg_class),
420 | ARRAY[
421 | format('CREATE INDEX IF NOT EXISTS %I ON %s %s',
422 | index_name,
423 | reg_class,
424 | index_template
425 | ),
426 | add_ext_dependency(((rel_id).schema_name, index_name)::rel_id)
427 | ]
428 | FROM
429 | missing_index
430 | WHERE
431 | -- there is no way to find out what index is being created
432 | -- so we only allow one concurrent indexing for any given table
433 | NOT EXISTS (
434 | SELECT 1 FROM pg_stat_progress_create_index WHERE relid = reg_class
435 | )
436 | LIMIT
437 | -- make sure no more than max_worker_processes/2 indexing operations at the same time
438 | greatest(0, current_setting('max_worker_processes')::int/2 - (SELECT count(*) FROM pg_stat_progress_create_index))
439 | ) AS sub
440 |
441 | UNION ALL
442 | -- DROP indexes not defined in index_template
443 | -- make sure we do not drop constraint indexes
444 | SELECT
445 | FALSE,
446 | TRUE,
447 | format('Dropping unnecessary indexes [%s] on %s', string_agg(i.indexrelid::regclass::text, ', '), string_agg(reg_class::text, ', ')),
448 | ARRAY[
449 | format('DROP INDEX %s', string_agg(i.indexrelid::regclass::text, ', '))
450 | ]
451 | FROM
452 | pg_index i
453 | JOIN pg_class ic ON ic.oid = i.indexrelid
454 | JOIN shard_assignment sa ON sa.reg_class = i.indrelid
455 | WHERE
456 | NOT EXISTS (SELECT 1 FROM
457 | shard_index t
458 | WHERE ic.relname = t.index_name AND i.indrelid = reg_class
459 | )
460 | AND NOT EXISTS (SELECT 1 FROM
461 | pg_constraint
462 | WHERE conindid = i.indexrelid
463 | )
464 | GROUP BY 1, 2
465 |
466 | UNION ALL
467 | -- DROP subscriptions for no longer hosted shards
468 | SELECT
469 | FALSE,
470 | FALSE,
471 | format('Dropping subscribed publications for no longer hosted shards [%s]', string_agg(pub.name, ', ')),
472 | ARRAY[
473 | format('ALTER SUBSCRIPTION %I DROP PUBLICATION %s',
474 | s.subname,
475 | string_agg(quote_ident(pub.name), ', ')
476 | ),
477 | -- FIXME There is a race condition here when cascade delete shard schemas
478 | (
479 | SELECT format('TRUNCATE %s', string_agg(srrelid::regclass::text, ', '))
480 | FROM
481 | pg_subscription_rel
482 | WHERE
483 | srsubid = s.oid
484 | AND NOT EXISTS (
485 | SELECT 1 FROM local_shard WHERE reg_class = srrelid
486 | )
487 | )
488 | ]
489 | FROM
490 | owned_subscription s, unnest(s.subpublications) pub(name)
491 | WHERE
492 | NOT EXISTS (
493 | SELECT 1 FROM local_shard WHERE pubname = pub.name
494 | )
495 | AND pub.name NOT IN ('pgwrh_controller_ping')
496 | GROUP BY
497 | s.oid, s.subname
498 |
499 | ----- REMOTE SHARDS ------
500 | UNION ALL
501 | -- create missing foreign servers
502 | SELECT
503 | FALSE,
504 | TRUE,
505 | format('Found foreign servers [%s] to create.', string_agg(format('%I', shard_server_name), ', ')),
506 | array_agg(
507 | format('CREATE SERVER IF NOT EXISTS %I FOREIGN DATA WRAPPER postgres_fdw OPTIONS
508 | ( host %L, port %L, dbname %L,
509 | load_balance_hosts ''random'',
510 | async_capable ''true'',
511 | updatable ''false'',
512 | truncatable ''false'',
513 | extensions %L,
514 | fdw_tuple_cost ''99999'',
515 | analyze_sampling ''system'')',
516 | shard_server_name,
517 | host, port,
518 | dbname,
519 | (SELECT string_agg(extname, ', ') FROM pg_extension) -- assume remote server has all the same extensions
520 | )
521 | )
522 | ||
523 | array_agg(
524 | format('CREATE USER MAPPING FOR PUBLIC SERVER %I OPTIONS (user %L, password %L)',
525 | shard_server_name,
526 | username,
527 | password
528 | ))
529 | ||
530 | array_agg(select_add_ext_dependency('pg_foreign_server'::regclass, 'srvname', shard_server_name))
531 | FROM
532 | shard_server
533 | JOIN roles ON shard_server_user = username
534 | WHERE
535 | NOT EXISTS (SELECT 1 FROM pg_foreign_server WHERE srvname = shard_server_name)
536 | GROUP BY 1, 2
537 |
538 | UNION ALL
539 | -- create missing remote schemas
540 | SELECT
541 | FALSE,
542 | TRUE,
543 | format('Found remote schemas [%s] to create.', string_agg(shard_server_schema_name, ', ')),
544 | array_agg(format('CREATE SCHEMA IF NOT EXISTS %I', shard_server_schema_name))
545 | ||
546 | array_agg(select_add_ext_dependency('pg_namespace'::regclass, format('%L::regnamespace', shard_server_schema_name)))
547 | FROM
548 | shard_server_schema
549 | WHERE
550 | NOT EXISTS (SELECT 1 FROM pg_namespace WHERE nspname = shard_server_schema_name)
551 | GROUP BY 1, 2
552 |
553 | UNION ALL
554 | -- Create missing remote shards
555 | SELECT
556 | FALSE,
557 | TRUE,
558 | format('Creating missing remote shards [%s]', string_agg(fqn(remote_rel_id), ', ')),
559 | array_agg(
560 | format('CREATE FOREIGN TABLE %s PARTITION OF %s %s SERVER %I OPTIONS (schema_name %L)',
561 | fqn(remote_rel_id),
562 | template.reg_class,
563 | slot.bound,
564 | shard_server_name,
565 | (sa).view_schema_name
566 | )
567 | )
568 | ||
569 | array_agg(add_ext_dependency(remote_rel_id))
570 | ||
571 | array_agg(
572 | format('ALTER TABLE %s DETACH PARTITION %s',
573 | template.reg_class,
574 | fqn(remote_rel_id)
575 | )
576 | )
577 | FROM
578 | shard_assignment sa
579 | JOIN local_rel template ON template.rel_id = sa.template_rel_id
580 | JOIN local_rel slot ON slot.rel_id = sa.slot_rel_id
581 | JOIN pg_namespace ns ON ns.nspname = shard_server_schema_name
582 | JOIN pg_foreign_server fs ON fs.srvname = shard_server_name
583 | WHERE
584 | NOT EXISTS (SELECT 1 FROM
585 | rel
586 | WHERE rel_id = remote_rel_id
587 | )
588 | GROUP BY 1, 2
589 |
590 | UNION ALL
591 | -- Analyze remote shards in parallel
592 | SELECT
593 | TRUE,
594 | TRUE,
595 | format('Analyze remote shards [%s]', reg_class),
596 | ARRAY [
597 | format('ANALYZE %s', reg_class),
598 | format('INSERT INTO "@extschema@".analyzed_remote_pg_class (oid) VALUES (%s) ON CONFLICT DO NOTHING', reg_class::oid)
599 | ]
600 | FROM (
601 | SELECT
602 | rs.reg_class
603 | FROM
604 | remote_shard rs
605 | JOIN shard_assignment ON rs.rel_id IN (remote_rel_id, retained_remote_rel_id)
606 | WHERE
607 | NOT EXISTS (SELECT 1 FROM
608 | pg_statistic s
609 | WHERE s.starelid = rs.reg_class
610 | )
611 | AND NOT EXISTS (SELECT 1 FROM
612 | analyzed_remote_pg_class
613 | WHERE oid = rs.reg_class
614 | )
615 | AND NOT EXISTS (SELECT 1 FROM
616 | pg_stat_progress_analyze
617 | WHERE
618 | datname = current_database()
619 | AND relid = rs.reg_class
620 | )
621 | -- run maximum 5 background analysis concurrently
622 | LIMIT greatest(
623 | 0,
624 | least(
625 | 5,
626 | current_setting('max_worker_processes')::int - 6 - (SELECT count(*) FROM pg_stat_progress_analyze WHERE datname = current_database())))
627 | ) sub
628 |
629 | UNION ALL
630 | -- DROP remote shards no longer in use
631 | SELECT
632 | FALSE,
633 | TRUE,
634 | format('Dropping remote shards [%s] no longer in use', string_agg(reg_class::text, ', ')),
635 | ARRAY[
636 | format('DROP FOREIGN TABLE IF EXISTS %s', string_agg(reg_class::text, ', '))
637 | ]
638 | FROM
639 | remote_shard rs
640 | WHERE
641 | NOT EXISTS (SELECT 1 FROM
642 | shard_assignment
643 | WHERE
644 | rs.rel_id IN (remote_rel_id, retained_remote_rel_id)
645 | )
646 | GROUP BY 1, 2
647 |
648 | UNION ALL
649 | -- Update foreign servers with updated host/port if changed
650 | SELECT
651 | FALSE,
652 | TRUE,
653 | format('Found modified host and port for server %I', srvname),
654 | ARRAY[
655 | cmd
656 | ]
657 | FROM
658 | owned_server
659 | JOIN shard_server ON srvname = shard_server_name,
660 | update_server_options(srvname, srvoptions, host, port) AS cmd
661 |
662 | UNION ALL
663 | -- Update user mapping with updated user/pass if changed
664 | SELECT
665 | FALSE,
666 | TRUE,
667 | format('Found modified user and pass for server %I', s.srvname),
668 | ARRAY[
669 | cmd
670 | ]
671 | FROM
672 | owned_server s
673 | JOIN pg_user_mappings um ON um.srvid = s.oid AND um.umuser = 0
674 | JOIN shard_server ON s.srvname = shard_server_name
675 | JOIN roles ON shard_server_user = username,
676 | update_user_mapping(s.srvname, umoptions, username, password) AS cmd
677 |
678 | UNION ALL
679 | -- DROP remote servers (and all dependent objects) for non-existent remote shards
680 | SELECT
681 | FALSE,
682 | TRUE,
683 | format('Found server %s for non-existent shard. Dropping.', string_agg(srvname, ', ')),
684 | array_agg(format('DROP SERVER IF EXISTS %I CASCADE', srvname))
685 | FROM
686 | owned_server fs
687 | WHERE
688 | fs.srvname <> 'replica_controller'
689 | AND NOT EXISTS (SELECT 1 FROM
690 | shard_assignment WHERE fs.srvname IN (shard_server_name, retained_shard_server_name)
691 | )
692 | GROUP BY 1, 2 -- make sure we produce empty set when no results
693 |
694 |
695 | )
696 | SELECT
697 | *
698 | FROM
699 | scripts
700 | ;
701 | -- FIXME should it be PUBLIC? NOTE(review): the scripts selected above include CREATE USER MAPPING statements with passwords, so any role that can read sync presumably sees them -- confirm.
702 | GRANT SELECT ON sync TO PUBLIC;
703 |
704 | CREATE FUNCTION cleanup_analyzed_pg_class() RETURNS void LANGUAGE sql AS
705 | $$
706 |     -- Forget "already analyzed" markers whose relation is no longer present in pg_class.
707 |     DELETE FROM "@extschema@".analyzed_remote_pg_class AS ac
708 |     WHERE NOT EXISTS (
709 |         SELECT 1 FROM pg_class AS c WHERE c.oid = ac.oid)
710 | $$;
711 |
--------------------------------------------------------------------------------
/src/replica/tables.sql:
--------------------------------------------------------------------------------
1 | -- name: replica-tables
2 |
3 | -- pgwrh
4 | -- Copyright (C) 2024 Michal Kleczek
5 |
6 | -- This program is free software: you can redistribute it and/or modify
7 | -- it under the terms of the GNU Affero General Public License as published by
8 | -- the Free Software Foundation, either version 3 of the License, or
9 | -- (at your option) any later version.
10 |
11 | -- This program is distributed in the hope that it will be useful,
12 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | -- GNU Affero General Public License for more details.
15 |
16 | -- You should have received a copy of the GNU Affero General Public License
17 | -- along with this program. If not, see <https://www.gnu.org/licenses/>.
18 |
19 | CREATE TABLE IF NOT EXISTS shard_subscription ( -- one row per subscription name; NOTE(review): presumably the subscriptions managed by pgwrh on this replica -- confirm
20 | subname text NOT NULL PRIMARY KEY
21 | );
22 |
23 | CREATE TABLE IF NOT EXISTS analyzed_remote_pg_class ( -- remote shards already ANALYZEd by the sync scripts, so they are not analyzed again
24 | oid oid NOT NULL PRIMARY KEY -- pg_class oid of the analyzed relation; stale rows are removed by cleanup_analyzed_pg_class()
25 | );
26 |
--------------------------------------------------------------------------------
/test/master.sql:
--------------------------------------------------------------------------------
1 | -- pgwrh
2 | -- Copyright (C) 2024 Michal Kleczek
3 |
4 | -- This program is free software: you can redistribute it and/or modify
5 | -- it under the terms of the GNU Affero General Public License as published by
6 | -- the Free Software Foundation, either version 3 of the License, or
7 | -- (at your option) any later version.
8 |
9 | -- This program is distributed in the hope that it will be useful,
10 | -- but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | -- GNU Affero General Public License for more details.
13 |
14 | -- You should have received a copy of the GNU Affero General Public License
15 | -- along with this program. If not, see <https://www.gnu.org/licenses/>.
16 |
17 | CREATE ROLE test_replica; -- group role; test.py creates every replica login IN ROLE test_replica
18 | -- test holds the partitioned parent tables; test_shards holds the leaf shards and is owned by test_replica
19 | CREATE SCHEMA IF NOT EXISTS test;
20 | CREATE SCHEMA IF NOT EXISTS test_shards;
21 | ALTER SCHEMA test_shards OWNER TO test_replica;
22 |
23 | --GRANT USAGE ON SCHEMA test_shards TO test_replica;
24 | --ALTER DEFAULT PRIVILEGES IN SCHEMA test_shards GRANT SELECT ON TABLES TO test_replica;
25 | -- Root table, RANGE-partitioned on col3; the yearly partitions are created by test.create_year_shard
26 | CREATE TABLE test.my_data (col1 text, col2 text, col3 date) PARTITION BY RANGE (col3);
27 |
28 | CREATE OR REPLACE PROCEDURE test.create_year_shard(year int) LANGUAGE plpgsql AS
29 | $$
30 | -- Creates the partition of test.my_data covering the given calendar year
31 | -- (itself HASH-partitioned on col2) and its 16 hash shards in test_shards,
32 | -- handing ownership of every shard to test_replica.
33 | BEGIN
34 |     EXECUTE format('CREATE TABLE test.my_data_%1$s PARTITION OF test.my_data FOR VALUES FROM (make_date(%1$s, 1, 1)) TO (make_date(%2$s, 1, 1)) PARTITION BY HASH (col2)', year, year + 1);
35 |     FOR rem IN 0..15 LOOP
36 |         EXECUTE format(
37 |             'CREATE TABLE test_shards.my_data_%1$s_%2$s PARTITION OF test.my_data_%1$s (PRIMARY KEY (col1)) FOR VALUES WITH (MODULUS 16, REMAINDER %2$s)',
38 |             year, rem);
39 |         EXECUTE format(
40 |             'ALTER TABLE test_shards.my_data_%1$s_%2$s OWNER TO test_replica',
41 |             year, rem);
42 |     END LOOP;
43 | END
44 | $$;
45 |
46 | DO
47 | $$
48 | -- Create the yearly partitions (and their shards) for 2022 through 2025.
49 | BEGIN
50 |     -- the loop variable is declared implicitly by the integer FOR loop
51 |     FOR shard_year IN 2022..2025 LOOP
52 |         CALL test.create_year_shard(shard_year);
53 |     END LOOP;
54 | END
55 | $$;
56 |
57 | CREATE OR REPLACE PROCEDURE test.insert_test_data(years VARIADIC int[]) LANGUAGE sql AS
58 | $$
59 |     -- 300 rows per requested year: row n is dated n days after January 1st of that year
60 |     INSERT INTO test.my_data SELECT 'col1: ' || n, 'col2: ' || n, make_date(y, 1, 1) + n FROM unnest(years) AS y CROSS JOIN generate_series(1, 300) AS n;
61 | $$;
62 |
63 | CALL test.insert_test_data(2022, 2023, 2024, 2025); -- seed 300 rows for each of the four years
64 | -- Register replication group g1 and the sharded tables it serves.
65 | INSERT INTO pgwrh.replication_group
66 | (replication_group_id, username, password)
67 | VALUES
68 | ('g1', 'u', 'p');
69 | INSERT INTO pgwrh.sharded_table
70 | (replication_group_id, sharded_table_schema, sharded_table_name, replication_factor)
71 | VALUES
72 | ('g1', 'test', 'my_data', 20), -- NOTE(review): the per-year rows below presumably override this factor for their own partitions -- confirm
73 | ('g1', 'test', 'my_data_2025', 100),
74 | ('g1', 'test', 'my_data_2024', 50);
75 |
76 | -- SELECT pgwrh.add_shard_host('g1', 'h1', 'localhost', 5533);
77 | -- SELECT pgwrh.add_shard_host('g1', 'h2', 'localhost', 5534);
78 | -- SELECT pgwrh.add_shard_host('g1', 'h3', 'localhost', 5535);
79 | -- SELECT pgwrh.add_shard_host('g1', 'h4', 'localhost', 5536);
80 |
--------------------------------------------------------------------------------
/test/requirements.txt:
--------------------------------------------------------------------------------
1 | # pgwrh
2 | # Copyright (C) 2024 Michal Kleczek
3 |
4 | # This program is free software: you can redistribute it and/or modify
5 | # it under the terms of the GNU Affero General Public License as published by
6 | # the Free Software Foundation, either version 3 of the License, or
7 | # (at your option) any later version.
8 |
9 | # This program is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | # GNU Affero General Public License for more details.
13 |
14 | # You should have received a copy of the GNU Affero General Public License
15 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
16 |
17 | asn1crypto==1.5.1
18 | iniconfig==2.0.0
19 | logging==0.4.9.6  # NOTE(review): the standard library already provides logging; this PyPI pin looks like a legacy backport -- confirm it is needed
20 | packaging==24.2
21 | pg8000==1.31.2
22 | pluggy==1.5.0
23 | port-for==0.7.4
24 | psutil==6.1.1
25 | pytest==8.3.4
26 | python-dateutil==2.9.0.post0
27 | scramp==1.4.5
28 | six==1.17.0
29 | testgres==1.10.3
30 |
--------------------------------------------------------------------------------
/test/test.py:
--------------------------------------------------------------------------------
1 | # pgwrh
2 | # Copyright (C) 2024 Michal Kleczek
3 |
4 | # This program is free software: you can redistribute it and/or modify
5 | # it under the terms of the GNU Affero General Public License as published by
6 | # the Free Software Foundation, either version 3 of the License, or
7 | # (at your option) any later version.
8 |
9 | # This program is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | # GNU Affero General Public License for more details.
13 |
14 | # You should have received a copy of the GNU Affero General Public License
15 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
16 |
17 | import pytest
18 | import time
19 |
20 | from testgres import get_new_node, scoped_config
21 | from contextlib import ExitStack
22 |
23 | @pytest.fixture
24 | def new_postgres_node():  # fixture yielding a factory do(name, master=False) that returns a started node with pgwrh installed
25 | with ExitStack() as stack:  # every node made by the factory is entered on this stack and exited when the fixture finishes
26 | with scoped_config(use_python_logging=True):
27 | def do(name, master=False):
28 | node = get_new_node(name)
29 | stack.enter_context(node)
30 | node.init(allow_logical=master)  # allow_logical is requested only when master=True
31 | (node
32 | .append_conf('max_worker_processes = 100')
33 | .append_conf('max_replication_slots = 100')
34 | .append_conf('max_wal_senders = 100'))
35 | node.start()
36 | node.execute('CREATE EXTENSION pgwrh CASCADE')  # CASCADE also installs the extensions pgwrh depends on
37 | return node
38 | yield do
39 |
40 | @pytest.fixture
41 | def master(new_postgres_node):
42 | pg_node = new_postgres_node('master', True)
43 | pg_node.psql(filename='masteride.sql')
44 | class Master:
45 | port = pg_node.port
46 | def with_node(self, action):
47 | action(pg_node)
48 | def publish_config_version(self):
49 | pg_node.execute('select pgwrh.mark_pending_version_ready(\'g1\')')
50 | def register_replica(self, replica):
51 | user = replica.name
52 | password = replica.name
53 | with pg_node.connect() as mc:
54 | mc.begin()
55 | mc.execute(f'CREATE USER {user} PASSWORD \'{password}\' REPLICATION IN ROLE test_replica;')
56 | mc.execute(f'SELECT pgwrh.add_shard_host(\'g1\', \'{user}\', \'localhost\', {replica.port})')
57 | mc.commit()
58 | return (replica.name, replica.name)
59 | def delete_pending_version(self):
60 | pg_node.execute('select pgwrh.delete_pending_version(\'g1\')')
61 | def assert_same_result(self, query, replicas):
62 | assert all(query(pg_node) == result for result in map(query, replicas))
63 | return Master()
64 |
65 | @pytest.fixture
66 | def register_replicas(master):
67 | def do(replicas):
68 | for replica in replicas:
69 | (user, password) = master.register_replica(replica)
70 | replica.execute(f'SELECT pgwrh.configure_controller(\'localhost\', \'{master.port}\', \'{user}\', \'{password}\', refresh_seconds := 0)')
71 | return do
72 |
73 | def poll_ready(replicas):
74 | for replica in replicas:
75 | replica.poll_query_until('SELECT pgwrh.replica_ready()')
76 |
77 | def test_dummy(master, register_replicas, new_postgres_node):
78 | replica1 = new_postgres_node('replica1')
79 | replica2 = new_postgres_node('replica2')
80 |
81 | replicas = [replica1, replica2]
82 |
83 | register_replicas(replicas)
84 |
85 | poll_ready(replicas)
86 |
87 | try:
88 | print(f'Count: {replica1.execute('select count(*) from test.my_data')[0]}')
89 | pytest.fail('Should have failed with fdw connection error')
90 | except:
91 | pass
92 |
93 | master.publish_config_version()
94 |
95 | poll_ready(replicas)
96 |
97 | query = lambda r: r.execute('select count(*) from test.my_data')[0]
98 | master.assert_same_result(query, replicas)
99 |
100 | replica3 = new_postgres_node('replica3')
101 | try:
102 | register_replicas([replica3])
103 | pytest.fail('Should faile with locked version')
104 | except:
105 | pass
106 |
107 | master.delete_pending_version()
108 | register_replicas([replica3])
109 | replicas.append(replica3)
110 |
111 | poll_ready(replicas)
112 |
113 | master.assert_same_result(query, replicas)
114 |
115 | master.publish_config_version()
116 |
117 | poll_ready(replicas)
118 |
119 | master.assert_same_result(query, replicas)
120 |
--------------------------------------------------------------------------------