├── .cirrus.yml ├── .clang-format ├── .editorconfig ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── pg_failover_slots.c └── t ├── 010_slot_sync.pl ├── 020_physical_before_logical.pl └── 030_failover.pl /.cirrus.yml: -------------------------------------------------------------------------------- 1 | env: 2 | DEBIAN_FRONTEND: noninteractive 3 | LANG: C 4 | 5 | task: 6 | name: Linux (Debian/Ubuntu) 7 | matrix: 8 | - container: 9 | image: ubuntu:22.04 10 | env: 11 | matrix: 12 | - PGVERSION: 16 13 | - PGVERSION: 15 14 | - PGVERSION: 14 15 | - PGVERSION: 13 16 | - PGVERSION: 12 17 | - PGVERSION: 11 18 | setup_script: 19 | - apt-get update 20 | - apt-get -y install curl gnupg lsb-release 21 | - curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - 22 | - echo "deb http://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg main" | tee /etc/apt/sources.list.d/pgdg.list 23 | - apt-get update 24 | - apt-get -y install gcc make postgresql-$PGVERSION postgresql-server-dev-$PGVERSION libkrb5-dev libipc-run-perl 25 | - pg_createcluster --start $PGVERSION test -p 55435 -- -A trust 26 | - useradd user 27 | - chown -R user . 28 | build_script: 29 | - PATH=/usr/lib/postgresql/$PGVERSION/bin:$PATH 30 | - su user -c "make all" 31 | - make install 32 | test_script: 33 | - PATH=/usr/lib/postgresql/$PGVERSION/bin:$PATH 34 | - su user -c "make installcheck" 35 | on_failure: 36 | testrun_artifacts: 37 | paths: 38 | - "**/*.log" 39 | - "**/*.diffs" 40 | - "**/regress_log_*" 41 | type: text/plain 42 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | # -*- yaml -*- 2 | # git ls-files -i -x '*.[ch]' | xargs clang-format -i 3 | --- 4 | Language: Cpp 5 | # BasedOnStyle: LLVM 6 | 7 | # true would be better here. 
but it's bugged in combination with 8 | # "PointerAlignment: Right" which we also use, as it is more important 9 | AlignConsecutiveDeclarations: false 10 | AlignEscapedNewlines: Right 11 | AllowShortFunctionsOnASingleLine: None 12 | AlwaysBreakAfterDefinitionReturnType: true 13 | BreakBeforeBraces: Allman 14 | BreakBeforeTernaryOperators: false 15 | BreakConstructorInitializersBeforeComma: true 16 | BreakStringLiterals: false 17 | ColumnLimit: 79 18 | ForEachMacros: 19 | - foreach 20 | - forboth 21 | - dlist_foreach 22 | - dlist_foreach_modify 23 | - slist_foreach 24 | - slist_foreach_modify 25 | IncludeBlocks: Preserve 26 | IncludeCategories: # c.h and postgres.h should be first 27 | - Regex: '.*' 28 | Priority: 1 29 | - Regex: '^' 30 | Priority: -1 31 | - Regex: '^' 32 | Priority: -1 33 | IndentCaseLabels: true 34 | IndentWidth: 4 35 | MacroBlockBegin: "PG_TRY();|PG_CATCH();" 36 | MacroBlockEnd: "PG_END_TRY();" 37 | MaxEmptyLinesToKeep: 3 38 | PointerAlignment: Right 39 | SpaceAfterCStyleCast: true 40 | TabWidth: 4 41 | UseTab: Always 42 | ... 
43 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*.{c,h,pl,pm}] 4 | indent_style = tab 5 | indent_size = tab 6 | tab_width = 4 7 | 8 | [*.{sql,md,yml}] 9 | indent_style = space 10 | indent_size = 2 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | tmp_check*/ 2 | *~ 3 | *.swo 4 | *.swp 5 | *.o 6 | *.so 7 | *.dylib 8 | *.gcov 9 | *.gcov.out 10 | *.gcda 11 | *.gcno 12 | *.bc 13 | .DS_Store 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Postgres Failover Slots (pg_failover_slots) 2 | 3 | Copyright (c) 2023, EnterpriseDB Corporation. 4 | 5 | Permission to use, copy, modify, and distribute this software and its 6 | documentation for any purpose, without fee, and without a written agreement is 7 | hereby granted, provided that the above copyright notice and this paragraph and 8 | the following two paragraphs appear in all copies. 9 | 10 | IN NO EVENT SHALL ENTERPRISEDB CORPORATION BE LIABLE TO ANY PARTY FOR 11 | DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST 12 | PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF 13 | ENTERPRISEDB CORPORATION HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 14 | 15 | ENTERPRISEDB CORPORATION SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 16 | BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 17 | PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND 18 | ENTERPRISEDB CORPORATION HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, 19 | UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | MODULE_big = pg_failover_slots 2 | OBJS = pg_failover_slots.o 3 | 4 | PG_CPPFLAGS += -I $(libpq_srcdir) 5 | SHLIB_LINK += $(libpq) 6 | 7 | TAP_TESTS = 1 8 | 9 | PG_CONFIG = pg_config 10 | PGXS := $(shell $(PG_CONFIG) --pgxs) 11 | include $(PGXS) 12 | 13 | export PGCTLTIMEOUT = 180 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pg_failover_slots 2 | 3 | PG Failover Slots is for anyone with Logical Replication Slots on Postgres databases that are also part of a Physical Streaming Replication architecture. 4 | 5 | Since logical replication slots are only maintained on the primary node, downstream subscribers don't receive any new changes from a newly promoted primary until the slot is created, which is unsafe because the information that includes which data a subscriber has confirmed receiving and which log data still needs to be retained for the subscriber will have been lost, resulting in an unknown gap in data changes. PG Failover Slots makes logical replication slots usable across a physical failover using the following features: 6 | 7 | - Copies any missing replication slots from the primary to the standby 8 | - Removes any slots from the standby that aren't found on the primary 9 | - Periodically synchronizes the position of slots on the standby based on the primary 10 | - Ensures that selected standbys receive data before any of the logical slot walsenders can send data to consumers 11 | 12 | PostgreSQL 11 or higher is required. 13 | 14 | ## How to check the standby is ready 15 | 16 | The slots are not synchronized to the standby immediately, because of 17 | consistency reasons. 
The standby can be too far behind logical slots, or too far ahead 18 | of logical slots on primary when the pg_failover_slots module is activated, 19 | so the module does verification and only synchronizes slots when it's 20 | actually safe. 21 | 22 | This, however, brings a need to verify that the slots are synchronized and 23 | that the standby is actually ready to be a failover target with consistent 24 | logical decoding for all slots. This only needs to be done initially; once 25 | the slots are synchronized the first time, they will always be consistent as 26 | long as the module is active in the cluster. 27 | 28 | The check for whether slots are fully synchronized with primary is relatively 29 | simple. The slots just need to be present in `pg_replication_slots` view on 30 | standby and have `active` state `false`. An `active` state `true` means the 31 | slot is currently being initialized. 32 | 33 | For example, consider the following psql session: 34 | 35 | ```psql 36 | # SELECT slot_name, active FROM pg_replication_slots WHERE slot_type = 'logical'; 37 | slot_name | active 38 | -----------------+-------- 39 | regression_slot1 | f 40 | regression_slot2 | f 41 | regression_slot3 | t 42 | ``` 43 | 44 | This means that slots `regression_slot1` and `regression_slot2` are synchronized 45 | from primary to standby and `regression_slot3` is still being synchronized. If 46 | failover happens at this stage, the `regression_slot3` will be lost. 47 | 48 | Now let's wait a little and query again: 49 | 50 | ```psql 51 | # SELECT slot_name, active FROM pg_replication_slots WHERE slot_type = 'logical'; 52 | slot_name | active 53 | -----------------+-------- 54 | regression_slot1 | f 55 | regression_slot2 | f 56 | regression_slot3 | f 57 | ``` 58 | 59 | Now all three slots are synchronized and the standby can be used 60 | for failover without losing logical decoding state for any of them. 
61 | 62 | ## Prerequisite settings 63 | 64 | The module throws hard errors if the following settings are not adjusted: 65 | 66 | - `hot_standby_feedback` should be `on` 67 | - `primary_slot_name` should be non-empty 68 | 69 | These are necessary to connect to the primary so it can send the xmin and 70 | catalog_xmin separately over hot_standby_feedback. 71 | 72 | ## Configuration options 73 | 74 | The module itself must be added to `shared_preload_libraries` on both the 75 | primary instance as well as any standby that is used for high availability 76 | (failover or switchover) purposes. 77 | 78 | The behavior of pg_failover_slots is configurable using these configuration 79 | options (set in `postgresql.conf`). 80 | 81 | ### pg_failover_slots.synchronize_slot_names 82 | 83 | This standby option allows setting which logical slots should be synchronized 84 | to this physical standby. It's a comma-separated list of slot filters. 85 | 86 | A slot filter is defined as `key:value` pair (separated by colon) where `key` 87 | can be one of: 88 | 89 | - `name` - specifies to match exact slot name 90 | - `name_like` - specifies to match slot name against SQL `LIKE` expression 91 | - `plugin` - specifies to match slot plugin name against the value 92 | 93 | The `key` can be omitted and will default to `name` in that case. 94 | 95 | For example, `'my_slot_name,plugin:test_decoding'` will 96 | synchronize the slot named "my_slot_name" and any slots that use the test_decoding plugin. 97 | 98 | If this is set to an empty string, no slots will be synchronized to this physical 99 | standby. 100 | 101 | The default value is `'name_like:%'`, which means all logical replication slots 102 | will be synchronized. 103 | 104 | 105 | ### pg_failover_slots.drop_extra_slots 106 | 107 | This standby option controls what happens to extra slots on the standby that are 108 | not found on the primary using the `pg_failover_slots.synchronize_slot_names` filter. 
109 | If it's set to true (which is the default), they will be dropped, otherwise 110 | they will be kept. 111 | 112 | ### pg_failover_slots.primary_dsn 113 | 114 | A standby option for specifying the connection string to use to connect to the 115 | primary when fetching slot information. 116 | 117 | If empty (default), then use same connection string as `primary_conninfo`. 118 | 119 | Note that `primary_conninfo` cannot be used if there is a `password` field in 120 | the connection string because it gets obfuscated by PostgreSQL and 121 | pg_failover_slots can't actually see the password. In this case, 122 | `pg_failover_slots.primary_dsn` must be configured. 123 | 124 | ### pg_failover_slots.standby_slot_names 125 | 126 | This option is typically used in failover configurations to ensure that the 127 | failover-candidate streaming physical replica(s) have received and flushed 128 | all changes before they ever become visible to any subscribers. That guarantees 129 | that a commit cannot vanish on failover to a standby for the consumer of a logical 130 | slot. 131 | 132 | Replication slots whose names are listed in the comma-separated 133 | `pg_failover_slots.standby_slot_names` list are treated specially by the 134 | walsender on the primary. 135 | 136 | Logical replication walsenders will ensure that all local changes are sent and 137 | flushed to the replication slots in `pg_failover_slots.standby_slot_names` 138 | before the walsender sends those changes for the logical replication slots. 139 | Effectively, it provides a synchronous replication barrier between the named 140 | list of slots and all the consumers of logically decoded streams from walsender. 141 | 142 | Any replication slot may be listed in `pg_failover_slots.standby_slot_names`; 143 | both logical and physical slots work, but it's generally used for physical 144 | slots. 
145 | 146 | Without this safeguard, two anomalies are possible where a commit can be 147 | received by a subscriber and then vanish from the provider on failover because 148 | the failover candidate hadn't received it yet: 149 | 150 | * For 1+ subscribers, the subscriber may have applied the change but the new 151 | provider may execute new transactions that conflict with the received change, 152 | as it never happened as far as the provider is concerned; 153 | 154 | and/or 155 | 156 | * For 2+ subscribers, at the time of failover, not all subscribers have applied 157 | the change. The subscribers now have inconsistent and irreconcilable states 158 | because the subscribers that didn't receive the commit have no way to get it 159 | now. 160 | 161 | Setting `pg_failover_slots.standby_slot_names` will (by design) cause subscribers to 162 | lag behind the provider if the provider's failover-candidate replica(s) are not 163 | keeping up. Monitoring is thus essential. 164 | 165 | ### pg_failover_slots.standby_slots_min_confirmed 166 | 167 | Controls how many of the `pg_failover_slots.standby_slot_names` have to 168 | confirm before we send data through the logical replication 169 | slots. Setting -1 (the default) means to wait for all entries in 170 | `pg_failover_slots.standby_slot_names`. 171 | 172 | ### pg_failover_slots.worker_nap_time 173 | 174 | Time to sleep (in ms) between two synchronisation attempts. Defaults to 60s. 175 | 176 | ### pg_failover_slots.maintenance_db 177 | 178 | Database name to use when using primary_conninfo to connect to the primary server and fetch the replication slots list. 179 | Defaults to `postgres`. 180 | 181 | 182 | ## Release notes 183 | 184 | ### v1.1.0 185 | 186 | Version 1.1.0 contains bug fixes, enhanced configurability, and 187 | support for the most recent PostgreSQL major version. 
188 | 189 | - Add support for PostgreSQL 17 190 | 191 | - Bug fix: Do not drop physical slots on standby 192 | 193 | It would previously also drop physical replication slots on the 194 | standby if they did not exist on the primary. This was never the 195 | intention. Now it only touches logical replication slots. 196 | 197 | - New configuration setting: `pg_failover_slots.maintenance_db` 198 | 199 | This value was previously hardcoded. 200 | 201 | - New configuration setting: `pg_failover_slots.worker_nap_time` 202 | 203 | This value was previously hardcoded. 204 | 205 | ### v1.0.1 206 | 207 | Version 1.0.1 fixes several compatibility bugs. 208 | 209 | - Fix support for PG13 and older 210 | 211 | The missing interfaces caused either disconnects or outright crashes on PG13 212 | and older. 213 | 214 | - Test compatibility improvements 215 | 216 | Tests now work on PG11, and are more resilient to testing on slower machines. 217 | 218 | - PG16 compatibility improvements 219 | 220 | - Various minor cleanups 221 | -------------------------------------------------------------------------------- /pg_failover_slots.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Postgres Failover Slots (pg_failover_slots) 3 | * 4 | * Copyright (c) 2023, EnterpriseDB Corporation. 
5 | */ 6 | #include "postgres.h" 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "funcapi.h" 14 | #include "miscadmin.h" 15 | #include "pgstat.h" 16 | 17 | #include "access/genam.h" 18 | #if PG_VERSION_NUM >= 120000 19 | #include "access/table.h" 20 | #else 21 | #include "access/heapam.h" 22 | #define table_open heap_open 23 | #define table_close heap_close 24 | #endif 25 | #include "access/xact.h" 26 | #if PG_VERSION_NUM >= 150000 27 | #include "access/xlogrecovery.h" 28 | #endif 29 | 30 | #include "catalog/indexing.h" 31 | #include "catalog/pg_database.h" 32 | 33 | #include "postmaster/bgworker.h" 34 | #if PG_VERSION_NUM >= 130000 35 | #include "postmaster/interrupt.h" 36 | #endif 37 | 38 | #include "replication/decode.h" 39 | #include "replication/logical.h" 40 | #include "replication/slot.h" 41 | #include "replication/walreceiver.h" 42 | #include "replication/walsender.h" 43 | 44 | #include "storage/ipc.h" 45 | #include "storage/procarray.h" 46 | 47 | #include "tcop/tcopprot.h" 48 | 49 | #include "utils/builtins.h" 50 | #include "utils/fmgroids.h" 51 | #include "utils/fmgrprotos.h" 52 | #include "utils/guc.h" 53 | #include "utils/memutils.h" 54 | #include "utils/pg_lsn.h" 55 | #include "utils/resowner.h" 56 | #include "utils/snapmgr.h" 57 | #include "utils/varlena.h" 58 | 59 | #include "libpq-fe.h" 60 | #include "libpq/auth.h" 61 | #include "libpq/libpq.h" 62 | 63 | #define PG_FAILOVER_SLOTS_VERSION "1.1.0" 64 | 65 | PG_MODULE_MAGIC; 66 | 67 | #if PG_VERSION_NUM < 130000 68 | #define SignalHandlerForConfigReload PostgresSigHupHandler 69 | #define GetWalRcvFlushRecPtr GetWalRcvWriteRecPtr 70 | #endif 71 | 72 | #define EXTENSION_NAME "pg_failover_slots" 73 | #define WORKER_WAIT_FEEDBACK 10000L 74 | 75 | typedef struct RemoteSlot 76 | { 77 | char *name; 78 | char *plugin; 79 | char *database; 80 | bool two_phase; 81 | XLogRecPtr restart_lsn; 82 | XLogRecPtr confirmed_lsn; 83 | TransactionId catalog_xmin; 84 | } RemoteSlot; 85 | 86 | 
typedef enum FailoverSlotFilterKey 87 | { 88 | FAILOVERSLOT_FILTER_NAME = 1, 89 | FAILOVERSLOT_FILTER_NAME_LIKE, 90 | FAILOVERSLOT_FILTER_PLUGIN 91 | } FailoverSlotFilterKey; 92 | 93 | typedef struct FailoverSlotFilter 94 | { 95 | FailoverSlotFilterKey key; 96 | char *val; /* eg: test_decoding */ 97 | } FailoverSlotFilter; 98 | 99 | /* Used for physical-before-logical ordering */ 100 | static char *standby_slot_names_raw; 101 | static char *standby_slot_names_string = NULL; 102 | List *standby_slot_names = NIL; 103 | int standby_slots_min_confirmed; 104 | XLogRecPtr standby_slot_names_oldest_flush_lsn = InvalidXLogRecPtr; 105 | 106 | /* Various configuration */ 107 | int worker_nap_time; 108 | char *pg_failover_maintenance_db; 109 | 110 | /* Slots to sync */ 111 | char *pg_failover_slots_dsn; 112 | char *pg_failover_slot_names; 113 | static char *pg_failover_slot_names_str = NULL; 114 | static List *pg_failover_slot_names_list = NIL; 115 | static bool pg_failover_slots_drop = true; 116 | 117 | char *pg_failover_slots_version_str; 118 | 119 | void _PG_init(void); 120 | PGDLLEXPORT void pg_failover_slots_main(Datum main_arg); 121 | 122 | static bool 123 | check_failover_slot_names(char **newval, void **extra, GucSource source) 124 | { 125 | List *namelist = NIL; 126 | char *rawname = pstrdup(*newval); 127 | bool valid; 128 | 129 | valid = SplitIdentifierString(rawname, ',', &namelist); 130 | 131 | if (!valid) 132 | GUC_check_errdetail("List syntax is invalid."); 133 | 134 | pfree(rawname); 135 | list_free(namelist); 136 | 137 | return valid; 138 | } 139 | 140 | static void 141 | assign_failover_slot_names(const char *newval, void *extra) 142 | { 143 | MemoryContext old_ctx; 144 | List *slot_names_list = NIL; 145 | ListCell *lc; 146 | 147 | /* cleanup memory to prevent leaking or SET/config reload */ 148 | if (pg_failover_slot_names_str) 149 | pfree(pg_failover_slot_names_str); 150 | if (pg_failover_slot_names_list) 151 | { 152 | foreach (lc, 
pg_failover_slot_names_list) 153 | { 154 | FailoverSlotFilter *filter = lfirst(lc); 155 | 156 | /* val was pointer to pg_failover_slot_names_str */ 157 | pfree(filter); 158 | } 159 | list_free(pg_failover_slot_names_list); 160 | } 161 | 162 | pg_failover_slot_names_list = NIL; 163 | 164 | /* Allocate memory in long lasting context. */ 165 | old_ctx = MemoryContextSwitchTo(TopMemoryContext); 166 | 167 | pg_failover_slot_names_str = pstrdup(newval); 168 | SplitIdentifierString(pg_failover_slot_names_str, ',', &slot_names_list); 169 | 170 | foreach (lc, slot_names_list) 171 | { 172 | char *raw_val = lfirst(lc); 173 | char *key = strtok(raw_val, ":"); 174 | FailoverSlotFilter *filter = palloc(sizeof(FailoverSlotFilter)); 175 | 176 | filter->val = strtok(NULL, ":"); 177 | 178 | /* Default key is name */ 179 | if (!filter->val) 180 | { 181 | filter->val = key; 182 | filter->key = FAILOVERSLOT_FILTER_NAME; 183 | } 184 | else if (strcmp(key, "name") == 0) 185 | filter->key = FAILOVERSLOT_FILTER_NAME; 186 | else if (strcmp(key, "name_like") == 0) 187 | filter->key = FAILOVERSLOT_FILTER_NAME_LIKE; 188 | else if (strcmp(key, "plugin") == 0) 189 | filter->key = FAILOVERSLOT_FILTER_PLUGIN; 190 | else 191 | ereport( 192 | ERROR, 193 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 194 | errmsg( 195 | "unrecognized synchronize_failover_slot_names key \"%s\"", 196 | key))); 197 | 198 | /* Check that there was just one ':' */ 199 | if (strtok(NULL, ":")) 200 | ereport( 201 | ERROR, 202 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 203 | errmsg( 204 | "unrecognized synchronize_failover_slot_names format"))); 205 | 206 | pg_failover_slot_names_list = 207 | lappend(pg_failover_slot_names_list, filter); 208 | } 209 | 210 | /* Clean the temporary list, but not the contents. 
*/ 211 | list_free(slot_names_list); 212 | MemoryContextSwitchTo(old_ctx); 213 | } 214 | 215 | static bool 216 | check_standby_slot_names(char **newval, void **extra, GucSource source) 217 | { 218 | List *namelist = NIL; 219 | char *rawname = pstrdup(*newval); 220 | bool valid; 221 | 222 | valid = SplitIdentifierString(rawname, ',', &namelist); 223 | 224 | if (!valid) 225 | GUC_check_errdetail("List syntax is invalid."); 226 | 227 | pfree(rawname); 228 | list_free(namelist); 229 | 230 | return valid; 231 | } 232 | 233 | static void 234 | assign_standby_slot_names(const char *newval, void *extra) 235 | { 236 | MemoryContext old_ctx; 237 | 238 | if (standby_slot_names_string) 239 | pfree(standby_slot_names_string); 240 | if (standby_slot_names) 241 | list_free(standby_slot_names); 242 | 243 | /* 244 | * We must invalidate our idea of the oldest lsn in all the named slots if 245 | * we might have changed the list. 246 | */ 247 | standby_slot_names_oldest_flush_lsn = InvalidXLogRecPtr; 248 | 249 | old_ctx = MemoryContextSwitchTo(TopMemoryContext); 250 | standby_slot_names_string = pstrdup(newval); 251 | (void) SplitIdentifierString(standby_slot_names_string, ',', 252 | &standby_slot_names); 253 | (void) MemoryContextSwitchTo(old_ctx); 254 | } 255 | 256 | /* 257 | * Get failover slots from upstream 258 | */ 259 | static List * 260 | remote_get_primary_slot_info(PGconn *conn, List *slot_filter) 261 | { 262 | PGresult *res; 263 | int i; 264 | char *op = ""; 265 | List *slots = NIL; 266 | ListCell *lc; 267 | StringInfoData query; 268 | 269 | initStringInfo(&query); 270 | if (PQserverVersion(conn) >= 140000) 271 | { 272 | appendStringInfoString( 273 | &query, 274 | "SELECT slot_name, plugin, database, two_phase, catalog_xmin, restart_lsn, confirmed_flush_lsn" 275 | " FROM pg_catalog.pg_replication_slots" 276 | " WHERE database IS NOT NULL AND ("); 277 | } 278 | else 279 | { 280 | appendStringInfoString( 281 | &query, 282 | "SELECT slot_name, plugin, database, false AS 
two_phase, catalog_xmin, restart_lsn, confirmed_flush_lsn" 283 | " FROM pg_catalog.pg_replication_slots" 284 | " WHERE database IS NOT NULL AND ("); 285 | } 286 | 287 | foreach (lc, slot_filter) 288 | { 289 | FailoverSlotFilter *filter = lfirst(lc); 290 | 291 | switch (filter->key) 292 | { 293 | case FAILOVERSLOT_FILTER_NAME: 294 | appendStringInfo( 295 | &query, " %s slot_name OPERATOR(pg_catalog.=) %s", op, 296 | PQescapeLiteral(conn, filter->val, strlen(filter->val))); 297 | break; 298 | case FAILOVERSLOT_FILTER_NAME_LIKE: 299 | appendStringInfo( 300 | &query, " %s slot_name LIKE %s", op, 301 | PQescapeLiteral(conn, filter->val, strlen(filter->val))); 302 | break; 303 | case FAILOVERSLOT_FILTER_PLUGIN: 304 | appendStringInfo( 305 | &query, " %s plugin OPERATOR(pg_catalog.=) %s", op, 306 | PQescapeLiteral(conn, filter->val, strlen(filter->val))); 307 | break; 308 | default: 309 | Assert(0); 310 | elog(ERROR, "unrecognized slot filter key %u", filter->key); 311 | } 312 | 313 | op = "OR"; 314 | } 315 | 316 | appendStringInfoString(&query, ")"); 317 | 318 | res = PQexec(conn, query.data); 319 | pfree(query.data); 320 | 321 | if (PQresultStatus(res) != PGRES_TUPLES_OK) 322 | elog(ERROR, "could not fetch slot information from provider: %s\n", 323 | res != NULL ? PQresultErrorMessage(res) : PQerrorMessage(conn)); 324 | 325 | for (i = 0; i < PQntuples(res); i++) 326 | { 327 | RemoteSlot *slot = palloc0(sizeof(RemoteSlot)); 328 | 329 | slot->name = pstrdup(PQgetvalue(res, i, 0)); 330 | slot->plugin = pstrdup(PQgetvalue(res, i, 1)); 331 | slot->database = pstrdup(PQgetvalue(res, i, 2)); 332 | parse_bool(PQgetvalue(res, i, 3), &slot->two_phase); 333 | slot->catalog_xmin = !PQgetisnull(res, i, 4) ? 334 | atoi(PQgetvalue(res, i, 4)) : 335 | InvalidTransactionId; 336 | slot->restart_lsn = 337 | !PQgetisnull(res, i, 5) ? 
338 | DatumGetLSN(DirectFunctionCall1( 339 | pg_lsn_in, CStringGetDatum(PQgetvalue(res, i, 5)))) : 340 | InvalidXLogRecPtr; 341 | slot->confirmed_lsn = 342 | !PQgetisnull(res, i, 6) ? 343 | DatumGetLSN(DirectFunctionCall1( 344 | pg_lsn_in, CStringGetDatum(PQgetvalue(res, i, 6)))) : 345 | InvalidXLogRecPtr; 346 | 347 | slots = lappend(slots, slot); 348 | } 349 | 350 | PQclear(res); 351 | 352 | return slots; 353 | } 354 | 355 | static XLogRecPtr 356 | remote_get_physical_slot_lsn(PGconn *conn, const char *slot_name) 357 | { 358 | PGresult *res; 359 | XLogRecPtr lsn; 360 | StringInfoData query; 361 | 362 | initStringInfo(&query); 363 | appendStringInfo(&query, 364 | "SELECT restart_lsn" 365 | " FROM pg_catalog.pg_replication_slots" 366 | " WHERE slot_name OPERATOR(pg_catalog.=) %s", 367 | PQescapeLiteral(conn, slot_name, strlen(slot_name))); 368 | res = PQexec(conn, query.data); 369 | 370 | if (PQresultStatus(res) != PGRES_TUPLES_OK) 371 | elog(ERROR, "could not fetch slot information from provider: %s\n", 372 | res != NULL ? PQresultErrorMessage(res) : PQerrorMessage(conn)); 373 | 374 | if (PQntuples(res) != 1) 375 | elog(ERROR, "physical slot %s not found on primary", slot_name); 376 | 377 | if (PQgetisnull(res, 0, 0)) 378 | lsn = InvalidXLogRecPtr; 379 | else 380 | lsn = DatumGetLSN(DirectFunctionCall1( 381 | pg_lsn_in, CStringGetDatum(PQgetvalue(res, 0, 0)))); 382 | 383 | PQclear(res); 384 | 385 | return lsn; 386 | } 387 | 388 | /* 389 | * Can't use get_database_oid from dbcommands.c because it does not work 390 | * without db connection. 391 | */ 392 | static Oid 393 | get_database_oid(const char *dbname) 394 | { 395 | HeapTuple tuple; 396 | Relation relation; 397 | SysScanDesc scan; 398 | ScanKeyData key[1]; 399 | Oid dboid = InvalidOid; 400 | 401 | /* 402 | * form a scan key 403 | */ 404 | ScanKeyInit(&key[0], Anum_pg_database_datname, BTEqualStrategyNumber, 405 | F_NAMEEQ, CStringGetDatum(dbname)); 406 | 407 | /* 408 | * Open pg_database and fetch a tuple. 
Force heap scan if we haven't yet 409 | * built the critical shared relcache entries (i.e., we're starting up 410 | * without a shared relcache cache file). 411 | */ 412 | relation = table_open(DatabaseRelationId, AccessShareLock); 413 | scan = systable_beginscan(relation, DatabaseNameIndexId, 414 | criticalSharedRelcachesBuilt, NULL, 1, key); 415 | 416 | tuple = systable_getnext(scan); 417 | 418 | /* Must copy tuple before releasing buffer */ 419 | if (HeapTupleIsValid(tuple)) 420 | #if PG_VERSION_NUM < 120000 421 | dboid = HeapTupleGetOid(tuple); 422 | #else 423 | { 424 | Form_pg_database datForm = (Form_pg_database) GETSTRUCT(tuple); 425 | dboid = datForm->oid; 426 | } 427 | #endif 428 | else 429 | ereport(ERROR, (errcode(ERRCODE_UNDEFINED_DATABASE), 430 | errmsg("database \"%s\" does not exist", dbname))); 431 | 432 | /* all done */ 433 | systable_endscan(scan); 434 | table_close(relation, AccessShareLock); 435 | 436 | return dboid; 437 | } 438 | 439 | /* 440 | * Fill connection string info based on config. 441 | * 442 | * This is slightly complicated because we default to primary_conninfo if 443 | * user didn't explicitly set anything and we might need to request explicit 444 | * database name override, that's why we need dedicated function for this. 
 */
/*
 * Build the DSN used to query the primary for failover-slot state.
 *
 * Appends to 'connstr' either pg_failover_slots.primary_dsn (with an
 * explicit dbname=<db_name> tacked on) or, when that GUC is empty, the
 * walreceiver's primary_conninfo plus dbname=<db_name>.
 *
 * NOTE(review): Assert(db_name) claims db_name is never NULL, yet the
 * `if (db_name)` test below implies it may be; in a production build a
 * NULL db_name would take the appendStringInfoString branch here but
 * would crash formatting "%s" in the WalRcv branch -- confirm intent.
 */
static void
make_sync_failover_slots_dsn(StringInfo connstr, char *db_name)
{
	Assert(db_name);
	if (pg_failover_slots_dsn && strlen(pg_failover_slots_dsn) > 0)
	{
		/* Explicit DSN configured; append the requested database. */
		if (db_name)
			appendStringInfo(connstr, "%s dbname=%s", pg_failover_slots_dsn,
							 db_name);
		else
			appendStringInfoString(connstr, pg_failover_slots_dsn);
	}
	else
	{
		/* No DSN configured: reuse the walreceiver's primary_conninfo. */
		Assert(WalRcv);
		appendStringInfo(connstr, "%s dbname=%s", WalRcv->conninfo, db_name);
	}
}

/*
 * Connect to remote pg server.
 *
 * 'connstr' is passed under the "dbname" key with expand_dbname=true so
 * libpq parses it as a full connection string (or URI); the remaining
 * keys add application_name, a 30s connect timeout and TCP keepalive
 * tuning.  Raises ERROR (with the DSN in errdetail) if the connection
 * cannot be established; otherwise returns the open connection, which
 * the caller must PQfinish().
 */
static PGconn *
remote_connect(const char *connstr, const char *appname)
{
#define CONN_PARAM_ARRAY_SIZE 8
	int i = 0;
	PGconn *conn;
	const char *keys[CONN_PARAM_ARRAY_SIZE];
	const char *vals[CONN_PARAM_ARRAY_SIZE];
	StringInfoData s;

	/* 's' is only used to echo the DSN back in the error report below. */
	initStringInfo(&s);
	appendStringInfoString(&s, connstr);

	keys[i] = "dbname";
	vals[i] = connstr;
	i++;
	keys[i] = "application_name";
	vals[i] = appname;
	i++;
	keys[i] = "connect_timeout";
	vals[i] = "30";
	i++;
	keys[i] = "keepalives";
	vals[i] = "1";
	i++;
	keys[i] = "keepalives_idle";
	vals[i] = "20";
	i++;
	keys[i] = "keepalives_interval";
	vals[i] = "20";
	i++;
	keys[i] = "keepalives_count";
	vals[i] = "5";
	i++;
	keys[i] = NULL;
	vals[i] = NULL;

	Assert(i <= CONN_PARAM_ARRAY_SIZE);

	/*
	 * We use the expand_dbname parameter to process the connection string
	 * (or URI), and pass some extra options.
	 */
	conn = PQconnectdbParams(keys, vals, /* expand_dbname = */ true);
	if (PQstatus(conn) != CONNECTION_OK)
	{
		ereport(ERROR,
				(errmsg("could not connect to the postgresql server: %s",
						PQerrorMessage(conn)),
				 errdetail("dsn was: %s", s.data)));
	}

	/*
	 * NOTE(review): resetStringInfo() keeps the palloc'd buffer; it is
	 * presumably reclaimed with the current memory context -- confirm.
	 */
	resetStringInfo(&s);

	elog(DEBUG2, "established connection to remote backend with pid %d",
		 PQbackendPID(conn));

	return conn;
}


/*
 * Wait for remote slot to pass locally reserved position.
 *
 * Wait until the slot named in 'remote_slot' on the host at 'conn' has all its
 * requirements satisfied by the local slot 'slot' by polling 'conn'. This
 * relies on us having already reserved the WAL for the old position of
 * `remote_slot` so `slot` can't continue to advance.
 *
 * Returns true once the remote slot's restart_lsn/catalog_xmin have caught
 * up to the local slot's; false if the remote slot vanished or local
 * promotion interrupted the wait.
 */
static bool
wait_for_primary_slot_catchup(ReplicationSlot *slot, RemoteSlot *remote_slot)
{
	List *slots;
	PGconn *conn;
	StringInfoData connstr;
	TimestampTz cb_wait_start =
		0; /* first invocation should happen immediately */

	elog(
		LOG,
		"waiting for remote slot %s lsn (%X/%X) and catalog xmin (%u) to pass local slot lsn (%X/%X) and catalog xmin (%u)",
		remote_slot->name, (uint32) (remote_slot->restart_lsn >> 32),
		(uint32) (remote_slot->restart_lsn), remote_slot->catalog_xmin,
		(uint32) (slot->data.restart_lsn >> 32),
		(uint32) (slot->data.restart_lsn), slot->data.catalog_xmin);

	initStringInfo(&connstr);
	/*
	 * Append the dbname of the remote slot. We don't use a generic db
	 * like postgres here because plugin callback below might want to invoke
	 * extension functions.
	 */
	make_sync_failover_slots_dsn(&connstr, remote_slot->database);

	conn = remote_connect(connstr.data, "pg_failover_slots");
	pfree(connstr.data);

	for (;;)
	{
		RemoteSlot *new_slot;
		int rc;
		/*
		 * NOTE(review): 'filter' and 'slots' are palloc'd every iteration
		 * and never freed inside the loop; presumably reclaimed at memory
		 * context reset -- confirm this loop can't run long enough to
		 * accumulate significant memory.
		 */
		FailoverSlotFilter *filter = palloc(sizeof(FailoverSlotFilter));
		XLogRecPtr receivePtr;

		CHECK_FOR_INTERRUPTS();

		if (!RecoveryInProgress())
		{
			/*
			 * The remote slot didn't pass the locally reserved position
			 * at the time of local promotion, so it's not safe to use.
			 */
			ereport(
				WARNING,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg(
					 "replication slot sync wait for slot %s interrupted by promotion",
					 remote_slot->name)));
			PQfinish(conn);
			return false;
		}

		/* Re-query the primary for just this one slot, by name. */
		filter->key = FAILOVERSLOT_FILTER_NAME;
		filter->val = remote_slot->name;
		slots = remote_get_primary_slot_info(conn, list_make1(filter));

		if (!list_length(slots))
		{
			/* Slot on provider vanished */
			PQfinish(conn);
			return false;
		}

		receivePtr = GetWalRcvFlushRecPtr(NULL, NULL);

		Assert(list_length(slots) == 1);

		/*
		 * Cap the remote positions at what we've actually received and
		 * flushed locally; we can't represent a position in WAL we don't
		 * have yet.
		 */
		new_slot = linitial(slots);
		if (new_slot->restart_lsn > receivePtr)
			new_slot->restart_lsn = receivePtr;
		if (new_slot->confirmed_lsn > receivePtr)
			new_slot->confirmed_lsn = receivePtr;

		if (new_slot->restart_lsn >= slot->data.restart_lsn &&
			TransactionIdFollowsOrEquals(new_slot->catalog_xmin,
										 MyReplicationSlot->data.catalog_xmin))
		{
			/* Remote caught up: report its new position to the caller. */
			remote_slot->restart_lsn = new_slot->restart_lsn;
			remote_slot->confirmed_lsn = new_slot->confirmed_lsn;
			remote_slot->catalog_xmin = new_slot->catalog_xmin;
			PQfinish(conn);
			return true;
		}

		/*
		 * Invoke any callbacks that will help move the slots along.
		 *
		 * NOTE(review): PG_WAIT_EXTENSION is a wait-event identifier, not a
		 * duration, so Min(wal_retrieve_retry_interval * 5,
		 * PG_WAIT_EXTENSION) is effectively always
		 * wal_retrieve_retry_interval * 5 -- confirm whether a real
		 * millisecond cap was intended here.
		 */
		if (TimestampDifferenceExceeds(
				cb_wait_start, GetCurrentTimestamp(),
				Min(wal_retrieve_retry_interval * 5, PG_WAIT_EXTENSION)))
		{
			if (cb_wait_start > 0)
				elog(
					LOG,
					"still waiting for remote slot %s lsn (%X/%X) and catalog xmin (%u) to pass local slot lsn (%X/%X) and catalog xmin (%u)",
					remote_slot->name, (uint32) (new_slot->restart_lsn >> 32),
					(uint32) (new_slot->restart_lsn), new_slot->catalog_xmin,
					(uint32) (slot->data.restart_lsn >> 32),
					(uint32) (slot->data.restart_lsn),
					slot->data.catalog_xmin);

			cb_wait_start = GetCurrentTimestamp();
		}

		rc =
			WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
					  wal_retrieve_retry_interval, PG_WAIT_EXTENSION);

		if (rc & WL_POSTMASTER_DEATH)
			proc_exit(1);


		ResetLatch(MyLatch);
	}
}

/*
 * Synchronize one logical replication slot's state from the master to this
 * standby, creating it if necessary.
 *
 * Note that this only works safely because we know for sure that this is
 * executed on standby where primary has another slot which reserves resources
 * at the position to which we are moving the local slot to.
 *
 * This standby uses a physical replication slot to connect to the master so it
 * can send the xmin and catalog_xmin separately over hot_standby_feedback. Our
 * physical slot on the master ensures the master's catalog_xmin never goes
 * below ours after the initial setup period.
 */
static void
synchronize_one_slot(RemoteSlot *remote_slot)
{
	int i;
	bool found = false;

	if (!RecoveryInProgress())
	{
		/* Should only happen when promotion occurs at the same time we sync */
		ereport(
			WARNING,
			(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
			 errmsg(
				 "attempted to sync slot from master when not in recovery")));
		return;
	}

	/* Slot operations below run inside a transaction with a snapshot. */
	SetCurrentStatementStartTimestamp();
	StartTransactionCommand();
	PushActiveSnapshot(GetTransactionSnapshot());

	/* Search for the named slot locally */
	LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
	for (i = 0; i < max_replication_slots; i++)
	{
		ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];

		/* Not in use, not interesting. */
		if (!s->in_use)
			continue;

		if (strcmp(NameStr(s->data.name), remote_slot->name) == 0)
		{
			found = true;
			break;
		}
	}
	LWLockRelease(ReplicationSlotControlLock);

	/*
	 * Remote slot exists locally, acquire and move. There's a race here where
	 * the slot could've been dropped since we checked, but we'll just ERROR
	 * out in `ReplicationSlotAcquire` and retry next loop so it's harmless.
	 *
	 * Moving the slot this way does not do logical decoding. We're not
	 * processing WAL, we're just updating the slot metadata.
	 */
	if (found)
	{
		ReplicationSlotAcquire(remote_slot->name, true);

		/*
		 * We can't satisfy this remote slot's requirements with our known-safe
		 * local restart_lsn, catalog_xmin and xmin.
		 *
		 * This shouldn't happen for existing slots unless someone else messed
		 * with our physical replication slot on the master.
		 */
		if (remote_slot->restart_lsn < MyReplicationSlot->data.restart_lsn ||
			TransactionIdPrecedes(remote_slot->catalog_xmin,
								  MyReplicationSlot->data.catalog_xmin))
		{
			elog(
				WARNING,
				"not synchronizing slot %s; synchronization would move it backward",
				remote_slot->name);

			ReplicationSlotRelease();
			PopActiveSnapshot();
			CommitTransactionCommand();
			return;
		}

		/* Advance the local copy's positions and persist them to disk. */
		LogicalConfirmReceivedLocation(remote_slot->confirmed_lsn);
		LogicalIncreaseXminForSlot(remote_slot->confirmed_lsn,
								   remote_slot->catalog_xmin);
		LogicalIncreaseRestartDecodingForSlot(remote_slot->confirmed_lsn,
											  remote_slot->restart_lsn);
		ReplicationSlotMarkDirty();
		ReplicationSlotSave();

		elog(
			DEBUG2,
			"synchronized existing slot %s to lsn (%X/%X) and catalog xmin (%u)",
			remote_slot->name, (uint32) (remote_slot->restart_lsn >> 32),
			(uint32) (remote_slot->restart_lsn), remote_slot->catalog_xmin);
	}
	/*
	 * Otherwise create the local slot and initialize it to the state of the
	 * upstream slot. There's a race here where the slot could've been
	 * concurrently created, but we'll just ERROR out and retry so it's
	 * harmless.
	 */
	else
	{
		TransactionId xmin_horizon = InvalidTransactionId;
		ReplicationSlot *slot;

		/*
		 * We have to create the slot to reserve its name and resources, but
		 * don't want it to persist if we fail.  Hence RS_EPHEMERAL: the slot
		 * is cleaned up automatically on error.
		 */
#if PG_VERSION_NUM >= 170000
		ReplicationSlotCreate(remote_slot->name, true, RS_EPHEMERAL,
							  remote_slot->two_phase, false, false);
#elif PG_VERSION_NUM >= 140000
		ReplicationSlotCreate(remote_slot->name, true, RS_EPHEMERAL,
							  remote_slot->two_phase);
#else
		ReplicationSlotCreate(remote_slot->name, true, RS_EPHEMERAL);
#endif
		slot = MyReplicationSlot;

		/* Copy the remote slot's database and output plugin identity. */
		SpinLockAcquire(&slot->mutex);
		slot->data.database = get_database_oid(remote_slot->database);
		strlcpy(NameStr(slot->data.plugin), remote_slot->plugin, NAMEDATALEN);
		SpinLockRelease(&slot->mutex);

		/*
		 * Stop our physical slot from advancing past the position needed
		 * by the new remote slot by making its reservations locally
		 * effective. It's OK if we can't guarantee their safety yet,
		 * the slot isn't visible to anyone else at this point.
		 */
		ReplicationSlotReserveWal();

		LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
		xmin_horizon = GetOldestSafeDecodingTransactionId(true);
		slot->effective_catalog_xmin = xmin_horizon;
		slot->data.catalog_xmin = xmin_horizon;
		ReplicationSlotsComputeRequiredXmin(true);
		LWLockRelease(ProcArrayLock);

		/*
		 * Our xmin and/or catalog_xmin may be > that required by one or more
		 * of the slots we are trying to sync from the master, and/or we don't
		 * have enough retained WAL for the slot's restart_lsn.
		 *
		 * If we persist the slot locally in that state it'll make a false
		 * promise we can't satisfy.
		 *
		 * This can happen if this replica is fairly new or has only recently
		 * started failover slot sync.
		 *
		 * TODO: Don't stop synchronization of other slots for this, we can't
		 * add timeout because that could result in some slots never being
		 * synchronized as they will always be behind the physical slot.
		 */
		if (remote_slot->restart_lsn < MyReplicationSlot->data.restart_lsn ||
			TransactionIdPrecedes(remote_slot->catalog_xmin,
								  MyReplicationSlot->data.catalog_xmin))
		{
			if (!wait_for_primary_slot_catchup(MyReplicationSlot, remote_slot))
			{
				/* Provider slot didn't catch up to locally reserved position
				 */
				ReplicationSlotRelease();
				PopActiveSnapshot();
				CommitTransactionCommand();
				return;
			}
		}

		/*
		 * We can locally satisfy requirements of remote slot's current
		 * position now. Apply the new position if any and make it persistent.
		 */
		LogicalConfirmReceivedLocation(remote_slot->confirmed_lsn);
		LogicalIncreaseXminForSlot(remote_slot->confirmed_lsn,
								   remote_slot->catalog_xmin);
		LogicalIncreaseRestartDecodingForSlot(remote_slot->confirmed_lsn,
											  remote_slot->restart_lsn);
		ReplicationSlotMarkDirty();

		/* Flip the slot from RS_EPHEMERAL to persistent. */
		ReplicationSlotPersist();

		elog(DEBUG1,
			 "synchronized new slot %s to lsn (%X/%X) and catalog xmin (%u)",
			 remote_slot->name, (uint32) (remote_slot->restart_lsn >> 32),
			 (uint32) (remote_slot->restart_lsn), remote_slot->catalog_xmin);
	}

	ReplicationSlotRelease();
	PopActiveSnapshot();
	CommitTransactionCommand();
}

/*
 * Synchronize the slot states from master to standby.
 *
 * This logic emulates the "failover slots" behaviour unsuccessfully proposed
 * for 9.6 using the PostgreSQL 10 features "catalog xmin in hot standby
 * feedback" and "logical decoding follows timeline switches".
 *
 * This is only called in recovery from main loop of manager and only in PG10+
 * because in older versions the manager worker uses
 * bgw_start_time = BgWorkerStart_RecoveryFinished.
864 | * 865 | * We could technically synchronize slot positions even on older versions of 866 | * PostgreSQL but since logical decoding can't go over the timeline switch 867 | * before PG10, it's pointless to have slots synchronized. Also, older versions 868 | * can't keep catalog_xmin separate from xmin in hot standby feedback, so 869 | * sending the feedback we need to preserve our catalog_xmin could cause severe 870 | * table bloat on the master. 871 | * 872 | * This runs periodically. That's safe when the slots on the master already 873 | * exist locally because we have their resources reserved via hot standby 874 | * feedback. New subscriptions can't move that position backwards... but we 875 | * won't immediately know they exist when the master creates them. So there's a 876 | * window after each new subscription is created on the master where failover 877 | * to this standby will break that subscription. 878 | */ 879 | static long 880 | synchronize_failover_slots(long sleep_time) 881 | { 882 | List *slots; 883 | ListCell *lc; 884 | PGconn *conn; 885 | XLogRecPtr safe_lsn; 886 | XLogRecPtr lsn = InvalidXLogRecPtr; 887 | static bool was_lsn_safe = false; 888 | bool is_lsn_safe = false; 889 | StringInfoData connstr; 890 | 891 | if (!WalRcv || !HotStandbyActive() || 892 | list_length(pg_failover_slot_names_list) == 0) 893 | return sleep_time; 894 | 895 | /* XXX should these be errors or just soft return like above? 
*/ 896 | if (!hot_standby_feedback) 897 | elog( 898 | ERROR, 899 | "cannot synchronize replication slot positions because hot_standby_feedback is off"); 900 | if (WalRcv->slotname[0] == '\0') 901 | elog( 902 | ERROR, 903 | "cannot synchronize replication slot positions because primary_slot_name is not set"); 904 | 905 | elog(DEBUG1, "starting replication slot synchronization from primary"); 906 | 907 | initStringInfo(&connstr); 908 | make_sync_failover_slots_dsn(&connstr, pg_failover_maintenance_db); 909 | conn = remote_connect(connstr.data, "pg_failover_slots"); 910 | 911 | /* 912 | * Do not synchronize WAL decoder slots on a physical standy. 913 | * 914 | * WAL decoder slots are used to produce LCRs. These LCRs are not 915 | * synchronized on a physical standby after initial backup and hence are 916 | * not included in the base backup. Thus WAL decoder slots, if synchronized 917 | * on physical standby, do not reflect the status of LCR directory as they 918 | * do on primary. 919 | * 920 | * There are other slots whose WAL senders use LCRs. These other slots are 921 | * synchronized and used after promotion. Since the WAL decoder slots are 922 | * ahead of these other slots, the WAL decoder when started after promotion 923 | * might miss LCRs required by WAL senders of the other slots. This would 924 | * cause data inconsistency after promotion. 925 | * 926 | * Hence do not synchronize WAL decoder slot. Those will be created after 927 | * promotion 928 | */ 929 | slots = remote_get_primary_slot_info(conn, pg_failover_slot_names_list); 930 | safe_lsn = remote_get_physical_slot_lsn(conn, WalRcv->slotname); 931 | 932 | /* 933 | * Delete locally-existing slots that don't exist on the master. 
934 | */ 935 | for (;;) 936 | { 937 | int i; 938 | char *dropslot = NULL; 939 | 940 | LWLockAcquire(ReplicationSlotControlLock, LW_SHARED); 941 | for (i = 0; i < max_replication_slots; i++) 942 | { 943 | ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i]; 944 | bool active; 945 | bool found = false; 946 | 947 | active = (s->active_pid != 0); 948 | 949 | /* Only check inactive slots. */ 950 | if (!s->in_use || active) 951 | continue; 952 | 953 | /* Only check for logical slots. */ 954 | if (SlotIsPhysical(s)) 955 | continue; 956 | 957 | /* Try to find slot in slots returned by primary. */ 958 | foreach (lc, slots) 959 | { 960 | RemoteSlot *remote_slot = lfirst(lc); 961 | 962 | if (strcmp(NameStr(s->data.name), remote_slot->name) == 0) 963 | { 964 | found = true; 965 | break; 966 | } 967 | } 968 | 969 | /* 970 | * Not found, should be dropped if synchronize_failover_slots_drop 971 | * is enabled. 972 | */ 973 | if (!found && pg_failover_slots_drop) 974 | { 975 | dropslot = pstrdup(NameStr(s->data.name)); 976 | break; 977 | } 978 | } 979 | LWLockRelease(ReplicationSlotControlLock); 980 | 981 | if (dropslot) 982 | { 983 | elog(WARNING, "dropping replication slot \"%s\"", dropslot); 984 | ReplicationSlotDrop(dropslot, false); 985 | pfree(dropslot); 986 | } 987 | else 988 | break; 989 | } 990 | 991 | if (!list_length(slots)) 992 | { 993 | PQfinish(conn); 994 | return sleep_time; 995 | } 996 | 997 | /* Find oldest restart_lsn still needed by any failover slot. 
*/ 998 | foreach (lc, slots) 999 | { 1000 | RemoteSlot *remote_slot = lfirst(lc); 1001 | 1002 | if (lsn == InvalidXLogRecPtr || remote_slot->restart_lsn < lsn) 1003 | lsn = remote_slot->restart_lsn; 1004 | } 1005 | 1006 | if (safe_lsn == InvalidXLogRecPtr || 1007 | WalRcv->latestWalEnd == InvalidXLogRecPtr) 1008 | { 1009 | ereport( 1010 | WARNING, 1011 | (errmsg( 1012 | "cannot synchronize replication slot positions yet because feedback was not sent yet"))); 1013 | was_lsn_safe = false; 1014 | PQfinish(conn); 1015 | return Min(sleep_time, WORKER_WAIT_FEEDBACK); 1016 | } 1017 | else if (WalRcv->latestWalEnd < lsn) 1018 | { 1019 | ereport( 1020 | WARNING, 1021 | (errmsg( 1022 | "requested slot synchronization point %X/%X is ahead of the standby position %X/%X, not synchronizing slots", 1023 | (uint32) (lsn >> 32), (uint32) (lsn), 1024 | (uint32) (WalRcv->latestWalEnd >> 32), 1025 | (uint32) (WalRcv->latestWalEnd)))); 1026 | was_lsn_safe = false; 1027 | PQfinish(conn); 1028 | return Min(sleep_time, WORKER_WAIT_FEEDBACK); 1029 | } 1030 | 1031 | foreach (lc, slots) 1032 | { 1033 | RemoteSlot *remote_slot = lfirst(lc); 1034 | XLogRecPtr receivePtr; 1035 | 1036 | /* 1037 | * If we haven't received WAL for a remote slot's current 1038 | * confirmed_flush_lsn our local copy shouldn't reflect a confirmed 1039 | * position in the future. Cap it at the position we really received. 1040 | * 1041 | * Because the client will use a replication origin to track its 1042 | * position, in most cases it'll still fast-forward to the new 1043 | * confirmed position even if that skips over a gap of WAL we never 1044 | * received from the provider before failover. We can't detect or 1045 | * prevent that as the same fast forward is normal when we lost slot 1046 | * state in a provider crash after subscriber committed but before we 1047 | * saved the new confirmed flush lsn. 
The master will also fast forward 1048 | * the slot over irrelevant changes and then the subscriber will update 1049 | * its confirmed_flush_lsn in response to master standby status 1050 | * updates. 1051 | */ 1052 | receivePtr = GetWalRcvFlushRecPtr(NULL, NULL); 1053 | if (remote_slot->confirmed_lsn > receivePtr) 1054 | remote_slot->confirmed_lsn = receivePtr; 1055 | 1056 | /* 1057 | * For simplicity we always move restart_lsn of all slots to the 1058 | * restart_lsn needed by the furthest-behind master slot. 1059 | */ 1060 | if (remote_slot->restart_lsn > lsn) 1061 | remote_slot->restart_lsn = lsn; 1062 | 1063 | synchronize_one_slot(remote_slot); 1064 | } 1065 | 1066 | PQfinish(conn); 1067 | 1068 | if (!was_lsn_safe && is_lsn_safe) 1069 | elog(LOG, "slot synchronization from primary now active"); 1070 | 1071 | was_lsn_safe = is_lsn_safe; 1072 | 1073 | return sleep_time; 1074 | } 1075 | 1076 | void 1077 | pg_failover_slots_main(Datum main_arg) 1078 | { 1079 | /* Establish signal handlers. */ 1080 | pqsignal(SIGUSR1, procsignal_sigusr1_handler); 1081 | pqsignal(SIGTERM, die); 1082 | pqsignal(SIGHUP, SignalHandlerForConfigReload); 1083 | BackgroundWorkerUnblockSignals(); 1084 | 1085 | /* Make it easy to identify our processes. */ 1086 | SetConfigOption("application_name", MyBgworkerEntry->bgw_name, 1087 | PGC_SU_BACKEND, PGC_S_OVERRIDE); 1088 | 1089 | elog(LOG, "starting pg_failover_slots replica worker"); 1090 | 1091 | /* Setup connection to pinned catalogs (we only ever read pg_database). */ 1092 | BackgroundWorkerInitializeConnection(NULL, NULL, 0); 1093 | 1094 | /* Main wait loop. 
 */
	while (true)
	{
		int rc;
		long sleep_time = worker_nap_time;

		CHECK_FOR_INTERRUPTS();

		/* Only sync while in recovery; nap 10x longer once promoted. */
		if (RecoveryInProgress())
			sleep_time = synchronize_failover_slots(worker_nap_time);
		else
			sleep_time = worker_nap_time * 10;

		rc =
			WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
					  sleep_time, PG_WAIT_EXTENSION);

		ResetLatch(MyLatch);

		/* Emergency bailout if postmaster has died. */
		if (rc & WL_POSTMASTER_DEATH)
			proc_exit(1);

		/* Reload the config if needed. */
		if (ConfigReloadPending)
		{
			ConfigReloadPending = false;
			ProcessConfigFile(PGC_SIGHUP);
		}
	}
}

/*
 * Return true iff 'str' is one of the C strings in list 'l'.
 */
static bool
list_member_str(List *l, const char *str)
{
	ListCell *lc;
	foreach (lc, l)
		if (strcmp((const char *) lfirst(lc), str) == 0)
			return true;
	return false;
}


/*
 * Check whether we want to actually wait for standby_slot_names
 */
static bool
skip_standby_slot_names(XLogRecPtr commit_lsn)
{
	/* Pointer to the GUC list we last inspected; compared by identity. */
	static List *cached_standby_slot_names = NIL;

	if (standby_slot_names != cached_standby_slot_names)
	{
		if (MyReplicationSlot)
		{
			/*
			 * If this walsender streams to one of the named standbys it
			 * must not wait for itself; disable waiting entirely by
			 * zeroing standby_slots_min_confirmed.
			 */
			if (list_member_str(standby_slot_names,
								NameStr(MyReplicationSlot->data.name)))
			{
				standby_slots_min_confirmed = 0;
				elog(
					DEBUG1,
					"found my slot in pg_failover_slots.standby_slot_names, no need to wait for confirmations");
			}
		}

		cached_standby_slot_names = standby_slot_names;
	}

	/*
	 * If we already know all slots of interest satisfy the requirement we can
	 * skip checks entirely. The assignment hook for
	 * pg_failover_slots.standby_slot_names invalidates the cache.
	 */
	if (standby_slot_names_oldest_flush_lsn >= commit_lsn ||
		standby_slots_min_confirmed == 0 ||
		list_length(standby_slot_names) == 0)
		return true;

	return false;
}

/*
 * Wait until the nominated set of standbys, if any, have flushed past the
 * specified lsn. Standbys are identified by slot name, not application_name
 * like in synchronous_standby_names.
 *
 * restart_lsn is used for physical slots, confirmed_flush_lsn for logical
 * slots.  (The loop below reads restart_lsn when data.database is
 * InvalidOid, i.e. a physical slot, and confirmed_flush otherwise.)
 *
 */
static void
wait_for_standby_confirmation(XLogRecPtr commit_lsn)
{
	XLogRecPtr flush_pos = InvalidXLogRecPtr;
	TimestampTz wait_start = GetCurrentTimestamp();

	if (skip_standby_slot_names(commit_lsn))
		return;

	while (1)
	{
		int i;
		int wait_slots_remaining;
		XLogRecPtr oldest_flush_pos = InvalidXLogRecPtr;
		int rc;

		if (standby_slots_min_confirmed == -1)
		{
			/*
			 * Default pg_failover_slots.standby_slots_min_confirmed (-1) is to
			 * wait for all entries in pg_failover_slots.standby_slot_names.
			 */
			wait_slots_remaining = list_length(standby_slot_names);
		}
		else
		{
			/*
			 * pg_failover_slots.standby_slots_min_confirmed cannot wait for
			 * more slots than are named in the
			 * pg_failover_slots.standby_slot_names.
			 */
			wait_slots_remaining = Min(standby_slots_min_confirmed,
									   list_length(standby_slot_names));
		}

		Assert(wait_slots_remaining > 0 &&
			   wait_slots_remaining <= list_length(standby_slot_names));

		/* Scan all slots, counting down the named ones that have caught up. */
		LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
		for (i = 0; i < max_replication_slots; i++)
		{
			ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];

			if (!s->in_use)
				continue;

			if (!list_member_str(standby_slot_names, NameStr(s->data.name)))
				continue;

			SpinLockAcquire(&s->mutex);

			if (s->data.database == InvalidOid)
				/* Physical slots advance restart_lsn on flush and ignore
				 * confirmed_flush_lsn */
				flush_pos = s->data.restart_lsn;
			else
				/* For logical slots we must wait for commit and flush */
				flush_pos = s->data.confirmed_flush;

			SpinLockRelease(&s->mutex);

			/* We want to find out the min(flush pos) over all named slots */
			if (oldest_flush_pos == InvalidXLogRecPtr ||
				oldest_flush_pos > flush_pos)
				oldest_flush_pos = flush_pos;

			if (flush_pos >= commit_lsn && wait_slots_remaining > 0)
				wait_slots_remaining--;
		}
		LWLockRelease(ReplicationSlotControlLock);

		if (wait_slots_remaining == 0)
		{
			/*
			 * If the oldest slot pos across all named slots advanced, update
			 * the cache so we can skip future calls. It'll be invalidated
			 * if the GUCs change.
			 */
			if (standby_slot_names_oldest_flush_lsn < oldest_flush_pos)
				standby_slot_names_oldest_flush_lsn = oldest_flush_pos;

			return;
		}

		/*
		 * Ideally we'd be able to ask these walsenders to wake us if they
		 * advance past the point of interest, but that'll require some core
		 * patching. For now, poll.
		 *
		 * We don't test for postmaster death here because it turns out to
		 * be really slow. The postmaster should kill us, we'll notice when
		 * we time out, and it's not a long sleep.
		 *
		 * TODO some degree of backoff on sleeps?
		 */
		rc =
			WaitLatch(MyLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
					  100L, PG_WAIT_EXTENSION);

		if (rc & WL_POSTMASTER_DEATH)
			proc_exit(1);

		ResetLatch(MyLatch);

		CHECK_FOR_INTERRUPTS();

		/* Give up (and kill this walsender) after wal_sender_timeout. */
		if (wal_sender_timeout > 0 &&
			GetCurrentTimestamp() >
				TimestampTzPlusMilliseconds(wait_start, wal_sender_timeout))
		{
			ereport(
				COMMERROR,
				(errmsg(
					"terminating walsender process due to pg_failover_slots.standby_slot_names replication timeout")));
			proc_exit(0);
		}

		/*
		 * The user might modify or clear pg_failover_slots.standby_slot_names.
		 * If we don't notice, we'll keep looping indefinitely here,
		 * so we have to check for config changes.
		 */
		if (ConfigReloadPending)
		{
			ConfigReloadPending = false;
			ProcessConfigFile(PGC_SIGHUP);

			if (skip_standby_slot_names(commit_lsn))
				return;
		}
	}
}

/*
 * Hackery to inject ourselves into walsender's logical stream starts here.
 *
 * We swap the walsender's PqCommMethods vtable for one whose
 * putmessage_noblock intercepts outgoing WAL data messages; every other
 * method below simply delegates to the saved original vtable.
 */
static const PQcommMethods *OldPqCommMethods;

static void
socket_comm_reset(void)
{
	OldPqCommMethods->comm_reset();
}

static int
socket_flush(void)
{
	return OldPqCommMethods->flush();
}

static int
socket_flush_if_writable(void)
{
	return OldPqCommMethods->flush_if_writable();
}

static bool
socket_is_send_pending(void)
{
	return OldPqCommMethods->is_send_pending();
}


static int
socket_putmessage(char msgtype, const char *s, size_t len)
{
	return OldPqCommMethods->putmessage(msgtype, s, len);
}

/*
 * Intercept outgoing CopyData ('d') messages carrying WAL ('w') and block
 * until the configured standbys confirm the message's LSN, then delegate.
 */
static void
socket_putmessage_noblock(char msgtype, const char *s, size_t len)
{
	/* 'd' + 'w' + 8-byte LSN + ... ; 17 is the minimum XLogData size. */
	if (msgtype == 'd' && len >= 17)
	{
		if (s[0] == 'w')
		{
			XLogRecPtr lsn;
			/*
			 * Extract the lsn from the wal message, and convert it from
			 * network byte order.
			 */
			memcpy(&lsn, &s[1], sizeof(XLogRecPtr));
			lsn = pg_ntoh64(lsn);
			/* Wait for the lsn */
			wait_for_standby_confirmation(lsn);
		}
	}

	OldPqCommMethods->putmessage_noblock(msgtype, s, len);
}

#if PG_VERSION_NUM < 140000
static void
socket_startcopyout(void)
{
	OldPqCommMethods->startcopyout();
}

static void
socket_endcopyout(bool errorAbort)
{
	OldPqCommMethods->endcopyout(errorAbort);
}
#endif


/* Replacement vtable installed by attach_to_walsender() below. */
#if PG_VERSION_NUM >= 120000
static const
#else
static
#endif
PQcommMethods PqCommSocketMethods = {
	socket_comm_reset, socket_flush, socket_flush_if_writable,
	socket_is_send_pending, socket_putmessage, socket_putmessage_noblock
#if PG_VERSION_NUM < 140000
	, socket_startcopyout, socket_endcopyout
#endif
};

static ClientAuthentication_hook_type original_client_auth_hook = NULL;

/*
 * ClientAuthentication_hook: for database-connected walsenders, swap in
 * our PQcommMethods so logical streams wait on standby confirmation.
 */
static void
attach_to_walsender(Port *port, int status)
{
	/*
	 * Chain to any other plugins which use ClientAuthentication_hook.
	 */
	if (original_client_auth_hook)
		original_client_auth_hook(port, status);

	if (am_db_walsender)
	{
		OldPqCommMethods = PqCommMethods;
		PqCommMethods = &PqCommSocketMethods;
	}
}

/*
 * Module load callback: define GUCs, register the background worker and
 * install the ClientAuthentication hook.  Requires shared_preload_libraries.
 */
void
_PG_init(void)
{
	BackgroundWorker bgw;

	if (!process_shared_preload_libraries_in_progress)
		elog(ERROR, "pg_failover_slots is not in shared_preload_libraries");

	DefineCustomStringVariable(
		"pg_failover_slots.version", "pg_failover_slots module version", "",
		&pg_failover_slots_version_str, PG_FAILOVER_SLOTS_VERSION,
		PGC_INTERNAL, GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE, NULL, NULL,
		NULL);

	DefineCustomStringVariable(
		"pg_failover_slots.standby_slot_names",
		"list of names of slot that must confirm changes before they're sent by the decoding plugin",
		"List of physical replication slots that must confirm durable "
		"flush of a given lsn before commits up to that lsn may be "
		"replicated to logical peers by the output plugin. "
		"Imposes ordering of physical replication before logical "
		"replication.",
		&standby_slot_names_raw, "", PGC_SIGHUP, GUC_LIST_INPUT,
		check_standby_slot_names, assign_standby_slot_names, NULL);


	/*
	 * NOTE(review): the long description below has two missing spaces in
	 * the concatenated literals ("...standby_slot_names""must confirm" and
	 * "standby_slots_min_confirmedto") -- user-visible help-text typos.
	 */
	DefineCustomIntVariable(
		"pg_failover_slots.standby_slots_min_confirmed",
		"Number of slots from pg_failover_slots.standby_slot_names that must confirm lsn",
		"Modifies behaviour of pg_failover_slots.standby_slot_names so to allow "
		"logical replication of a transaction after at least "
		"pg_failover_slots.standby_slots_min_confirmed physical peers have confirmed "
		"the transaction as durably flushed. "
		"The value -1 (default) means all entries in pg_failover_slots.standby_slot_names"
		"must confirm the write. The value 0 causes "
		"pg_failover_slots.standby_slots_min_confirmedto be effectively ignored.",
		&standby_slots_min_confirmed, -1, -1, 100, PGC_SIGHUP, 0, NULL, NULL,
		NULL);

	DefineCustomStringVariable(
		"pg_failover_slots.synchronize_slot_names",
		"list of slots to synchronize from primary to physical standby", "",
		&pg_failover_slot_names, "name_like:%%",
		PGC_SIGHUP, /* Sync ALL slots by default */
		GUC_LIST_INPUT, check_failover_slot_names, assign_failover_slot_names,
		NULL);


	DefineCustomBoolVariable(
		"pg_failover_slots.drop_extra_slots",
		"whether to drop extra slots on standby that don't match pg_failover_slots.synchronize_slot_names",
		NULL, &pg_failover_slots_drop, true, PGC_SIGHUP, 0, NULL, NULL, NULL);

	DefineCustomStringVariable(
		"pg_failover_slots.primary_dsn",
		"connection string to the primary server for synchronization logical slots on standby",
		"if empty, uses the defaults to primary_conninfo",
		&pg_failover_slots_dsn, "", PGC_SIGHUP, GUC_SUPERUSER_ONLY, NULL, NULL,
		NULL);

	DefineCustomIntVariable(
		"pg_failover_slots.worker_nap_time",
		"Time to sleep between two synchronisation attempts.",
		NULL,
		&worker_nap_time, 60000, 1000, INT_MAX, PGC_SIGHUP,
		GUC_SUPERUSER_ONLY | GUC_UNIT_MS, NULL, NULL, NULL);

	/* NOTE(review): "datbase" typo in the user-visible help text below. */
	DefineCustomStringVariable(
		"pg_failover_slots.maintenance_db",
		"Database to connect to when using the primary_conninfo",
		"When connecting to the primary using the primary_conninfo instead of a specifically set "
		"pg_failover_slots.primary_dsn, use this datbase to query the pg_replication_slots view.",
		&pg_failover_maintenance_db, "postgres", PGC_SIGHUP, GUC_SUPERUSER_ONLY,
		NULL, NULL, NULL);


	if (IsBinaryUpgrade)
		return;

	/* Run the worker. */
	memset(&bgw, 0, sizeof(bgw));
	bgw.bgw_flags =
		BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;
	bgw.bgw_start_time = BgWorkerStart_ConsistentState;
	snprintf(bgw.bgw_library_name, BGW_MAXLEN, EXTENSION_NAME);
	snprintf(bgw.bgw_function_name, BGW_MAXLEN, "pg_failover_slots_main");
	snprintf(bgw.bgw_name, BGW_MAXLEN, "pg_failover_slots worker");
	bgw.bgw_restart_time = 60;

	RegisterBackgroundWorker(&bgw);

	/* Install Hooks */
	original_client_auth_hook = ClientAuthentication_hook;
	ClientAuthentication_hook = attach_to_walsender;
}
--------------------------------------------------------------------------------
/t/010_slot_sync.pl:
--------------------------------------------------------------------------------

use strict;
use warnings;
use File::Path qw(rmtree);
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;

# Test set-up
my $node_primary = PostgreSQL::Test::Cluster->new('test');
$node_primary->init(allows_streaming => 'logical');
$node_primary->append_conf('postgresql.conf', 'shared_preload_libraries = pg_failover_slots');
$node_primary->start;
is( $node_primary->psql(
		'postgres',
		qq[SELECT pg_create_physical_replication_slot('standby_1');]),
	0,
	'physical slot created on primary');
my $backup_name = 'my_backup';

# Take backup
$node_primary->backup($backup_name);

# Create streaming standby linking to primary
my $node_standby = PostgreSQL::Test::Cluster->new('standby_1');
$node_standby->init_from_backup($node_primary, $backup_name,
	has_streaming => 1);
$node_standby->append_conf('postgresql.conf', 'hot_standby_feedback = on');

my $pg_version = `pg_config --version | awk '{print \$2}'`;
if ($pg_version >= 12) {
	$node_standby->append_conf('postgresql.conf', 'primary_slot_name
= standby_1'); 33 | } 34 | else { 35 | $node_standby->append_conf('recovery.conf', 'primary_slot_name = standby_1'); 36 | } 37 | $node_standby->start; 38 | 39 | # Wait for the sync worker to start 40 | $node_standby->poll_query_until('postgres', "SELECT count(*) > 0 FROM pg_stat_activity where application_name LIKE 'pg_failover_slots%'"); 41 | 42 | # Create table. 43 | $node_primary->safe_psql('postgres', "CREATE TABLE test_repl_stat(col1 serial)"); 44 | 45 | # Create replication slots. 46 | $node_primary->safe_psql( 47 | 'postgres', qq[ 48 | SELECT pg_create_logical_replication_slot('regression_slot1', 'test_decoding'); 49 | SELECT pg_create_logical_replication_slot('regression_slot2', 'test_decoding'); 50 | SELECT pg_create_logical_replication_slot('regression_slot3', 'test_decoding'); 51 | SELECT pg_create_logical_replication_slot('regression_slot4', 'test_decoding'); 52 | ]); 53 | 54 | # Simulate some small load to move things forward and wait for slots to be 55 | # synced downstream. 
56 | while (1) { 57 | $node_primary->safe_psql( 58 | 'postgres', qq[ 59 | SELECT data FROM pg_logical_slot_get_changes('regression_slot1', NULL, 60 | NULL, 'include-xids', '0', 'skip-empty-xacts', '1'); 61 | SELECT data FROM pg_logical_slot_get_changes('regression_slot2', NULL, 62 | NULL, 'include-xids', '0', 'skip-empty-xacts', '1'); 63 | SELECT data FROM pg_logical_slot_get_changes('regression_slot3', NULL, 64 | NULL, 'include-xids', '0', 'skip-empty-xacts', '1'); 65 | SELECT data FROM pg_logical_slot_get_changes('regression_slot4', NULL, 66 | NULL, 'include-xids', '0', 'skip-empty-xacts', '1'); 67 | ]); 68 | 69 | $node_primary->safe_psql('postgres', "INSERT INTO test_repl_stat DEFAULT VALUES;"); 70 | 71 | last if ($node_standby->safe_psql('postgres',"SELECT count(*) > 3 FROM pg_replication_slots WHERE NOT active") eq "t"); 72 | 73 | sleep(1); 74 | } 75 | 76 | # Now that slots moves they should be all synced 77 | is($node_standby->safe_psql('postgres', "SELECT slot_name FROM pg_replication_slots ORDER BY slot_name"), q[regression_slot1 78 | regression_slot2 79 | regression_slot3 80 | regression_slot4], 'all slots synced'); 81 | 82 | # Wait for replication to catch up 83 | my $primary_lsn = $node_primary->lsn('write'); 84 | $node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn); 85 | 86 | # Test to drop one of the replication slot 87 | $node_primary->safe_psql('postgres', 88 | "SELECT pg_drop_replication_slot('regression_slot4')"); 89 | 90 | $node_primary->stop; 91 | $node_primary->start; 92 | 93 | $node_primary->stop; 94 | my $datadir = $node_primary->data_dir; 95 | my $slot3_replslotdir = "$datadir/pg_replslot/regression_slot3"; 96 | 97 | rmtree($slot3_replslotdir); 98 | 99 | $node_primary->append_conf('postgresql.conf', 'max_replication_slots = 3'); 100 | $node_primary->start; 101 | 102 | # cleanup 103 | $node_primary->safe_psql('postgres', 104 | "SELECT pg_drop_replication_slot('regression_slot1')"); 105 | $node_primary->safe_psql('postgres', 
# t/020_physical_before_logical.pl
#
# Verifies that with pg_failover_slots.standby_slot_names set, logical
# replication to a subscriber is held back (walsender terminated with a
# timeout) until the named physical standby has replicated the data.

use strict;
use warnings;
use File::Path qw(rmtree);
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;

my $offset = 0;

# Test set-up: primary with pg_failover_slots preloaded and configured to
# require the physical standby to confirm before logical slots may advance.
my $node_primary = PostgreSQL::Test::Cluster->new('test');
$node_primary->init(allows_streaming => 'logical');
$node_primary->append_conf('postgresql.conf',
	"shared_preload_libraries = pg_failover_slots");
# Setup physical before logical slot
$node_primary->append_conf('postgresql.conf',
	"pg_failover_slots.standby_slot_names = 'standby_1'");

$node_primary->start;
is( $node_primary->psql(
		'postgres',
		qq[SELECT pg_create_physical_replication_slot('standby_1');]),
	0,
	'physical slot created on primary');
my $backup_name = 'my_backup';

# Take backup
$node_primary->backup($backup_name);

# Create streaming standby linking to primary (not started yet; the test
# first demonstrates that logical replication stalls without it).
my $node_standby = PostgreSQL::Test::Cluster->new('standby_1');
$node_standby->init_from_backup($node_primary, $backup_name,
	has_streaming => 1);
$node_standby->append_conf('postgresql.conf', 'hot_standby_feedback = on');

# Extract the server major version directly in Perl instead of piping
# through awk (avoids a shell-tool dependency and un-chomped output).
# primary_slot_name moved from recovery.conf to postgresql.conf in PG 12.
my ($pg_version) = `pg_config --version` =~ /(\d+)/;
if ($pg_version >= 12)
{
	$node_standby->append_conf('postgresql.conf',
		'primary_slot_name = standby_1');
}
else
{
	$node_standby->append_conf('recovery.conf',
		'primary_slot_name = standby_1');
}

# Create table.
$node_primary->safe_psql('postgres', "CREATE TABLE test_repl_stat(col1 int)");

# Create subscriber node
my $node_subscriber = PostgreSQL::Test::Cluster->new('subscriber');
$node_subscriber->init(allows_streaming => 'logical');
$node_subscriber->start;

$node_subscriber->safe_psql('postgres',
	"CREATE TABLE test_repl_stat(col1 int)");

my $node_primary_connstr =
  $node_primary->connstr . ' dbname=postgres application_name=tap_sub';
$node_primary->safe_psql('postgres',
	"CREATE PUBLICATION tap_pub FOR ALL TABLES");
$node_subscriber->safe_psql('postgres',
	"CREATE SUBSCRIPTION tap_sub CONNECTION '$node_primary_connstr' PUBLICATION tap_pub"
);
$node_primary->wait_for_catchup('tap_sub');

# Create replication slots.
$node_primary->safe_psql(
	'postgres', qq[
	SELECT pg_create_logical_replication_slot('regression_slot1', 'test_decoding');
]);

# Insert some data.
$node_primary->safe_psql('postgres',
	"INSERT INTO test_repl_stat values(generate_series(1, 5));");

# Fetching using pg_logical_slot_get_changes should work fine
$node_primary->safe_psql(
	'postgres', qq[
	SELECT data FROM pg_logical_slot_get_changes('regression_slot1', NULL,
	NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
]);

# Replication via pub/sub should time out though, because the standby named
# in standby_slot_names has not confirmed anything yet.
$offset = $node_primary->wait_for_log(
	qr/terminating walsender process due to pg_failover_slots.standby_slot_names replication timeout/,
	0);

# And subscriber should have nothing
is($node_subscriber->safe_psql('postgres', "SELECT * FROM test_repl_stat"),
	"", 'subscriber has nothing');

# Start standby
$node_standby->start;

# Wait for it to replicate
my $primary_lsn = $node_primary->lsn('write');
$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);

# Make sure subscriber replicates; poll_query_until returns false on
# timeout, so fail loudly instead of silently passing the next check.
$node_subscriber->poll_query_until('postgres',
	"SELECT count(*) > 4 FROM test_repl_stat")
  or die "timed out waiting for subscriber to replicate";

# Stop standby again
$node_standby->stop;

# Insert more data
$node_primary->safe_psql('postgres',
	"INSERT INTO test_repl_stat values(generate_series(10, 15));");

# Pub/Sub replication should timeout again
$offset = $node_primary->wait_for_log(
	qr/terminating walsender process due to pg_failover_slots.standby_slot_names replication timeout/,
	$offset);

# shutdown
$node_primary->stop;
$node_subscriber->stop;

done_testing();
# t/030_failover.pl
#
# Verifies that after a failover (standby promotion), the logical slots
# that pg_failover_slots synchronized to the standby survive promotion and
# remain consumable on the new primary.

use strict;
use warnings;
use File::Path qw(rmtree);
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;

# Test set-up: primary with pg_failover_slots preloaded.
my $node_primary = PostgreSQL::Test::Cluster->new('test');
$node_primary->init(allows_streaming => 'logical');
$node_primary->append_conf('postgresql.conf',
	'shared_preload_libraries = pg_failover_slots');
$node_primary->start;
is( $node_primary->psql(
		'postgres',
		qq[SELECT pg_create_physical_replication_slot('standby_1');]),
	0,
	'physical slot created on primary');
my $backup_name = 'my_backup';

# Take backup
$node_primary->backup($backup_name);

# Create streaming standby linking to primary
my $node_standby = PostgreSQL::Test::Cluster->new('standby_1');
$node_standby->init_from_backup($node_primary, $backup_name,
	has_streaming => 1);
$node_standby->append_conf('postgresql.conf', 'hot_standby_feedback = on');

# Extract the server major version directly in Perl instead of piping
# through awk (avoids a shell-tool dependency and un-chomped output).
# primary_slot_name moved from recovery.conf to postgresql.conf in PG 12.
my ($pg_version) = `pg_config --version` =~ /(\d+)/;
if ($pg_version >= 12)
{
	$node_standby->append_conf('postgresql.conf',
		'primary_slot_name = standby_1');
}
else
{
	$node_standby->append_conf('recovery.conf',
		'primary_slot_name = standby_1');
}
$node_standby->start;

# Wait for the sync worker to start; poll_query_until returns false on
# timeout, so fail loudly instead of continuing with a half-set-up test.
$node_standby->poll_query_until('postgres',
	"SELECT count(*) > 0 FROM pg_stat_activity where application_name LIKE 'pg_failover_slots%'"
) or die "timed out waiting for pg_failover_slots worker to start";

# Create table.
$node_primary->safe_psql('postgres',
	"CREATE TABLE test_repl_stat(col1 serial)");

# Create replication slots.
$node_primary->safe_psql(
	'postgres', qq[
	SELECT pg_create_logical_replication_slot('regression_slot1', 'test_decoding');
	SELECT pg_create_logical_replication_slot('regression_slot2', 'test_decoding');
	SELECT pg_create_logical_replication_slot('regression_slot3', 'test_decoding');
	SELECT pg_create_logical_replication_slot('regression_slot4', 'test_decoding');
]);

# Simulate some small load to move things forward and wait for slots to be
# synced downstream.  Bound the loop so a sync failure aborts the test
# instead of hanging the whole suite forever.
my $attempts = 0;
while (1)
{
	$node_primary->safe_psql(
		'postgres', qq[
		SELECT data FROM pg_logical_slot_get_changes('regression_slot1', NULL,
		NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
		SELECT data FROM pg_logical_slot_get_changes('regression_slot2', NULL,
		NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
		SELECT data FROM pg_logical_slot_get_changes('regression_slot3', NULL,
		NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
		SELECT data FROM pg_logical_slot_get_changes('regression_slot4', NULL,
		NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
	]);

	$node_primary->safe_psql('postgres',
		"INSERT INTO test_repl_stat DEFAULT VALUES;");

	last
	  if ($node_standby->safe_psql('postgres',
			"SELECT count(*) > 3 FROM pg_replication_slots WHERE NOT active")
		eq "t");

	die "timed out waiting for slots to be synced to standby"
	  if ++$attempts >= 180;

	sleep(1);
}

# Now that slots moved they should be all synced
is( $node_standby->safe_psql('postgres',
		"SELECT slot_name FROM pg_replication_slots ORDER BY slot_name"),
	q[regression_slot1
regression_slot2
regression_slot3
regression_slot4],
	'all slots synced');

# Wait for replication to catch up
my $primary_lsn = $node_primary->lsn('write');
$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);

# failover to standby
$node_primary->stop;
$node_standby->promote;

# Check that slots are on promoted standby
is( $node_standby->safe_psql('postgres',
		"SELECT slot_name FROM pg_replication_slots ORDER BY slot_name"),
	q[regression_slot1
regression_slot2
regression_slot3
regression_slot4],
	'slots are on promoted standby');

# Write on promoted standby
$node_standby->safe_psql('postgres',
	"INSERT INTO test_repl_stat DEFAULT VALUES;");

# Check that slots are consumable on promoted standby
$node_standby->safe_psql(
	'postgres', qq[
	SELECT data FROM pg_logical_slot_get_changes('regression_slot1', NULL,
	NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
	SELECT data FROM pg_logical_slot_get_changes('regression_slot2', NULL,
	NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
	SELECT data FROM pg_logical_slot_get_changes('regression_slot3', NULL,
	NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
	SELECT data FROM pg_logical_slot_get_changes('regression_slot4', NULL,
	NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
]);

# shutdown
$node_standby->stop;

done_testing();