From e9686483ac70985714e54d0cc9f33f1dbddaf7a4 Mon Sep 17 00:00:00 2001
From: Alejandro Do Nascimento Mora
Date: Tue, 24 Oct 2023 23:11:00 +0200
Subject: [PATCH 1/5] Add live migration from postgres

---
 .../dual-write-from-postgres.md     |   1 +
 .../live-migration-from-postgres.md | 321 ++++++++++++++++++
 migrate/page-index/page-index.js    |  13 +
 3 files changed, 335 insertions(+)
 create mode 100644 migrate/live-migration/live-migration-from-postgres.md

diff --git a/migrate/dual-write-and-backfill/dual-write-from-postgres.md b/migrate/dual-write-and-backfill/dual-write-from-postgres.md
index 4eb1761cd2..73d2f9db96 100644
--- a/migrate/dual-write-and-backfill/dual-write-from-postgres.md
+++ b/migrate/dual-write-and-backfill/dual-write-from-postgres.md
@@ -18,6 +18,7 @@ import SourceTargetNote from "versionContent/_partials/_migrate_source_target_no
 import DumpDatabaseRoles from "versionContent/_partials/_migrate_dual_write_dump_database_roles.mdx";
 import Step6eTurnOnCompressionPolicies from "versionContent/_partials/_migrate_dual_write_6e_turn_on_compression_policies.mdx";
 import Step6aThroughc from "versionContent/_partials/_migrate_dual_write_6a_through_c.mdx";
+import ExplainPgDumpFlags from "versionContent/_partials/_migrate_explain_pg_dump_flags.mdx";
 
 # Dual-write and backfill from PostgreSQL database
 
diff --git a/migrate/live-migration/live-migration-from-postgres.md b/migrate/live-migration/live-migration-from-postgres.md
new file mode 100644
index 0000000000..3263e7df6a
--- /dev/null
+++ b/migrate/live-migration/live-migration-from-postgres.md
@@ -0,0 +1,321 @@
---
title: Migrate from PostgreSQL using live migration
excerpt: Migrate from a PostgreSQL database using the low-downtime live migration method
products: [cloud]
keywords: [migration, low-downtime]
tags: [migration, logical backup, replication]
---

import GettingHelp from "versionContent/_partials/_migrate_dual_write_backfill_getting_help.mdx";
import SourceTargetNote from "versionContent/_partials/_migrate_source_target_note.mdx";
import StepOne from "versionContent/_partials/_migrate_dual_write_step1.mdx";
import DumpDatabaseRoles from "versionContent/_partials/_migrate_dual_write_dump_database_roles.mdx";
import ExplainPgDumpFlags from "versionContent/_partials/_migrate_explain_pg_dump_flags.mdx";

# Live migration from PostgreSQL database with pgcopydb

This document provides step-by-step instructions for using [pgcopydb][pgcopydb]
to perform a live migration from a source PostgreSQL database to Timescale.

Before beginning the migration process, ensure that the tools `psql`, `pg_dump`,
`pg_dumpall`, and `pgcopydb` are installed and available on the system that
performs the migration.

For Debian and Ubuntu systems, you can install all the tools with:

```
sudo apt update
sudo apt install -y postgresql-common
sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh
sudo apt install -y pgcopydb
```

- `pgcopydb`: Installation instructions can be found in the [official
  repository][install-pgcopydb]. When installing from package managers like
  `apt`, `yum`, or `dnf`, the other required tools are usually also installed
  as dependencies of `pgcopydb`.

- `psql`, `pg_dump`, and `pg_dumpall`: These can be installed by following the
  instructions in the [How to Install psql on Mac, Ubuntu, Debian, Windows][install-psql]
  blog post. Although the instructions specifically mention `psql`, following
  them also installs `pg_dump` and `pg_dumpall`.
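Before proceeding, it can be useful to confirm that all four tools are on your
`PATH`, and to define the `$SOURCE` and `$TARGET` connection strings that the
commands in this guide reference. This is a minimal sketch: the connection URIs
below are hypothetical placeholders, so substitute your own credentials, hosts,
and database names.

```sh
# Confirm the required tools are installed and resolvable
psql --version
pg_dump --version
pg_dumpall --version
pgcopydb --version

# Hypothetical connection strings; replace user, password, host, and
# database with the values for your source database and Timescale target.
export SOURCE="postgres://user:password@source-host:5432/source_db"
export TARGET="postgres://user:password@target-host:5432/target_db"
```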
<SourceTargetNote />

In detail, the migration process consists of the following steps:

1. Set up a target database instance in Timescale.
1. Set up a replication slot and snapshot.
1. Migrate roles and schema from source to target.
1. Convert hypertables and enable Timescale features.
1. Migrate initial data from source to target.
1. Apply replication changes.
1. Promote target database as new primary.

<StepOne />

## 2. Set up a replication slot and snapshot

The [replication slot][replication-slot] forms the backbone of the replication
strategy.

> A slot represents a stream of changes that can be replayed to a client in the
> order they were made on the origin server.

The stream of changes emitted by the slot is buffered to disk until it is
applied on the target. The instance used to orchestrate the migration (the one
running the commands) must have enough capacity to store these files, and it
should be actively monitored to prevent issues caused by a lack of disk space.

Use the `pgcopydb follow` command to create a replication slot:

```sh
pgcopydb follow \
    --source "$SOURCE" \
    --target "$TARGET" \
    --fail-fast \
    --plugin wal2json
```

This command remains active during most of the migration process. You can run
it in a separate terminal instance, or start it in the background. To start it
in the background, append `> pgcopydb_follow.log 2>&1 &` to the command. For
example:

```sh
pgcopydb follow \
    --source "$SOURCE" \
    --target "$TARGET" \
    --fail-fast \
    --plugin wal2json > pgcopydb_follow.log 2>&1 &
```

The `> pgcopydb_follow.log 2>&1` part redirects all messages to the
`pgcopydb_follow.log` file. This is optional but recommended: the `pgcopydb
follow` command outputs many messages, and if they are not redirected, the
constant stream of output makes the terminal cumbersome to use.

The `follow` command not only creates the replication slot for streaming
changes, but also exports a [snapshot][snapshot] ID to `/tmp/pgcopydb/snapshot`.
This ID can be used to migrate the data that was stored in the database prior
to the creation of the replication slot.

> A snapshot determines which data is visible to the transaction that is using
> the snapshot. Synchronized snapshots are necessary when two or more sessions
> need to see identical content in the database.

Before the stream of changes can be applied, the schema and data that existed
prior to the creation of the replication slot must be migrated ([step
3][step-3]). The point that marks the beginning of the replication and
buffering of changes is given by the exported snapshot. The larger the
database, the longer the initial migration takes, and the longer the buffered
files need to be stored.

## 3. Migrate roles and schema from source to target

### 3a. Dump the database roles from the source database

<DumpDatabaseRoles />

### 3b. Dump the database schema from the source database

```sh
pg_dump -d "$SOURCE" \
    --format=plain \
    --quote-all-identifiers \
    --no-tablespaces \
    --no-owner \
    --no-privileges \
    --schema-only \
    --file=dump.sql \
    --snapshot=$(cat /tmp/pgcopydb/snapshot)
```

- `--schema-only` is used to dump only the object definitions (schema), not
  data.

- `--snapshot` is used to specify the synchronized [snapshot][snapshot] when
  making a dump of the database.

<ExplainPgDumpFlags />
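Before loading anything into the target, you can optionally verify that the
replication setup is still healthy: the snapshot file exported by `pgcopydb
follow` should exist, and the replication slot should be visible on the source.
A quick check, using only the standard `pg_replication_slots` catalog view,
might look like:

```sh
# Print the snapshot ID exported by `pgcopydb follow`
cat /tmp/pgcopydb/snapshot

# Confirm the replication slot exists on the source database
psql -d "$SOURCE" \
    -c "SELECT slot_name, plugin, slot_type, active FROM pg_replication_slots;"
```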
### 3c. Load the roles and schema into the target database

```sh
psql -X -d "$TARGET" \
    -v ON_ERROR_STOP=1 \
    --echo-errors \
    -f roles.sql \
    -f dump.sql
```

## 4. Convert hypertables and enable Timescale features

This is the ideal point to convert regular tables into hypertables. In simple
terms, you'll want to convert the tables that contain time-series data. For
each table that's going to be converted to a hypertable in the target database,
run the following command:

```sh
psql -X -d "$TARGET" \
    -v ON_ERROR_STOP=1 \
    -c "SELECT create_hypertable('<table name>', '
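# For illustration, assuming a hypothetical 'metrics' table whose time
# partitioning column is named 'time', a complete invocation might look like:
psql -X -d "$TARGET" \
    -v ON_ERROR_STOP=1 \
    -c "SELECT create_hypertable('metrics', 'time')"
```

`create_hypertable` takes the table name and its time column as arguments; run
it once for each table that holds time-series data, before the initial data is
migrated in the next step.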