From e9686483ac70985714e54d0cc9f33f1dbddaf7a4 Mon Sep 17 00:00:00 2001
From: Alejandro Do Nascimento Mora
Date: Tue, 24 Oct 2023 23:11:00 +0200
Subject: [PATCH 1/5] Add live migration from postgres

---
 .../dual-write-from-postgres.md     |   1 +
 .../live-migration-from-postgres.md | 321 ++++++++++++++++++
 migrate/page-index/page-index.js    |  13 +
 3 files changed, 335 insertions(+)
 create mode 100644 migrate/live-migration/live-migration-from-postgres.md

diff --git a/migrate/dual-write-and-backfill/dual-write-from-postgres.md b/migrate/dual-write-and-backfill/dual-write-from-postgres.md
index 4eb1761cd2..73d2f9db96 100644
--- a/migrate/dual-write-and-backfill/dual-write-from-postgres.md
+++ b/migrate/dual-write-and-backfill/dual-write-from-postgres.md
@@ -18,6 +18,7 @@ import SourceTargetNote from "versionContent/_partials/_migrate_source_target_no
 import DumpDatabaseRoles from "versionContent/_partials/_migrate_dual_write_dump_database_roles.mdx";
 import Step6eTurnOnCompressionPolicies from "versionContent/_partials/_migrate_dual_write_6e_turn_on_compression_policies.mdx";
 import Step6aThroughc from "versionContent/_partials/_migrate_dual_write_6a_through_c.mdx";
+import ExplainPgDumpFlags from "versionContent/_partials/_migrate_explain_pg_dump_flags.mdx";
 
 # Dual-write and backfill from PostgreSQL database
 
diff --git a/migrate/live-migration/live-migration-from-postgres.md b/migrate/live-migration/live-migration-from-postgres.md
new file mode 100644
index 0000000000..3263e7df6a
--- /dev/null
+++ b/migrate/live-migration/live-migration-from-postgres.md
@@ -0,0 +1,321 @@
---
title: Migrate from PostgreSQL using live migration
excerpt: Migrate from a PostgreSQL database using the low-downtime live migration method
products: [cloud]
keywords: [migration, low-downtime]
tags: [migration, logical backup, replication]
---

import GettingHelp from "versionContent/_partials/_migrate_dual_write_backfill_getting_help.mdx";
import SourceTargetNote from "versionContent/_partials/_migrate_source_target_note.mdx";
import StepOne from "versionContent/_partials/_migrate_dual_write_step1.mdx";
import DumpDatabaseRoles from "versionContent/_partials/_migrate_dual_write_dump_database_roles.mdx";
import ExplainPgDumpFlags from "versionContent/_partials/_migrate_explain_pg_dump_flags.mdx";

# Live migration from PostgreSQL database with pgcopydb

This document provides step-by-step instructions for using [pgcopydb][pgcopydb]
to perform a live migration from a source PostgreSQL database to Timescale.

Before beginning the migration process, ensure that the tools `psql`, `pg_dump`,
`pg_dumpall`, and `pgcopydb` are installed and available on the system that
performs the migration.

For Debian and Ubuntu systems, you can install all the tools with:

```
sudo apt update
sudo apt install -y postgresql-common
sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh
sudo apt install -y pgcopydb
```

- `pgcopydb`: Installation instructions can be found in the [official
  repository][install-pgcopydb]. When installing from package managers like
  `apt`, `yum`, or `dnf`, the other required tools are usually also installed
  as dependencies of `pgcopydb`.

- `psql`, `pg_dump`, and `pg_dumpall`: These can be installed by following the
  instructions in the [How to Install psql on Mac, Ubuntu, Debian, Windows][install-psql]
  blog post. Although the instructions specifically mention `psql`, following
  them also installs `pg_dump` and `pg_dumpall`.
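Before proceeding, it can be useful to confirm that all four tools are on your
`PATH`, and to define the `$SOURCE` and `$TARGET` connection strings that the
commands in this guide reference. This is a minimal sketch: the connection URIs
below are hypothetical placeholders, so substitute your own credentials, hosts,
and database names.

```sh
# Confirm the required tools are installed and resolvable
psql --version
pg_dump --version
pg_dumpall --version
pgcopydb --version

# Hypothetical connection strings; replace user, password, host, and
# database with the values for your source database and Timescale target.
export SOURCE="postgres://user:password@source-host:5432/source_db"
export TARGET="postgres://user:password@target-host:5432/target_db"
```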
<SourceTargetNote />

In detail, the migration process consists of the following steps:

1. Set up a target database instance in Timescale.
1. Set up a replication slot and snapshot.
1. Migrate roles and schema from source to target.
1. Convert hypertables and enable Timescale features.
1. Migrate initial data from source to target.
1. Apply replication changes.
1. Promote target database as new primary.

<StepOne />

## 2. Set up a replication slot and snapshot

The [replication slot][replication-slot] forms the backbone of the replication
strategy.

> A slot represents a stream of changes that can be replayed to a client in the
> order they were made on the origin server.

The stream of changes emitted by the slot is buffered to disk until it is
applied on the target. The instance used to orchestrate the migration (the one
running the commands) must have enough capacity to store these files, and it
should be actively monitored to prevent issues caused by a lack of disk space.

Use the `pgcopydb follow` command to create a replication slot:

```sh
pgcopydb follow \
    --source "$SOURCE" \
    --target "$TARGET" \
    --fail-fast \
    --plugin wal2json
```

This command remains active during most of the migration process. You can run
it in a separate terminal instance, or start it in the background. To start it
in the background, append `> pgcopydb_follow.log 2>&1 &` to the command. For
example:

```sh
pgcopydb follow \
    --source "$SOURCE" \
    --target "$TARGET" \
    --fail-fast \
    --plugin wal2json > pgcopydb_follow.log 2>&1 &
```

The `> pgcopydb_follow.log 2>&1` part redirects all messages to the
`pgcopydb_follow.log` file. This is optional but recommended: the `pgcopydb
follow` command outputs many messages, and if they are not redirected, the
constant stream of output makes the terminal cumbersome to use.

The `follow` command not only creates the replication slot for streaming
changes, but also exports a [snapshot][snapshot] ID to `/tmp/pgcopydb/snapshot`.
This ID can be used to migrate the data that was stored in the database prior
to the creation of the replication slot.

> A snapshot determines which data is visible to the transaction that is using
> the snapshot. Synchronized snapshots are necessary when two or more sessions
> need to see identical content in the database.

Before the stream of changes can be applied, the schema and data that existed
prior to the creation of the replication slot must be migrated ([step
3][step-3]). The point that marks the beginning of the replication and
buffering of changes is given by the exported snapshot. The larger the
database, the longer the initial migration takes, and the longer the buffered
files need to be stored.

## 3. Migrate roles and schema from source to target

### 3a. Dump the database roles from the source database

<DumpDatabaseRoles />

### 3b. Dump the database schema from the source database

```sh
pg_dump -d "$SOURCE" \
    --format=plain \
    --quote-all-identifiers \
    --no-tablespaces \
    --no-owner \
    --no-privileges \
    --schema-only \
    --file=dump.sql \
    --snapshot=$(cat /tmp/pgcopydb/snapshot)
```

- `--schema-only` is used to dump only the object definitions (schema), not
  data.

- `--snapshot` is used to specify the synchronized [snapshot][snapshot] when
  making a dump of the database.

<ExplainPgDumpFlags />
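Before loading anything into the target, you can optionally verify that the
replication setup is still healthy: the snapshot file exported by `pgcopydb
follow` should exist, and the replication slot should be visible on the source.
A quick check, using only the standard `pg_replication_slots` catalog view,
might look like:

```sh
# Print the snapshot ID exported by `pgcopydb follow`
cat /tmp/pgcopydb/snapshot

# Confirm the replication slot exists on the source database
psql -d "$SOURCE" \
    -c "SELECT slot_name, plugin, slot_type, active FROM pg_replication_slots;"
```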
### 3c. Load the roles and schema into the target database

```sh
psql -X -d "$TARGET" \
    -v ON_ERROR_STOP=1 \
    --echo-errors \
    -f roles.sql \
    -f dump.sql
```

## 4. Convert hypertables and enable Timescale features

This is the ideal point to convert regular tables into hypertables. In simple
terms, you'll want to convert the tables that contain time-series data. For
each table that's going to be converted to a hypertable in the target database,
run the following command:

```sh
psql -X -d "$TARGET" \
    -v ON_ERROR_STOP=1 \
    -c "SELECT create_hypertable('<table name>', '
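# For illustration, assuming a hypothetical 'metrics' table whose time
# partitioning column is named 'time', a complete invocation might look like:
psql -X -d "$TARGET" \
    -v ON_ERROR_STOP=1 \
    -c "SELECT create_hypertable('metrics', 'time')"
```

`create_hypertable` takes the table name and its time column as arguments; run
it once for each table that holds time-series data, before the initial data is
migrated in the next step.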