From a6ff77697258b1df839d3b437e532a1c7043949c Mon Sep 17 00:00:00 2001 From: Takayuk Shimizukawa Date: Tue, 24 Jul 2018 08:59:01 +0900 Subject: [PATCH] restructure documentation --- AUTHORS.rst | 6 +- CHANGES.rst | 8 +- README.rst | 323 +------------------------------------------------- doc/basic.rst | 33 ++++++ doc/index.rst | 39 ++++-- doc/refs.rst | 234 ++++++++++++++++++++++++++++++++++++ doc/usage.rst | 93 --------------- setup.py | 2 +- 8 files changed, 308 insertions(+), 430 deletions(-) create mode 100644 doc/basic.rst create mode 100644 doc/refs.rst delete mode 100644 doc/usage.rst diff --git a/AUTHORS.rst b/AUTHORS.rst index 29bba13..d4535e6 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -2,6 +2,8 @@ AUTHORS ======= -* Takayuki Shimizukawa - +* Takayuki Shimizukawa +* Kosei Kitahara +* Evandro Myller +* Maxime Vdb diff --git a/CHANGES.rst b/CHANGES.rst index 57ffbb3..bb5ef85 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,8 +4,10 @@ CHANGES 0.9 (Unreleased) ---------------- -* Drop support for Django 1.8, 1.9 and 1.10 - +* #35: Drop support for Django 1.8, 1.9 and 1.10. +* #40: Support Django 2.0. +* #42: Support DISTKEY. Thanks to Benjy Weinberger. +* Documentation: http://django-redshift-backend.rtfd.io/ 0.8.1 (2018-06-19) ------------------ @@ -17,7 +19,7 @@ CHANGES Incompatible Changes: -* #23,#10 Redshift support time zones in time stamps for migration +* #23,#10: Redshift support time zones in time stamps for migration **IMPORTANT**: With this change, the newly created DateTimeField column will be timestamp diff --git a/README.rst b/README.rst index 8e5816a..d82302e 100644 --- a/README.rst +++ b/README.rst @@ -2,61 +2,10 @@ Redshift database backend for Django ==================================== -This product is tested with: +This is a Redshift database backend for Django. 
-* python-2.7, 3.5, 3.6 -* django-1.11, 2.0 - - -Differences from postgres_psycopg2 backend -========================================== - -Type mapping: - -* 'integer identity(1, 1)' for AutoField -* 'bigint identity(1, 1)' for BigAutoField -* 'timestamp with time zone' for DateTimeField -* 'varchar(max)' for TextField -* 'varchar(32)' for UUIDField -* Possibility to multiply VARCHAR length to support utf-8 string, using - `REDSHIFT_VARCHAR_LENGTH_MULTIPLIER` setting. - -Stop using: - -* RETURNING (single insert and bulk insert) -* SELECT FOR UPDATE -* SELECT DISTINCT ON -* SET CONSTRAINTS -* INDEX -* DEFERRABLE INITIALLY DEFERRED -* CONSTRAINT -* CHECK -* DROP DEFAULT - -To support migration: - -* To add column to existent table on Redshift, column must be nullable -* To support modify column, add new column -> data migration -> drop old column -> rename - -Please note that the migration support for redshift is not perfect yet. - -Note and Limitation --------------------- - -Amazon Redshift doesn't support RETURNING, so ``last_insert_id`` method retrieve MAX(pk) after insertion as a workaround. - -refs: - -* http://stackoverflow.com/q/19428860 -* http://stackoverflow.com/q/25638539 - -In some case, MAX(pk) workaround does not work correctly. -Bulk insertion makes non-contiguous IDs like: 1, 4, 7, 10, ... -and single insertion after such bulk insertion generates strange id value like 2 (smallest non-used id). - - -SETTINGS -======== +Django settings +=============== ENGINE for DATABASES is 'django_redshift_backend'. You can set the name in your settings.py as:: @@ -71,276 +20,14 @@ ENGINE for DATABASES is 'django_redshift_backend'. You can set the name in your } } -REDSHIFT_VARCHAR_LENGTH_MULTIPLIER: - Possibility to multiply VARCHAR length to support utf-8 string. Default is 1. - -Using sortkey ---------------------------------- - -There is built-in support for this option for Django >= 1.9. 
To use `sortkey`, simply define an `ordering` on the model meta as follow:: - - class MyModel(models.Model): - ... - - class Meta: - ordering = ['col2'] - -N.B.: there is no validation of this option, instead we let Redshift validate it for you. Be sure to refer to the `documentation `_. - -Using distkey ---------------------------------- - -There is built-in support for this option for Django >= 1.11. To use `distkey`, define an index on the model -meta with the custom index type `django_redshift_backend.distkey.DistKey` with `fields` naming a single field:: - - class MyModel(models.Model): - ... - - class Meta: - indexes = [DistKey(fields=['customer_id'])] - -Redshift doesn't have conventional indexes, and we don't generate SQL for them. We merely use -`indexes` as a convenient place in the Meta to identify the `distkey`. - -You will likely encounter the following complication: - -Inlining Index Migrations -~~~~~~~~~~~~~~~~~~~~~~~~~ -Django's `makemigrations` generates a migration file that first applies a `CreateModel` operation without the -`indexes` option, and then adds the index in a separate `AddIndex` operation. - -However Redshift requires that the `distkey` be specified at table creation. As a result, you may need to -manually edit your migration files to move the index creation into the initial `CreateModel`. - -That is, to go from:: - - operations = [ - ... - migrations.CreateModel( - name='FactTable', - fields=[ - ('distkeycol', models.CharField()), - ('measure1', models.IntegerField()), - ('measure2', models.IntegerField()) - ... - ] - ), - ... - migrations.AddIndex( - model_name='facttable', - index=django_redshift_backend.distkey.DistKey(fields=['distkeycol'], name='...'), - ), - ] - -To:: - - operations = [ - ... - migrations.CreateModel( - name='FactTable', - fields=[ - ('distkeycol', models.CharField()), - ('measure1', models.IntegerField()), - ('measure2', models.IntegerField()) - ... 
- ], - options={ - 'indexes': [django_redshift_backend.distkey.DistKey(fields=['distkeycol'], name='...')], - }, - ), - ... - ] - - -Inlining ForeignKey Migrations -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -It is common to distribute fact tables on a foreign key column referencing the primary key of a dimension table. - -In this case you may also encounter the following added complication: - -Django's `makemigrations` generates a migration file that first applies a `CreateModel` operation without the -`ForeignKey` column, and then adds the `ForeignKey` column in a separate `AddField` operation. It does this to -avoid attempts to create foreign key constraints against tables that haven't been created yet. - -However Redshift requires that the `distkey` be specified at table creation. As a result, you may need to -manually edit your migration files to move the ForeignKey column into the initial `CreateModel`, while also -ensuring that the referenced table appears *before* the referencing table in the file. - -That is, to go from:: - - operations = [ - ... - migrations.CreateModel( - name='FactTable', - fields=[ - ('measure1', models.IntegerField()), - ('measure2', models.IntegerField()) - ... - ] - ), - ... - migrations.CreateModel( - name='Dimension1Table', - fields=[ - ... - ] - ), - ... - migrations.AddField( - model_name='facttable', - name='dim1', - field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='myapp.Dimension1Table'), - ), - ... - ] - -To:: +For more information, please refer to: http://django-redshift-backend.rtfd.io/ - operations = [ - migrations.CreateModel( - name='Dimension1Table', - fields=[ - ... - ] - ), - ... - migrations.CreateModel( - name='FactTable', - fields=[ - ('measure1', models.IntegerField()), - ('measure2', models.IntegerField()), - ('dim1', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='myapp.Dimension1Table')) - ... - ] - ), - ...
- ] - - - -TESTING -======= - -Testing this package requires: - -* tox-1.8 or later -* virtualenv-15.0.1 or later -* pip-8.1.1 or later LICENSE ======= Apache Software License -CHANGES -======= - -0.9 (Unreleased) ----------------- - -* #35: Drop support for Django 1.8, 1.9 and 1.10. -* #40: Support Django 2.0. -* #42: Support DISTKEY. Thanks to Benjy Weinberger. - -0.8.1 (2018-06-19) ------------------- - -* #38: Fix 0.8 doesn't compatible with Python 2. Thanks to Benjy Weinberger. - -0.8 (2018-06-01) ----------------- - -Incompatible Changes: - -* #23,#10: Redshift support time zones in time stamps for migration - - **IMPORTANT**: - With this change, the newly created DateTimeField column will be timestamp - with timezone (TIMESTAMPTZ) by migration. Therefore, the existing - DateTimeField and the new DateTimeField will have different data types as a - redshift schema column type. - There are no migration feature by django-redshift-backend. - see also: https://github.com/shimizukawa/django-redshift-backend/pull/23 - -New Features: - -* #20,#26: Support for sortkey. Thanks to Maxime Vdb and Kosei Kitahara. -* #24: Add UUIDField support. Thanks to Sindri Guðmundsson. -* #14: More compat with redshift: not use SELECT DISTINCT ON. - -Bug Fixes: - -* #15,#21: More compat with redshift: not use CHECK. Thanks to Vasil Vangelovski. -* #18: Fix error on migration with django-1.9 or later that raises AttributeError - of 'sql_create_table_unique'. -* #27: annotate() does not work on Django-1.9 and later. Thanks to Takayuki Hirai. - - -Documentation: - -* Add documentation: http://django-redshift-backend.rtfd.io/ - - -0.7 (2017-06-08) ----------------- - -* Drop Python-3.4 -* Drop Django-1.7 -* Support Python-3.6 -* Support Django-1.11 - -0.6 (2016-12-15) ----------------- - -* Fix crush problem when using bulk insert. - -0.5 (2016-10-05) ----------------- - -* Support Django-1.10 -* #9: Add support for BigAutoField. Thanks to Maxime Vdb. 
-* Fix crush problem on sqlmigrate when field modified. - -0.4 (2016-05-17) ----------------- - -* Support Python-3.4 and 3.5 -* #7: Restore support django-1.7. Version 0.3 doesn't support django-1.7. -* #4: More compat with redshift: not use SET CONSTRAINTS. Thanks to Maxime Vdb. -* #6: More compat with redshift: not use sequence reset query. Thanks to Maxime Vdb. -* #5: Add REDSHIFT_VARCHAR_LENGTH_MULTIPLIER settings. Thanks to Maxime Vdb. -* Support column type changing on migration. - -0.3 (2016-05-14) ----------------- - -* #3: more compat with Redshift (AutoField, DateTimeField, Index). Thanks to Maxime Vdb. -* More compat with redshift: add TextField -* More compat with redshift: not use DEFERRABLE, CONSTRAINT, DROP DEFAULT -* More compat with redshift: support modify column - - -0.2.1 (2016-02-01) ------------------- - -* "SET TIME_ZONE" warning is changed as debug log for 'django.db.backend' logger. - -0.2 (2016-01-08) ----------------- - -* Disable "SET TIME_ZONE" SQL execution even if settings.TIME_ZONE is specified. - -0.1.2 (2015-06-5) ------------------ - -* Support Django-1.8 - -0.1.1 (2015-03-27) ------------------- -* Disable "SELECT FOR UPDATE" SQL execution. +.. CHANGES.rst will be concatenated here by setup.py -0.1 (2015-03-24) ----------------- -* Support Django-1.7 -* Support "INSERT INTO" SQL execution without "RETURNING" clause. diff --git a/doc/basic.rst b/doc/basic.rst new file mode 100644 index 0000000..4224272 --- /dev/null +++ b/doc/basic.rst @@ -0,0 +1,33 @@ +===== +Basic +===== + +Installation +============ + +Please install django-redshift-backend using pip (8.1.1 or later). + +.. code-block:: bash + + $ pip install django-redshift-backend + + +Django settings +=============== + +ENGINE for DATABASES is 'django_redshift_backend'.
You can set the name in your settings.py as:: + + DATABASES = { + 'default': { + 'ENGINE': 'django_redshift_backend', + 'NAME': '', + 'USER': '', + 'PASSWORD': '', + 'HOST': '', + 'PORT': '5439', + } + } + +For more information, please refer to :doc:`refs`. + + diff --git a/doc/index.rst b/doc/index.rst index e59eaf0..b1fd20f 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -1,13 +1,26 @@ -======================= -Django Redshift Backend -======================= - -This is a Redshift database backend for Django. - -.. toctree:: - - usage - dev - changes - contributors - +======================= +Django Redshift Backend +======================= + +This is a Redshift database backend for Django. + +Support versions +================ + +This product is tested with: + +* python-2.7, 3.5, 3.6 +* django-1.11, 2.0 + +LICENSE +======= +Apache Software License + + +.. toctree:: + + basic + refs + dev + changes + authors diff --git a/doc/refs.rst b/doc/refs.rst new file mode 100644 index 0000000..daf076a --- /dev/null +++ b/doc/refs.rst @@ -0,0 +1,234 @@ +========== +References +========== + +.. contents:: + :local: + +Differences from postgres_psycopg2 backend +========================================== + +Type mapping: + +* 'integer identity(1, 1)' for AutoField +* 'bigint identity(1, 1)' for BigAutoField +* 'timestamp with time zone' for DateTimeField +* 'varchar(max)' for TextField +* 'varchar(32)' for UUIDField +* Possibility to multiply VARCHAR length to support utf-8 string, using + `REDSHIFT_VARCHAR_LENGTH_MULTIPLIER` setting.
+ +Stop using: + +* RETURNING (single insert and bulk insert) +* SELECT FOR UPDATE +* SELECT DISTINCT ON +* SET CONSTRAINTS +* INDEX +* DEFERRABLE INITIALLY DEFERRED +* CONSTRAINT +* CHECK +* DROP DEFAULT + +To support migration: + +* To add a column to an existing table on Redshift, the column must be nullable +* To support modifying a column: add new column -> data migration -> drop old column -> rename + +Please note that the migration support for redshift is not perfect yet. + + +Note and Limitation +-------------------- + +Amazon Redshift doesn't support RETURNING, so the ``last_insert_id`` method retrieves MAX(pk) after insertion as a workaround. + +refs: + +* http://stackoverflow.com/q/19428860 +* http://stackoverflow.com/q/25638539 + +In some cases, the MAX(pk) workaround does not work correctly. +Bulk insertion makes non-contiguous IDs like: 1, 4, 7, 10, ... +and single insertion after such bulk insertion generates strange id value like 2 (smallest non-used id). + + +Django Settings +=============== + +settings.DATABASES +-------------------- + +:ENGINE: + Set 'django_redshift_backend'. + +:NAME: + Set '<your database name>'. + +:USER: + Set '<your database username>'. + +:PASSWORD: + Set '<your database password>'. + +:HOST: + Set '<your database hostname>'. + +:PORT: + Set your Redshift server port number. The Redshift default is '5439'. + } + } + +settings.REDSHIFT_VARCHAR_LENGTH_MULTIPLIER +------------------------------------------- + +Possibility to multiply VARCHAR length to support utf-8 string. Default is 1. + +See also: https://docs.aws.amazon.com/redshift/latest/dg/r_Character_types.html#r_Character_types-storage-and-ranges + + +Django Models +============= + +Using sortkey +------------- + +There is built-in support for this option for Django >= 1.9. To use `sortkey`, simply define an `ordering` on the model meta as follows:: + + class MyModel(models.Model): + ... + + class Meta: + ordering = ['col2'] + +N.B.: there is no validation of this option, instead we let Redshift validate it for you. Be sure to refer to the `documentation <https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_TABLE_NEW.html>`_.
+ +Using distkey +------------- + +There is built-in support for this option for Django >= 1.11. To use `distkey`, define an index on the model +meta with the custom index type `django_redshift_backend.distkey.DistKey` with `fields` naming a single field:: + + class MyModel(models.Model): + ... + + class Meta: + indexes = [DistKey(fields=['customer_id'])] + +Redshift doesn't have conventional indexes, and we don't generate SQL for them. We merely use +`indexes` as a convenient place in the Meta to identify the `distkey`. + +You will likely encounter the following complication: + +Inlining Index Migrations +~~~~~~~~~~~~~~~~~~~~~~~~~ +Django's `makemigrations` generates a migration file that first applies a `CreateModel` operation without the +`indexes` option, and then adds the index in a separate `AddIndex` operation. + +However Redshift requires that the `distkey` be specified at table creation. As a result, you may need to +manually edit your migration files to move the index creation into the initial `CreateModel`. + +That is, to go from:: + + operations = [ + ... + migrations.CreateModel( + name='FactTable', + fields=[ + ('distkeycol', models.CharField()), + ('measure1', models.IntegerField()), + ('measure2', models.IntegerField()) + ... + ] + ), + ... + migrations.AddIndex( + model_name='facttable', + index=django_redshift_backend.distkey.DistKey(fields=['distkeycol'], name='...'), + ), + ] + +To:: + + operations = [ + ... + migrations.CreateModel( + name='FactTable', + fields=[ + ('distkeycol', models.CharField()), + ('measure1', models.IntegerField()), + ('measure2', models.IntegerField()) + ... + ], + options={ + 'indexes': [django_redshift_backend.distkey.DistKey(fields=['distkeycol'], name='...')], + }, + ), + ... + ] + + +Inlining ForeignKey Migrations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It is common to distribute fact tables on a foreign key column referencing the primary key of a dimension table. 
+ +In this case you may also encounter the following added complication: + +Django's `makemigrations` generates a migration file that first applies a `CreateModel` operation without the +`ForeignKey` column, and then adds the `ForeignKey` column in a separate `AddField` operation. It does this to +avoid attempts to create foreign key constraints against tables that haven't been created yet. + +However Redshift requires that the `distkey` be specified at table creation. As a result, you may need to +manually edit your migration files to move the ForeignKey column into the initial `CreateModel`, while also +ensuring that the referenced table appears *before* the referencing table in the file. + +That is, to go from:: + + operations = [ + ... + migrations.CreateModel( + name='FactTable', + fields=[ + ('measure1', models.IntegerField()), + ('measure2', models.IntegerField()) + ... + ] + ), + ... + migrations.CreateModel( + name='Dimension1Table', + fields=[ + ... + ] + ), + ... + migrations.AddField( + model_name='facttable', + name='dim1', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='myapp.Dimension1Table'), + ), + ... + ] + +To:: + + operations = [ + migrations.CreateModel( + name='Dimension1Table', + fields=[ + ... + ] + ), + ... + migrations.CreateModel( + name='FactTable', + fields=[ + ('measure1', models.IntegerField()), + ('measure2', models.IntegerField()), + ('dim1', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='myapp.Dimension1Table')) + ... + ] + ), + ... 
+ ] + diff --git a/doc/usage.rst b/doc/usage.rst deleted file mode 100644 index f85c923..0000000 --- a/doc/usage.rst +++ /dev/null @@ -1,93 +0,0 @@ -====== -Usage -====== - -Support versions -================ - -This product is tested with: - -* python-2.7, 3.5, 3.6 -* django-1.11 - - -Differences from postgres_psycopg2 backend -========================================== - -Type mapping: - -* 'integer identity(1, 1)' for AutoField -* 'bigint identity(1, 1)' for BigAutoField -* 'timestamp with time zone' for DateTimeField -* 'varchar(max)' for TextField -* 'varchar(32)' for UUIDField -* Possibility to multiply VARCHAR length to support utf-8 string, using - `REDSHIFT_VARCHAR_LENGTH_MULTIPLIER` setting. - -Stop using: - -* RETURNING (single insert and bulk insert) -* SELECT FOR UPDATE -* SELECT DISTINCT ON -* SET CONSTRAINTS -* INDEX -* DEFERRABLE INITIALLY DEFERRED -* CONSTRAINT -* CHECK -* DROP DEFAULT - -To support migration: - -* To add column to existent table on Redshift, column must be nullable -* To support modify column, add new column -> data migration -> drop old column -> rename - -Please note that the migration support for redshift is not perfect yet. - - -Note and Limitation --------------------- - -Amazon Redshift doesn't support RETURNING, so ``last_insert_id`` method retrieve MAX(pk) after insertion as a workaround. - -refs: - -* http://stackoverflow.com/q/19428860 -* http://stackoverflow.com/q/25638539 - -In some case, MAX(pk) workaround does not work correctly. -Bulk insertion makes non-contiguous IDs like: 1, 4, 7, 10, ... -and single insertion after such bulk insertion generates strange id value like 2 (smallest non-used id). - - -SETTINGS -======== - -ENGINE for DATABASES is 'django_redshift_backend'. 
You can set the name in your settings.py as:: - - DATABASES = { - 'default': { - 'ENGINE': 'django_redshift_backend', - 'NAME': '', - 'USER': '', - 'PASSWORD': '', - 'HOST': '', - 'PORT': '5439', - } - } - -REDSHIFT_VARCHAR_LENGTH_MULTIPLIER: - Possibility to multiply VARCHAR length to support utf-8 string. Default is 1. - -Using sortkey ---------------------------------- - -There is built-in support for this option for Django >= 1.9. To use `sortkey`, simply define an `ordering` on the model meta as follow:: - - class MyModel(models.Model): - ... - - class Meta: - ordering = ['col2'] - -N.B.: there is no validation of this option, instead we let Redshift validate it for you. Be sure to refer to the `documentation `_. - diff --git a/setup.py b/setup.py index 64b7a35..12535c5 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ def read(filename): author='shimizukawa', author_email='shimizukawa@gmail.com', description='Redshift database backend for Django', - long_description=read('README.rst'), + long_description=read('README.rst') + read('CHANGES.rst'), install_requires=requires, python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*", classifiers=[