Skip to content

Commit

Permalink
[#135] Support adding BinaryType Field
Browse files Browse the repository at this point in the history
- Support adding BinaryType field.
- Support forward migration from varchar to varbyte(size).
- Support backward migration from varbyte to varchar(size).
- Added BinaryType field to psycopg2 adapter.
- Testing with example/dj-sql-explorer.
  • Loading branch information
shimizukawa committed Jul 22, 2024
1 parent 804a3b8 commit 1b98282
Show file tree
Hide file tree
Showing 9 changed files with 195 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Features:
* #127 Drop Python-3.7 support.
* #83 Drop Django-2.2 support.
* #134 Support adding COLUMN with UNIQUE; adding column without UNIQUE then add UNIQUE CONSTRAINT.
* #135 Support adding BinaryField.

Bug Fixes:

Expand Down
45 changes: 44 additions & 1 deletion django_redshift_backend/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@
from django.db.utils import NotSupportedError, ProgrammingError

from django_redshift_backend.meta import DistKey, SortKey
from psycopg2.extensions import Binary

from .psycopg2adapter import RedshiftBinary

logger = logging.getLogger("django.db.backends")

Expand Down Expand Up @@ -154,6 +156,10 @@ def _get_type_default(field):
return default


def _remove_length_from_type(column_type):
return re.sub(r"\(.*", "", column_type)


class DatabaseSchemaEditor(BasePGDatabaseSchemaEditor):
sql_create_table = "CREATE TABLE %(table)s (%(definition)s) %(options)s"
sql_delete_fk = "ALTER TABLE %(table)s DROP CONSTRAINT %(name)s"
Expand Down Expand Up @@ -187,6 +193,25 @@ def remove_index(self, model, index, concurrently=False):
# Redshift doesn't support INDEX.
pass

def column_sql(self, *args, **kwargs):
    """Build a column definition with parameters adapted for Redshift.

    Delegates to the parent schema editor's ``column_sql`` with the same
    arguments, then passes the returned parameters through
    ``_modify_params_for_redshift``.

    Returns the ``(definition, params)`` pair.
    """
    column_definition, raw_params = super().column_sql(*args, **kwargs)
    return column_definition, self._modify_params_for_redshift(raw_params)

def _modify_params_for_redshift(self, params):
"""
`Psycopg2.extensions.Binary(b'\x80\x00')` in params is converted to `'\\x80\\x00'::bytea` when applied to SQL placeholders. However, Redshift needs to treat binary columns as `to_varbyte('8000', 'hex')::varbyte` instead of `::bytea` [#].
[#]: https://docs.aws.amazon.com/redshift/latest/dg/r_VARBYTE_type.html
So, this function converts `Binary` instances to `RedshiftBinary` instances.
RedshiftBinary is converted to `to_varbyte('8000', 'hex')::varbyte` when applied to placeholders.
"""
new_params = [
RedshiftBinary(p.adapted) if isinstance(p, Binary) else p for p in params
]
return new_params

def create_model(self, model):
"""
Takes a model and creates a table for it in the database.
Expand Down Expand Up @@ -733,14 +758,31 @@ def _alter_column_with_recreate(self, model, old_field, new_field):
},
params,
)

type_cast = ""
if new_field.get_internal_type() == "BinaryField":
# Castability is not verified here: migrations rarely switch to a type
# that the existing values cannot be cast to.
type_cast = "::" + _remove_length_from_type(
DatabaseWrapper.data_types["BinaryField"]
)
elif (
old_field.get_internal_type() == "BinaryField"
and new_field.get_internal_type() == "CharField"
):
type_cast = "::" + _remove_length_from_type(
DatabaseWrapper.data_types["CharField"]
)

# ## UPDATE <table> SET 'tmp' = <orig column>
actions.append(
(
"UPDATE %(table)s SET %(new_column)s = %(old_column)s WHERE %(old_column)s IS NOT NULL"
"UPDATE %(table)s SET %(new_column)s = %(old_column)s%(cast)s WHERE %(old_column)s IS NOT NULL"
% {
"table": model._meta.db_table,
"new_column": self.quote_name(new_field.column + "_tmp"),
"old_column": self.quote_name(new_field.column),
"cast": type_cast,
},
[],
)
Expand Down Expand Up @@ -1080,6 +1122,7 @@ def _create_unique_sql(
"BigAutoField": "bigint identity(1, 1)",
"TextField": "varchar(max)", # text must be varchar(max)
"UUIDField": "varchar(32)", # redshift doesn't support uuid fields
"BinaryField": "varbyte(%(max_length)s)",
}


Expand Down
10 changes: 10 additions & 0 deletions django_redshift_backend/psycopg2adapter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from codecs import encode

from psycopg2.extensions import Binary


class RedshiftBinary(Binary):
    """psycopg2 ``Binary`` adapter quoted as a Redshift VARBYTE expression."""

    def getquoted(self) -> bytes:
        """Return ``to_varbyte('<hex>', 'hex')::varbyte`` as bytes.

        ``<hex>`` is the hex encoding of the adapted value.
        """
        hex_digits = encode(self.adapted, "hex_codec")
        return b"".join((b"to_varbyte('", hex_digits, b"', 'hex')::varbyte"))
1 change: 1 addition & 0 deletions examples/dj-sql-explorer/.env.sample
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
DATABASE_URL=redshift://user:password@<cluster>.<slug>.<region>.redshift.amazonaws.com:5439/<name>?DISABLE_SERVER_SIDE_CURSORS=True
SECRET_KEY=django-insecure-key
DEBUG=True
17 changes: 17 additions & 0 deletions examples/dj-sql-explorer/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,20 @@
# django-sql-explorer
EXPLORER_CONNECTIONS = { 'Default': 'default' }
EXPLORER_DEFAULT_CONNECTION = 'default'

# Logging configuration: send DEBUG-level records from the
# 'django.db.backends' logger to a console stream handler.
LOGGING = {
    'version': 1,
    # Keep loggers configured elsewhere active.
    'disable_existing_loggers': False,
    'handlers': {
        'console': {
            'level': 'DEBUG',
            'class': 'logging.StreamHandler',
        },
    },
    'loggers': {
        'django.db.backends': {
            'handlers': ['console'],
            'level': 'DEBUG',
        },
    },
}
4 changes: 2 additions & 2 deletions examples/dj-sql-explorer/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Django<4
-e ../..[psycopg2-binary]
django-environ==0.8.1
django-sql-explorer
-e ../..
python-dateutil>=2.9
3 changes: 1 addition & 2 deletions examples/proj1/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
Django<4
-e ../..[psycopg2-binary]
django-environ==0.8.1
-e ../..
4 changes: 4 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ def postgres_fixture():
'django_redshift_backend.base.DatabaseWrapper.data_types',
BasePGDatabaseWrapper.data_types,
), \
mock.patch(
'django_redshift_backend.base.DatabaseSchemaEditor._modify_params_for_redshift',
lambda self, params: params
), \
mock.patch(
'django_redshift_backend.base.DatabaseSchemaEditor._get_create_options',
lambda self, model: '',
Expand Down
116 changes: 115 additions & 1 deletion tests/test_migrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pytest

from test_base import OperationTestBase
from conftest import skipif_no_database, postgres_fixture
from conftest import skipif_no_database, postgres_fixture, TEST_WITH_POSTGRES, TEST_WITH_REDSHIFT


@skipif_no_database
Expand Down Expand Up @@ -289,6 +289,38 @@ def test_add_notnull_with_default(self):
'''ALTER TABLE "test_pony" ADD COLUMN "name" varchar(10) DEFAULT '' NOT NULL;''',
], sqls)

@postgres_fixture()
def test_add_binary(self):
    # Adding a NOT NULL BinaryField with a default must emit a single
    # ADD COLUMN statement carrying the backend-specific binary default.
    from django_redshift_backend.base import DatabaseWrapper, _remove_length_from_type

    project_state = self.set_up_test_model('test')
    binary_field = models.BinaryField(
        max_length=10,
        verbose_name='hash',
        null=False,
        default=b'\x80\x00',
    )
    ops = [
        migrations.AddField(model_name='Pony', name='hash', field=binary_field),
    ]

    with self.collect_sql() as sqls:
        self.apply_operations('test', project_state, ops)

    # Column type as configured for the backend under test, and the same
    # type without its length for use in the cast.
    bin_type = DatabaseWrapper.data_types['BinaryField'] % {"max_length": 10}
    bin_cast = _remove_length_from_type(bin_type)
    # The rendering of the binary default differs per backend.
    if TEST_WITH_POSTGRES:
        default = fr"DEFAULT '\200\000'::{bin_cast}"
    elif TEST_WITH_REDSHIFT:
        default = fr"DEFAULT to_varbyte('8000', 'hex')::{bin_cast}"

    expected_sqls = [
        f'''ALTER TABLE "test_pony" ADD COLUMN "hash" {bin_type} {default} NOT NULL;''',
    ]
    self.assertEqual(expected_sqls, sqls)

@postgres_fixture()
def test_alter_type(self):
new_state = self.set_up_test_model('test')
Expand Down Expand Up @@ -390,3 +422,85 @@ def test_alter_notnull_to_nullable(self):
'''ALTER TABLE test_pony DROP COLUMN "weight" CASCADE;''',
'''ALTER TABLE test_pony RENAME COLUMN "weight_tmp" TO "weight";''',
], sqls)

@postgres_fixture()
def test_alter_type_char_to_binary(self):
    # Altering a CharField into a BinaryField must recreate the column:
    # add a temporary binary column, copy the values with a cast, drop the
    # old column and rename the temporary one.
    from django_redshift_backend.base import DatabaseWrapper, _remove_length_from_type

    project_state = self.set_up_test_model('test')
    char_field = models.CharField(max_length=10, verbose_name='hash', null=False, default='')
    binary_field = models.BinaryField(
        max_length=10,
        verbose_name='hash',
        null=False,
        default=b'\x80\x00',
    )
    ops = [
        migrations.AddField(model_name='Pony', name='hash', field=char_field),
        migrations.AlterField(model_name='Pony', name='hash', field=binary_field),
    ]

    with self.collect_sql() as sqls:
        self.apply_operations('test', project_state, ops)

    # Column type as configured for the backend under test, and the same
    # type without its length for use in the cast.
    bin_type = DatabaseWrapper.data_types['BinaryField'] % {"max_length": 10}
    bin_cast = _remove_length_from_type(bin_type)
    # The rendering of the binary default differs per backend.
    if TEST_WITH_POSTGRES:
        default = fr"DEFAULT '\200\000'::{bin_cast}"
    elif TEST_WITH_REDSHIFT:
        default = fr"DEFAULT to_varbyte('8000', 'hex')::{bin_cast}"

    expected_sqls = [
        '''ALTER TABLE "test_pony" ADD COLUMN "hash" varchar(10) DEFAULT '' NOT NULL;''',
        f'''ALTER TABLE "test_pony" ADD COLUMN "hash_tmp" {bin_type} {default} NOT NULL;''',
        f'''UPDATE test_pony SET "hash_tmp" = "hash"::{bin_cast} WHERE "hash" IS NOT NULL;''',
        '''ALTER TABLE test_pony DROP COLUMN "hash" CASCADE;''',
        '''ALTER TABLE test_pony RENAME COLUMN "hash_tmp" TO "hash";''',
    ]
    self.assertEqual(expected_sqls, sqls)

@postgres_fixture()
def test_alter_type_binary_to_char(self):
    # Altering a BinaryField back into a CharField must recreate the
    # column: add a temporary varchar column, copy the values with a
    # ::varchar cast, drop the old column and rename the temporary one.
    from django_redshift_backend.base import DatabaseWrapper, _remove_length_from_type

    project_state = self.set_up_test_model('test')
    binary_field = models.BinaryField(
        max_length=10,
        verbose_name='hash',
        null=False,
        default=b'\x80\x00',
    )
    char_field = models.CharField(max_length=10, verbose_name='hash', null=False, default='')
    ops = [
        migrations.AddField(model_name='Pony', name='hash', field=binary_field),
        migrations.AlterField(model_name='Pony', name='hash', field=char_field),
    ]

    with self.collect_sql() as sqls:
        self.apply_operations('test', project_state, ops)

    # Column type as configured for the backend under test, and the same
    # type without its length for use in the cast.
    bin_type = DatabaseWrapper.data_types['BinaryField'] % {"max_length": 10}
    bin_cast = _remove_length_from_type(bin_type)
    # The rendering of the binary default differs per backend.
    if TEST_WITH_POSTGRES:
        default = fr"DEFAULT '\200\000'::{bin_cast}"
    elif TEST_WITH_REDSHIFT:
        default = fr"DEFAULT to_varbyte('8000', 'hex')::{bin_cast}"

    expected_sqls = [
        f'''ALTER TABLE "test_pony" ADD COLUMN "hash" {bin_type} {default} NOT NULL;''',
        '''ALTER TABLE "test_pony" ADD COLUMN "hash_tmp" varchar(10) DEFAULT '' NOT NULL;''',
        '''UPDATE test_pony SET "hash_tmp" = "hash"::varchar WHERE "hash" IS NOT NULL;''',
        '''ALTER TABLE test_pony DROP COLUMN "hash" CASCADE;''',
        '''ALTER TABLE test_pony RENAME COLUMN "hash_tmp" TO "hash";''',
    ]
    self.assertEqual(expected_sqls, sqls)

0 comments on commit 1b98282

Please sign in to comment.