Skip to content

Commit 8e5f713

Browse files
author
Scott Sanderson
authored
Merge pull request #20 from quantopian/encryption
ENH: Add encryption support.
2 parents 01056b4 + 7b8bad8 commit 8e5f713

18 files changed

+866
-330
lines changed

bin/pgcontents

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#!/usr/bin/env python
22
from getpass import getuser
33
from os import getcwd
4-
from os.path import join
54
import subprocess
65
from textwrap import dedent
76

@@ -16,10 +15,6 @@ from pgcontents.utils.migrate import (
1615
temp_alembic_ini,
1716
upgrade,
1817
)
19-
from pgcontents.utils.sync import (
20-
checkpoint_all,
21-
download_checkpoints,
22-
)
2318

2419

2520
@click.group(context_settings=dict(help_option_names=['-h', '--help']))
@@ -114,36 +109,5 @@ def gen_migration(db_url):
114109
)
115110

116111

117-
@main.command('download_checkpoints')
118-
@_db_url
119-
@_directory
120-
@_users
121-
def _download_checkpoints(db_url, directory, users):
122-
"""
123-
Download checkpoints to a directory.
124-
"""
125-
users = users.split(',')
126-
if len(users) == 1:
127-
download_checkpoints(db_url, directory, users[0])
128-
else:
129-
for user in users:
130-
download_checkpoints(db_url, join(directory, user), user)
131-
132-
133-
@main.command('checkpoint_all')
134-
@_db_url
135-
@_directory
136-
@_users
137-
def _checkpoint_all(db_url, directory, users):
138-
"""
139-
Upload a checkpoint for every file in a directory.
140-
"""
141-
users = users.split(',')
142-
if len(users) == 1:
143-
checkpoint_all(db_url, directory, users[0])
144-
else:
145-
for user in users:
146-
checkpoint_all(db_url, join(directory, user), user)
147-
148112
if __name__ == "__main__":
149113
main()

pgcontents/api_utils.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import posixpath
1313

1414
from tornado.web import HTTPError
15-
from .error import PathOutsideRoot
15+
from .error import CorruptedFile, PathOutsideRoot
1616
from .utils.ipycompat import reads, writes
1717

1818
NBFORMAT_VERSION = 4
@@ -117,7 +117,10 @@ def reads_base64(nb, as_version=NBFORMAT_VERSION):
117117
"""
118118
Read a notebook from base64.
119119
"""
120-
return reads(b64decode(nb).decode('utf-8'), as_version=as_version)
120+
try:
121+
return reads(b64decode(nb).decode('utf-8'), as_version=as_version)
122+
except Exception as e:
123+
raise CorruptedFile(e)
121124

122125

123126
def _decode_text_from_base64(path, bcontent):
@@ -161,7 +164,17 @@ def from_b64(path, bcontent, format):
161164
'text': _decode_text_from_base64,
162165
None: _decode_unknown_from_base64,
163166
}
164-
content, real_format = decoders[format](path, bcontent)
167+
168+
try:
169+
content, real_format = decoders[format](path, bcontent)
170+
except HTTPError:
171+
# Pass through HTTPErrors, since we intend for them to bubble all the
172+
# way back to the API layer.
173+
raise
174+
except Exception as e:
175+
# Anything else should be wrapped in a CorruptedFile, since it likely
176+
# indicates misconfiguration of encryption.
177+
raise CorruptedFile(e)
165178

166179
default_mimes = {
167180
'text': 'text/plain',

pgcontents/checkpoints.py

Lines changed: 20 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
from .api_utils import (
77
_decode_unknown_from_base64,
88
outside_root_to_404,
9-
prefix_dirs,
109
reads_base64,
1110
to_b64,
1211
writes_base64,
@@ -16,7 +15,6 @@
1615
delete_remote_checkpoints,
1716
delete_single_remote_checkpoint,
1817
get_remote_checkpoint,
19-
latest_remote_checkpoints,
2018
list_remote_checkpoints,
2119
move_remote_checkpoints,
2220
purge_remote_checkpoints,
@@ -40,7 +38,14 @@ def create_notebook_checkpoint(self, nb, path):
4038
"""
4139
b64_content = writes_base64(nb)
4240
with self.engine.begin() as db:
43-
return save_remote_checkpoint(db, self.user_id, path, b64_content)
41+
return save_remote_checkpoint(
42+
db,
43+
self.user_id,
44+
path,
45+
b64_content,
46+
self.crypto.encrypt,
47+
self.max_file_size_bytes,
48+
)
4449

4550
@outside_root_to_404
4651
def create_file_checkpoint(self, content, format, path):
@@ -53,7 +58,14 @@ def create_file_checkpoint(self, content, format, path):
5358
except ValueError as e:
5459
self.do_400(str(e))
5560
with self.engine.begin() as db:
56-
return save_remote_checkpoint(db, self.user_id, path, b64_content)
61+
return save_remote_checkpoint(
62+
db,
63+
self.user_id,
64+
path,
65+
b64_content,
66+
self.crypto.encrypt,
67+
self.max_file_size_bytes,
68+
)
5769

5870
@outside_root_to_404
5971
def delete_checkpoint(self, checkpoint_id, path):
@@ -63,27 +75,28 @@ def delete_checkpoint(self, checkpoint_id, path):
6375
db, self.user_id, path, checkpoint_id,
6476
)
6577

66-
def _get_checkpoint(self, checkpoint_id, path):
78+
def get_checkpoint_content(self, checkpoint_id, path):
    """
    Return the 'content' entry of a stored checkpoint.

    The checkpoint is looked up for ``self.user_id`` at ``path`` with id
    ``checkpoint_id``; ``self.crypto.decrypt`` is handed to the query helper
    so the stored content can be decrypted.
    """
    with self.engine.begin() as db:
        record = get_remote_checkpoint(
            db, self.user_id, path, checkpoint_id, self.crypto.decrypt,
        )
        return record['content']
7588

7689
@outside_root_to_404
def get_notebook_checkpoint(self, checkpoint_id, path):
    """
    Return a notebook model for the given checkpoint.

    The base64 content of the checkpoint is fetched via
    ``get_checkpoint_content`` and parsed with ``reads_base64``.
    """
    encoded = self.get_checkpoint_content(checkpoint_id, path)
    notebook = reads_base64(encoded)
    return {'type': 'notebook', 'content': notebook}
8396

8497
@outside_root_to_404
8598
def get_file_checkpoint(self, checkpoint_id, path):
86-
b64_content = self._get_checkpoint(checkpoint_id, path)
99+
b64_content = self.get_checkpoint_content(checkpoint_id, path)
87100
content, format = _decode_unknown_from_base64(path, b64_content)
88101
return {
89102
'type': 'file',
@@ -120,30 +133,3 @@ def purge_db(self):
120133
"""
121134
with self.engine.begin() as db:
122135
purge_remote_checkpoints(db, self.user_id)
123-
124-
def dump(self, contents_mgr):
125-
"""
126-
Synchronize the state of our database with the specified
127-
ContentsManager.
128-
129-
Gets the most recent checkpoint for each file and passes it to the
130-
supplied ContentsManager to be saved.
131-
"""
132-
with self.engine.begin() as db:
133-
records = latest_remote_checkpoints(db, self.user_id)
134-
for record in records:
135-
path = record['path']
136-
if not path.endswith('.ipynb'):
137-
self.log.warn('Ignoring non-notebook file: {}', path)
138-
continue
139-
for dirname in prefix_dirs(path):
140-
self.log.info("Ensuring directory [%s]" % dirname)
141-
contents_mgr.save(
142-
model={'type': 'directory'},
143-
path=dirname,
144-
)
145-
self.log.info("Writing notebook [%s]" % path)
146-
contents_mgr.save(
147-
self.get_notebook_checkpoint(record['id'], path),
148-
path,
149-
)

pgcontents/crypto.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
"""
2+
Interface definition for encryption/decryption plugins for
3+
PostgresContentsManager.
4+
5+
Encryption backends should raise pgcontents.error.CorruptedFile if they
6+
encounter an input that they cannot decrypt.
7+
"""
8+
from .error import CorruptedFile
9+
10+
11+
class NoEncryption(object):
    """
    Pass-through crypto backend that performs no encryption at all.

    Both directions hand back exactly the object they were given.

    Methods
    -------
    encrypt : callable[bytes -> bytes]
    decrypt : callable[bytes -> bytes]
    """

    def encrypt(self, b):
        """Return ``b`` unchanged."""
        return b

    def decrypt(self, b):
        """Return ``b`` unchanged."""
        return b
27+
28+
29+
class FernetEncryption(object):
    """
    Notebook encryption using cryptography.fernet for symmetric-key encryption.

    Parameters
    ----------
    fernet : cryptography.fernet.Fernet
        The Fernet object to use for encryption.

    Methods
    -------
    encrypt : callable[bytes -> bytes]
    decrypt : callable[bytes -> bytes]

    Notes
    -----
    ``cryptography.fernet.MultiFernet`` can be used instead of a vanilla
    ``Fernet`` to allow zero-downtime key rotation.

    Copies (shallow or deep) are new ``FernetEncryption`` wrappers that share
    the same underlying fernet object.

    See Also
    --------
    :func:`pgcontents.utils.sync.reencrypt_user`
    """
    __slots__ = ('_fernet',)

    def __init__(self, fernet):
        self._fernet = fernet

    def encrypt(self, s):
        """Encrypt ``s`` with the wrapped fernet object and return the result."""
        return self._fernet.encrypt(s)

    def decrypt(self, s):
        """
        Decrypt ``s`` with the wrapped fernet object and return the result.

        Raises
        ------
        CorruptedFile
            If the wrapped fernet object raises any exception.
        """
        try:
            return self._fernet.decrypt(s)
        except Exception as e:
            # Deliberately broad: every decryption failure is reported through
            # the single CorruptedFile error type that this module's backends
            # are expected to raise for undecryptable input.
            raise CorruptedFile(e)

    def __copy__(self, memo=None):
        # ``copy.copy`` invokes ``__copy__()`` with no arguments, so ``memo``
        # must be optional; requiring it made shallow copies raise TypeError.
        # The parameter is unused and kept only so that any existing direct
        # ``__copy__(memo)`` call keeps working.
        return FernetEncryption(self._fernet)

    def __deepcopy__(self, memo):
        # Any value that appears in an IPython/Jupyter Config object needs to
        # be deepcopy-able. Cryptography's Fernet objects aren't deepcopy-able,
        # so we copy our underlying state to a new FernetEncryption object.
        return FernetEncryption(self._fernet)
78+
79+
class FallbackCrypto(object):
    """
    Crypto backend that wraps an ordered list of other crypto backends.

    Encryption always goes through the first backend. Decryption tries each
    backend in order and returns the first successful result.

    Sub-cryptos should raise ``CorruptedFile`` if they're unable to decrypt an
    input; that is the only exception type treated as "try the next one".

    This is conceptually similar to the technique used by
    ``cryptography.fernet.MultiFernet`` for implementing key rotation.

    Parameters
    ----------
    cryptos : list[object]
        A sequence of cryptos to use for decryption. cryptos[0] will always be
        used for encryption.

    Methods
    -------
    encrypt : callable[bytes -> bytes]
    decrypt : callable[bytes -> bytes]

    Notes
    -----
    Since NoEncryption will always succeed, it is only supported as the last
    entry in ``cryptos``. Passing a list with a NoEncryption not in the last
    location will raise a ValueError.
    """
    __slots__ = ('_cryptos',)

    def __init__(self, cryptos):
        # A NoEncryption anywhere but the final slot would shadow every
        # backend after it, so reject that configuration up front.
        if any(isinstance(c, NoEncryption) for c in cryptos[:-1]):
            raise ValueError(
                "NoEncryption is only supported as the last fallback."
            )

        self._cryptos = cryptos

    def encrypt(self, s):
        """Encrypt ``s`` with the first configured backend."""
        primary = self._cryptos[0]
        return primary.encrypt(s)

    def decrypt(self, s):
        """
        Decrypt ``s`` with the first backend that accepts it.

        Raises
        ------
        CorruptedFile
            If every backend raised ``CorruptedFile``; the collected errors
            are passed as the exception argument.
        """
        failures = []
        for backend in self._cryptos:
            try:
                plaintext = backend.decrypt(s)
            except CorruptedFile as exc:
                failures.append(exc)
            else:
                return plaintext
        raise CorruptedFile(failures)

pgcontents/db_utils.py

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
"""
1818

1919
from contextlib import contextmanager
20-
from six.moves import zip
20+
from six.moves import map, zip
2121

2222
from psycopg2.errorcodes import (
2323
FOREIGN_KEY_VIOLATION,
@@ -65,14 +65,37 @@ def _get_name(column_like):
6565
return column_like.clause.name
6666

6767

68-
def to_dict(fields, row):
68+
def to_dict_no_content(fields, row):
    """
    Convert a SQLAlchemy row that does not contain a 'content' field to a dict.

    If row is None, return None.

    Raises AssertionError if there is a field named 'content' in ``fields``,
    or if ``fields`` and ``row`` differ in length.
    """
    # Honor the documented contract: previously ``len(None)`` raised a
    # TypeError before a missing row could be reported as None.
    if row is None:
        return None

    assert len(fields) == len(row)

    field_names = list(map(_get_name, fields))
    assert 'content' not in field_names, "Unexpected content field."

    return dict(zip(field_names, row))
82+
83+
84+
def to_dict_with_content(fields, row, decrypt_func):
    """
    Convert a SQLAlchemy row that contains a 'content' field to a dict.

    ``decrypt_func`` will be applied to the ``content`` field of the row.

    If row is None, return None (``decrypt_func`` is not called).

    Raises AssertionError if there is no field named 'content' in ``fields``,
    or if ``fields`` and ``row`` differ in length.
    """
    # Honor the documented contract: previously ``len(None)`` raised a
    # TypeError before a missing row could be reported as None.
    if row is None:
        return None

    assert len(fields) == len(row)

    field_names = list(map(_get_name, fields))
    assert 'content' in field_names, "Missing content field."

    result = dict(zip(field_names, row))
    result['content'] = decrypt_func(result['content'])
    return result

pgcontents/error.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,7 @@ class FileTooLarge(Exception):
3737

3838
class RenameRoot(Exception):
3939
pass
40+
41+
42+
class CorruptedFile(Exception):
    """Raised when stored file content cannot be decrypted or decoded."""

0 commit comments

Comments
 (0)