Skip to content

Commit ada3a48

Browse files
committed
Initial commit.
0 parents  commit ada3a48

File tree

7 files changed

+375
-0
lines changed

7 files changed

+375
-0
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
/dist/
2+
*.pyc

LICENSE

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
Copyright (c) 2010-2013 Alexander Ljungberg.
2+
3+
All rights reserved.
4+
5+
Redistribution and use in source and binary forms, with or without
6+
modification, are permitted provided that the following conditions are met:
7+
8+
Redistributions of source code must retain the above copyright notice, this
9+
list of conditions and the following disclaimer. Redistributions in binary
10+
form must reproduce the above copyright notice, this list of conditions and
11+
the following disclaimer in the documentation and/or other materials provided
12+
with the distribution. Neither the name of Alexander Ljungberg nor the names
13+
of its contributors may be used to endorse or promote products derived from
14+
this software without specific prior written permission.
15+
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26+
POSSIBILITY OF SUCH DAMAGE.

MANIFEST

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# file GENERATED by distutils, do NOT edit
2+
setup.py
3+
hexahexacontadecimal/__init__.py
4+
hexahexacontadecimal/num_encode_base64.py

README.md

+101
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
Hexahexacontadecimal
2+
====================
3+
4+
*Hexahexacontadecimal is the most compact way to encode a number into a URL.*
5+
6+
Hexahexacontadecimal is a compact format to express a number in a URL. It uses all characters allowed in
7+
a URL without escaping -- the [unreserved characters](http://tools.ietf.org/html/rfc3986#section-2.3) --
8+
making it the shortest possible way to express an integer in a URL.
9+
10+
## Usage
11+
12+
from hexahexacontadecimal import hexahexacontadecimal_encode_int, hexahexacontadecimal_decode_int
13+
14+
print hexahexacontadecimal_encode_int(302231454903657293676544) # 'iFsGUkO.0tsxw'
15+
print hexahexacontadecimal_decode_int('iFsGUkO.0tsxw') # 302231454903657293676544L
16+
17+
Note that urllib.quote escapes tilde (~) (http://bugs.python.org/issue16285), which is not necessary as
18+
of RFC3986.
19+
20+
### Hexahexacontadecimal vs Base 64 in URLs
21+
22+
>>> n = 292231454903657293676544
23+
>>> import base64
24+
>>> urlquote(base64.urlsafe_b64encode(long_to_binary(n)))
25+
'PeHmHzZFTcAAAA%3D%3D'
26+
>>> urlquote(hexahexacontadecimal_encode_int(n))
27+
'gpE4Xoy7fw5AO'
28+
29+
Worst case scenario for plain Base 64:
30+
31+
>>> n = 64 ** 5 + 1
32+
>>> urlquote(base64.urlsafe_b64encode(long_to_binary(n)))
33+
'QAAAAQ%3D%3D'
34+
>>> urlquote(hexahexacontadecimal_encode_int(n))
35+
'ucrDZ'
36+
37+
Worst case for hexahexacontadecimal:
38+
39+
>>> n = 66 ** 5 + 1
40+
>>> urlquote(base64.urlsafe_b64encode(long_to_binary(n)))
41+
'SqUUIQ%3D%3D'
42+
>>> urlquote(hexahexacontadecimal_encode_int(n))
43+
'100001'
44+
45+
That big SHA-512 you always wanted to write in a URL:
46+
47+
>>> n = 2 ** 512
48+
>>> urlquote(base64.urlsafe_b64encode(long_to_binary(n)))
49+
'AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA%3D'
50+
>>> urlquote(hexahexacontadecimal_encode_int(n))
51+
'JK84xqGD9FMXPNubPghADlRhBUzlqRscC2h~8xmi99PvuQsUCIB2CHGhMUQR8FLm72.Hbbctkqi89xspay~y4'
52+
53+
### Are the savings really significant?
54+
55+
If you're currently Base64-encoding numbers the naive way (padded output that then has to be URL-quoted), then yes:
56+
57+
>>> sum(len(urlquote(base64.urlsafe_b64encode(long_to_binary(n)))) for n in xrange(10 ** 5))
58+
531584
59+
>>> sum(len(urlquote(hexahexacontadecimal_encode_int(n))) for n in xrange(10 ** 5))
60+
295578
61+
62+
### But what if I use Base64 without padding?
63+
64+
Then the savings are smaller, but it's still an improvement. Using the unpadded Base64 encoder from http://stackoverflow.com/a/561704/76900:
65+
66+
>>> from hexahexacontadecimal.num_encode_base64 import num_encode as num_encode_base64
67+
>>> n = 64 ** 5 + 1
68+
>>> urlquote(num_encode_base64(n))
69+
'BAAAAB'
70+
>>> urlquote(hexahexacontadecimal_encode_int(n))
71+
'ucrDZ'
72+
>>> n = 66 ** 5 + 1
73+
>>> urlquote(num_encode_base64(n))
74+
'BKpRQh'
75+
>>> urlquote(hexahexacontadecimal_encode_int(n))
76+
'100001'
77+
>>> n = 2 ** 512
78+
>>> urlquote(num_encode_base64(n))
79+
'EAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
80+
>>> urlquote(hexahexacontadecimal_encode_int(n))
81+
'JK84xqGD9FMXPNubPghADlRhBUzlqRscC2h~8xmi99PvuQsUCIB2CHGhMUQR8FLm72.Hbbctkqi89xspay~y4'
82+
>>> sum(len(urlquote(num_encode_base64(n))) for n in xrange(10 ** 5))
83+
295840
84+
>>> sum(len(urlquote(hexahexacontadecimal_encode_int(n))) for n in xrange(10 ** 5))
85+
295578
86+
87+
## Installation
88+
89+
pip install hexahexacontadecimal
90+
91+
## Documentation
92+
93+
See this README and the docstrings in the source code.
94+
95+
## License
96+
97+
Free to use and modify under the terms of the BSD open source license.
98+
99+
## Author
100+
101+
Alexander Ljungberg

hexahexacontadecimal/__init__.py

+176
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf8 -*-
3+
4+
"""Encode and decode hexahexacontadecimal numbers.
5+
6+
Hexahexacontadecimal is a compact format to express a number in a URL. It uses all characters allowed in
7+
a URL without escaping -- the [unreserved characters](http://tools.ietf.org/html/rfc3986#section-2.3) --
8+
making it the shortest possible way to express an integer in a URL.
9+
10+
Note that urllib.quote escapes tilde (~) (http://bugs.python.org/issue16285), which is not necessary as
11+
of RFC3986.
12+
13+
## Hexahexacontadecimal vs Base 64 in URLs
14+
15+
>>> n = 292231454903657293676544
16+
>>> import base64
17+
>>> urlquote(base64.urlsafe_b64encode(long_to_binary(n)))
18+
'PeHmHzZFTcAAAA%3D%3D'
19+
>>> urlquote(hexahexacontadecimal_encode_int(n))
20+
'gpE4Xoy7fw5AO'
21+
22+
Worst case scenario for plain Base 64:
23+
24+
>>> n = 64 ** 5 + 1
25+
>>> urlquote(base64.urlsafe_b64encode(long_to_binary(n)))
26+
'QAAAAQ%3D%3D'
27+
>>> urlquote(hexahexacontadecimal_encode_int(n))
28+
'ucrDZ'
29+
30+
Worst case for hexahexacontadecimal:
31+
32+
>>> n = 66 ** 5 + 1
33+
>>> urlquote(base64.urlsafe_b64encode(long_to_binary(n)))
34+
'SqUUIQ%3D%3D'
35+
>>> urlquote(hexahexacontadecimal_encode_int(n))
36+
'100001'
37+
38+
That big SHA-512 you always wanted to write in a URL:
39+
40+
>>> n = 2 ** 512
41+
>>> urlquote(base64.urlsafe_b64encode(long_to_binary(n)))
42+
'AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA%3D'
43+
>>> urlquote(hexahexacontadecimal_encode_int(n))
44+
'JK84xqGD9FMXPNubPghADlRhBUzlqRscC2h~8xmi99PvuQsUCIB2CHGhMUQR8FLm72.Hbbctkqi89xspay~y4'
45+
46+
## Are the savings really significant?
47+
48+
If you're currently doing your BASE64 encoding the naive way, then yes:
49+
50+
>>> sum(len(urlquote(base64.urlsafe_b64encode(long_to_binary(n)))) for n in xrange(10 ** 5))
51+
531584
52+
>>> sum(len(urlquote(hexahexacontadecimal_encode_int(n))) for n in xrange(10 ** 5))
53+
295578
54+
55+
## But what if I use Base64 without padding?
56+
57+
Then the savings are not as significant. But it's still an improvement. Using the code from http://stackoverflow.com/a/561704/76900:
58+
59+
>>> from hexahexacontadecimal.num_encode_base64 import num_encode as num_encode_base64
60+
>>> n = 64 ** 5 + 1
61+
>>> urlquote(num_encode_base64(n))
62+
'BAAAAB'
63+
>>> urlquote(hexahexacontadecimal_encode_int(n))
64+
'ucrDZ'
65+
>>> n = 66 ** 5 + 1
66+
>>> urlquote(num_encode_base64(n))
67+
'BKpRQh'
68+
>>> urlquote(hexahexacontadecimal_encode_int(n))
69+
'100001'
70+
>>> n = 2 ** 512
71+
>>> urlquote(num_encode_base64(n))
72+
'EAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
73+
>>> urlquote(hexahexacontadecimal_encode_int(n))
74+
'JK84xqGD9FMXPNubPghADlRhBUzlqRscC2h~8xmi99PvuQsUCIB2CHGhMUQR8FLm72.Hbbctkqi89xspay~y4'
75+
>>> sum(len(urlquote(num_encode_base64(n))) for n in xrange(10 ** 5))
76+
295840
77+
>>> sum(len(urlquote(hexahexacontadecimal_encode_int(n))) for n in xrange(10 ** 5))
78+
295578
79+
80+
Why settle for less?
81+
82+
"""
83+
84+
from io import StringIO
85+
import urllib
86+
87+
# Digits of the hexahexacontadecimal numeral system, ordered by digit value
# (a character's position in this string is the value it represents): the 66
# URL "unreserved" characters described in the module docstring.
BASE66_ALPHABET = u"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_.~"
88+
# Radix of the encoding: the number of characters in BASE66_ALPHABET.
BASE = len(BASE66_ALPHABET)
89+
90+
91+
def urlquote(s, safe=None):
    """Like urllib.quote() but don't escape ~, in accordance with RFC3986.

    :param s: the string to percent-encode.
    :param safe: optional string of additional characters that must not be
        escaped. ``~`` is always treated as safe.
    :returns: the percent-encoded string.

    Note that an explicit ``safe`` argument is always handed to the underlying
    quote function, so its own default no longer applies: ``/`` is escaped
    unless it is listed in ``safe``.
    """
    # The quote function lives in urllib on Python 2 and in urllib.parse on
    # Python 3; resolve it locally so this works on either.
    try:
        from urllib.parse import quote
    except ImportError:
        from urllib import quote

    return quote(s, safe='~' + (safe or ''))
95+
96+
97+
def long_to_binary(n):
    """Take an integer and write it as a binary string.

    The number is written most significant byte first, using as few bytes as
    possible (but at least one). Only non-negative integers are supported; a
    negative number produces a non-hexadecimal string and is rejected by
    ``binascii.unhexlify``.

    The result is a byte string (``str`` on Python 2, ``bytes`` on Python 3).
    The examples below show the Python 2 representation.

    >>> long_to_binary(0)
    '\\x00'
    >>> long_to_binary(255)
    '\\xff'
    >>> long_to_binary(512 + 3)
    '\\x02\\x03'
    """
    # binascii replaces the 'hex' string codec, which only exists on Python 2.
    import binascii

    hex_digits = '%x' % n
    if len(hex_digits) % 2:
        # Each byte takes two hex digits, so pad odd-length strings on the left.
        hex_digits = '0' + hex_digits
    return binascii.unhexlify(hex_digits)
110+
111+
112+
def binary_to_long(b):
    """Take a binary string and read it as an integer.

    The bytes are interpreted most significant byte first.

    :param b: a byte string (``str`` on Python 2, ``bytes`` on Python 3).
    :returns: the integer the bytes represent.
    :raises ValueError: if ``b`` is empty, since there are no digits to parse.

    >>> binary_to_long('\\x00')
    0
    >>> binary_to_long('\\xff')
    255
    >>> binary_to_long('\\x02\\x03')
    515
    """
    # binascii replaces the 'hex' string codec, which only exists on Python 2.
    import binascii

    return int(binascii.hexlify(b), 16)
124+
125+
126+
def hexahexacontadecimal_encode_int(n):
    """Represent a number in hexahexacontadecimal, a compact format of unreserved URL characters.

    :param n: the non-negative integer to encode.
    :returns: the ASCII-encoded digits of ``n`` in base ``BASE``, most
        significant digit first.
    :raises ValueError: if ``n`` is negative.

    >>> hexahexacontadecimal_encode_int(0)
    '0'
    >>> hexahexacontadecimal_encode_int(1)
    '1'
    >>> hexahexacontadecimal_encode_int(65)
    '~'
    >>> hexahexacontadecimal_encode_int(66)
    '10'
    >>> hexahexacontadecimal_encode_int(67)
    '11'
    >>> hexahexacontadecimal_encode_int(302231454903657293676544)
    'iFsGUkO.0tsxw'
    >>> hexahexacontadecimal_encode_int(-1)
    Traceback (most recent call last):
        ...
    ValueError: cannot encode a negative number: -1

    """

    if n < 0:
        # divmod() floors, so a negative n would settle at -1 and the digit
        # loop below would never terminate.
        raise ValueError("cannot encode a negative number: %r" % (n,))

    if n == 0:
        # The digit loop emits nothing for zero, so handle it explicitly.
        return BASE66_ALPHABET[0].encode('ascii')

    digits = []
    while n:
        n, remainder = divmod(n, BASE)
        digits.append(BASE66_ALPHABET[remainder])
    # Digits were produced least significant first.
    return u''.join(reversed(digits)).encode('ascii')
152+
153+
154+
def hexahexacontadecimal_decode_int(s):
    """Parse a number expressed in hexahexacontadecimal as an integer (or long).

    :param s: string of digits from ``BASE66_ALPHABET``, most significant
        digit first. An empty string decodes to 0.
    :returns: the decoded integer.
    :raises ValueError: if ``s`` contains a character that is not in
        ``BASE66_ALPHABET``.

    >>> hexahexacontadecimal_decode_int('0')
    0
    >>> hexahexacontadecimal_decode_int('1')
    1
    >>> hexahexacontadecimal_decode_int('~')
    65
    >>> hexahexacontadecimal_decode_int('10')
    66
    >>> hexahexacontadecimal_decode_int('11')
    67
    >>> hexahexacontadecimal_decode_int('iFsGUkO.0tsxw')
    302231454903657293676544L

    """

    n = 0
    for c in s:
        try:
            digit = BASE66_ALPHABET.index(c)
        except ValueError:
            # Re-raise with a message that names the offending character
            # instead of the generic "substring not found".
            raise ValueError("invalid hexahexacontadecimal digit: %r" % (c,))
        n = n * BASE + digit

    return n
+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf8 -*-
3+
4+
"""
5+
6+
An alternative to hexahexacontadecimal for comparison and testing purposes, by Miles at http://stackoverflow.com/a/561704/76900.
7+
8+
"""
9+
10+
import string
11+
12+
# Digit symbols ordered by value: upper-case letters, lower-case letters,
# decimal digits, then '-' and '_'.
ALPHABET = ''.join((
    string.ascii_uppercase,
    string.ascii_lowercase,
    string.digits,
    '-_',
))
# Reverse lookup table: symbol -> digit value.
ALPHABET_REVERSE = {symbol: value for value, symbol in enumerate(ALPHABET)}
# Radix of the encoding.
BASE = len(ALPHABET)
# Prefix that marks an encoded negative number.
SIGN_CHARACTER = '$'
17+
18+
19+
def num_encode(n):
    """Encode the integer ``n`` as a string of ``ALPHABET`` symbols.

    The most significant digit comes first. Negative numbers are encoded as
    ``SIGN_CHARACTER`` followed by the encoding of their absolute value.
    """
    prefix = ''
    if n < 0:
        prefix = SIGN_CHARACTER
        n = -n

    symbols = []
    while True:
        n, remainder = divmod(n, BASE)
        symbols.append(ALPHABET[remainder])
        if not n:
            break

    # Symbols were collected least significant first.
    symbols.reverse()
    return prefix + ''.join(symbols)
29+
30+
31+
def num_decode(s):
    """Decode a string of ``ALPHABET`` symbols back into an integer.

    A leading ``SIGN_CHARACTER`` negates the value of the rest of the string.
    Indexing the first character means an empty string raises IndexError, and
    a symbol outside ``ALPHABET`` raises KeyError.
    """
    if s[0] == SIGN_CHARACTER:
        remainder = s[1:]
        return -num_decode(remainder)

    value = 0
    for symbol in s:
        value *= BASE
        value += ALPHABET_REVERSE[symbol]
    return value

setup.py

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
from distutils.core import setup
2+
3+
# Convert the Markdown README to reStructuredText for use as the package's
# long description. This bit requires pandoc. On Mac OS X:
# brew install haskell-platform && cabal update && cabal install pandoc
readme_rst = ''
try:
    import pypandoc
    readme_rst = pypandoc.convert('README.md', 'rst', format='markdown')
except (IOError, OSError, ImportError):
    # Best effort only: without pypandoc/pandoc (or the README) the package
    # is published with an empty long description.
    pass

metadata = {
    'name': 'hexahexacontadecimal',
    'version': '1.0',
    'description': 'Encode and decode hexahexacontadecimal numbers, a compact number representation for URLs.',
    'long_description': readme_rst,
    'author': 'Alexander Ljungberg',
    'author_email': '[email protected]',
    'url': 'https://github.com/aljungberg/hexahexacontadecimal',
    'packages': ['hexahexacontadecimal'],
    'keywords': ["base64", "hexahexacontadecimal", "base66", "url"],
    'classifiers': [
        "Programming Language :: Python",
        "Development Status :: 5 - Production/Stable",
        "Intended Audience :: Developers",
        "Topic :: Software Development :: Libraries",
        "License :: OSI Approved :: BSD License",
        "Operating System :: POSIX",
    ],
}

setup(**metadata)

0 commit comments

Comments
 (0)