-
Notifications
You must be signed in to change notification settings - Fork 0
/
strip_ids.py
33 lines (25 loc) · 1.08 KB
/
strip_ids.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#!/usr/bin/env python
##
# strip_ids.py
###
"""strip_ids.py
Our email subject line length database is pretty skimpy on personal information,
but the message-ids that are used as a primary key (to prevent duplication) can
reveal info about the sender or the recipient of the email, including hostname,
timezone, OS, mail client, etc.
This turns that index into a simple integer that has no relation to the original
message-id, then uses the SQLite command VACUUM to remove the previous data.
The resulting table can still be used for subject line length analysis.
"""
__author__ = "Danny O'Brien <http://www.spesh.com/danny/>"
__copyright__ = "Copyright Danny O'Brien"
__contributors__ = None
__license__ = "GPL v3"
import sqlite3
def strip_ids(db_path="emailsubjectlinelengths.db"):
    """Replace the message-id keys in ``email_stats`` with row numbers.

    The table is rebuilt as ``email_stats(id, date, subject)``, where ``id``
    is the ``rowid`` the row had in the old table.  The old table is then
    dropped and the database is VACUUMed so that the dropped rows do not
    linger in the file's free pages.

    db_path -- path of the SQLite database to rewrite in place.

    Raises sqlite3.Error if any statement fails.  The table swap runs in a
    single transaction, so a failure leaves the original table untouched.
    """
    # isolation_level=None switches off the sqlite3 module's implicit
    # transaction handling so BEGIN/COMMIT are under our control.  With the
    # default setting the module opens a transaction before the INSERT and
    # leaves it open, which makes VACUUM fail ("cannot VACUUM from within a
    # transaction") and means the swap is never committed.
    conn = sqlite3.connect(db_path, isolation_level=None)
    try:
        # SQLite DDL is transactional: either the whole swap happens or none
        # of it does (no stray "copy" table after a failed run).
        conn.execute('BEGIN;')
        # NOTE(review): id is declared TEXT, so the integer rowids end up
        # stored as text.  Kept as-is to preserve the existing schema.
        conn.execute(
            'CREATE TABLE copy(id TEXT PRIMARY KEY, date TEXT, subject INT);')
        conn.execute(
            'INSERT INTO copy SELECT rowid, date, subject FROM email_stats;')
        conn.execute('ALTER TABLE email_stats RENAME TO old_stats;')
        conn.execute('ALTER TABLE copy RENAME TO email_stats;')
        conn.execute('DROP TABLE old_stats;')
        conn.execute('COMMIT;')
        # VACUUM must run outside a transaction; it rebuilds the file so the
        # pages that held the old message-ids are discarded.
        conn.execute('VACUUM;')
    finally:
        # Closing a connection with a transaction still open rolls it back,
        # so an exception above undoes the partial swap.
        conn.close()


if __name__ == "__main__":
    strip_ids()