-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathupdate_emails.py
executable file
·320 lines (285 loc) · 12.1 KB
/
update_emails.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
#!/usr/bin/env python
"""
A headless open source tool for synchronizing users' name and emails between a
MySQL database and mailman server. Correct login credentials for the database
and mailman server must be provided in a '.env' file.
Python2 & Python3 compatible.
Author: Connor Christian
"""
from __future__ import print_function, division, unicode_literals
import argparse
import os
import re
import sys
import time
from builtins import range
import dotenv
import splinter
import pymysql
def login_webserv(site, uri, pswd):
    """
    Authenticates against the mailman admin page.

    Navigates to the admin URI, checks that the page shows the expected
    mailman version string, submits the admin password, and verifies that
    the login was accepted.

    Args:
        site (splinter.driver.webdriver): Instance of the splinter browser.
        uri (str): Web address to navigate with splinter browser.
        pswd (str): Password required to login to the webserv.
    """
    site.visit(uri)
    # refuse to proceed against an unexpected mailman version
    assert site.is_text_present(VERSION_NUMBER), "Incorrect version number\nAborting"
    site.fill('adminpw', pswd)
    site.find_by_name('admlogin').click()
    # mailman re-renders the login form with this message on bad credentials
    assert site.is_text_not_present('Authorization failed.'), "Login failed\nAborting"
def logout_webserv(site, uri):
    """
    Logs out of the webserv and shuts the browser down.

    Args:
        site (splinter.driver.webdriver): Instance of the splinter browser.
        uri (str): Web address to navigate with splinter browser.
    """
    logout_url = uri + '/logout'
    site.visit(logout_url)
    # release the underlying webdriver process
    site.quit()
def get_db_content(HOST, UNAME, DBPSWD, NAME):
    """
    Collects the names and emails of active users from the MySQL database.

    Args:
        HOST (str): Uri for mysql database
        UNAME (str): Username for mysql database
        DBPSWD (str): Password for mysql database
        NAME (str): Name of table in mysql database

    Returns:
        dict: Maps each email to "lname fname" for every user that is not
        deleted and is on the notice list.
    """
    # BUG FIX: positional arguments to pymysql.connect() were deprecated and
    # removed in PyMySQL 1.0 — keyword arguments work on every version.
    db = pymysql.connect(host=HOST, user=UNAME, password=DBPSWD, database=NAME)
    try:
        cursor = db.cursor()
        cursor.execute(
            "SELECT CONCAT(p.lname, ' ', p.fname) AS name, p.email"
            " FROM ac_person p"
            " WHERE p.deleted IS NULL AND p.onnoticelist = 't'"
            " ORDER BY p.lname ASC, p.fname ASC")
        data = cursor.fetchall()  # data = (("lname fname", "email"), ...)
        content = dict()
        try:  # python2: decode raw bytes to unicode
            for user in data:
                content[unicode(user[1], "utf-8")] = unicode(user[0], "utf-8")
        except NameError:  # python3: str is already unicode
            for user in data:
                content[user[1]] = user[0]
    finally:
        # BUG FIX: close the connection even when the query raises,
        # instead of leaking it
        db.close()
    log("Database data is collected")
    return content
def get_web_emails(site, uri):
    """
    Scrapes the webserv membership pages for every subscribed email.

    The member list is either shown on one page, or split into per-letter
    tabs which may themselves be chunked; both layouts are handled.

    Args:
        site (splinter.driver.webdriver): Instance of the splinter browser.
        uri (str): Web address to navigate with splinter browser.

    Returns:
        list: All emails scraped from the webserv.
    """
    # navigate to membership management
    site.visit(uri + '/members/list')
    # BUG FIX: on python3 map() returns an iterator, so the old
    # `letters != []` check was always true even with zero letter tabs and
    # the scraper then iterated an empty list and returned nothing.
    # Materialize and dedupe up front so truthiness works on both pythons.
    letters = list(set(map(str, re.findall(r'/members\?letter=[a-z0-9]', site.html))))
    web_emails = list()
    maxUsers = int(re.search('<em>([0-9]*) members total', site.html).group(1))
    current = 0
    # BUG FIX: `stty size` produces no output without a controlling terminal
    # (e.g. under cron), which made split()/unpack raise; fall back to 80.
    try:
        rows, columns = os.popen('stty size', 'r').read().split()
        columns = int(columns)
    except (OSError, ValueError):
        columns = 80
    columns = min(columns - len("] 100% complete "), 80)
    log("Scraping data from webserv")
    if letters:  # found letter tabs
        for letter in letters:
            site.visit(uri + letter)
            # chunk links only appear when a letter has sublists
            chunks = len(site.find_link_by_partial_href('&chunk='))
            for chunk in range(chunks + 1):
                site.visit(uri + letter + '&chunk=' + str(chunk))
                links = site.find_link_by_partial_href('--at--')
                for link in links:
                    web_emails.append(link.value)
                # progress bar (suppressed with --quiet)
                ratio = len(web_emails)/maxUsers
                current = int(round(ratio*columns))
                if not args.quiet:
                    sys.stdout.write("\r\033[93m" + '['\
                            + '#'*(current) + ' '*(columns - current) \
                            + "] " + str(int(round(ratio*100)))\
                            + "% complete\033[0m")
                    sys.stdout.flush()
        if not args.quiet:
            sys.stdout.write('\n')
    else:  # all members on one page
        site.visit(uri + '/members/list')
        links = site.find_link_by_partial_href('--at--')
        for link in links:
            web_emails.append(link.value)
    log("Webserv data is collected")
    return web_emails
def compare_datasets(webmail, db_content):
    """
    Compares email lists and builds the add/remove batches for the webserv.

    Examples:
        if (email in database but not webserv) add;
        if (email in webserv but not database) remove;

    Args:
        webmail (list): List of all emails scraped from the webserv.
        db_content (dict): Maps email -> "lname fname" for all users on the
            database.

    Returns:
        tuple: (add_users, rm_emails) where add_users is newline-separated
        "lname fname <email>\\n" entries and rm_emails is newline-separated
        "email\\n" entries.
    """
    log("Comparing emails found on webserv with emails in database")
    # IDIOM FIX: build both batches with str.join instead of quadratic `+=`
    # concatenation, and use a set for O(1) membership instead of scanning
    # the webserv list once per database user.
    web_set = set(webmail)
    # every webserv email missing from the database must be removed
    rm_emails = ''.join(email + '\n'
                        for email in webmail if email not in db_content)
    # every database email missing from the webserv must be added
    add_users = ''.join(db_content[email] + ' <' + email + '>\n'
                        for email in db_content if email not in web_set)
    return (add_users, rm_emails)
def update_webserv(site, uri, data):
    """
    Synchronizes the webserv with the database by applying both batches.

    Args:
        site (splinter.driver.webdriver): Instance of the splinter browser.
        uri (str): Web address to navigate with splinter browser.
        data (tuple): (users to add, emails to remove), both as
            newline-separated strings.
    """
    log("Synchronizing data on the webserv")
    users_to_add, emails_to_remove = data
    # subscribe first, then unsubscribe, mirroring the comparison order
    add_webserv_emails(site, uri, users_to_add)
    remove_webserv_emails(site, uri, emails_to_remove)
    log("Webserv and database are synced")
def add_webserv_emails(site, uri, users):
    """
    Subscribes the given users through the webserv's mass-subscribe page.

    Args:
        site (splinter.driver.webdriver): Instance of the splinter browser.
        uri (str): Web address to navigate with splinter browser.
        users (str): All emails to add, newline separated, in the form
            "lname fname <email>\\n".
    """
    if not args.dryrun:
        # users: string of subscribees, newline separated
        site.visit(uri + '/members/add')
        site.choose('send_welcome_msg_to_this_batch', '0')
        site.fill('subscribees', users)
        site.find_by_name('setmemberopts_btn').click()
    # log each addition; drop only the trailing empty entry from the split
    entries = users.split('\n')
    if entries and entries[-1] == "":
        del entries[-1]
    for entry in entries:
        log("\033[32madded\033[0m " + entry)
def remove_webserv_emails(site, uri, emails):
    """
    Unsubscribes the given emails through the webserv's mass-removal page.

    Args:
        site (splinter.driver.webdriver): Instance of the splinter browser.
        uri (str): Web address to navigate with splinter browser.
        emails (str): All emails to remove, newline separated, in the form
            "email\\n".
    """
    if not args.dryrun:
        # emails: string of unsubscribees, newline separated
        site.visit(uri + '/members/remove')
        site.choose('send_unsub_ack_to_this_batch', '0')
        site.fill('unsubscribees', emails)
        site.find_by_name('setmemberopts_btn').click()
    # log each removal; drop only the trailing empty entry from the split
    entries = emails.split('\n')
    if entries and entries[-1] == '':
        del entries[-1]
    for entry in entries:
        log("\033[34mremoved\033[0m " + entry)
def log(message):
    """
    Outputs to stdout in the format of:
    "YYYY-mm-dd hh:mm:ss <message>"

    Suppressed entirely when the --quiet flag is set.

    Args:
        message (str): Content to output with timestamp.
    """
    if args.quiet:
        return
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    print(timestamp + ' ' + message)
if __name__ == "__main__":
    # argparse used to generate help menu and easy commandline argument parsing
    parser = argparse.ArgumentParser(description="A headless open source tool\
        for synchronizing users' name and email between a mysql database\
        and mailman server. Correct login credentials for the database and\
        mailman server must be provided in a '.env' file. This script is\
        python2 and python3 compatible.", epilog="Author: Connor Christian")
    parser.add_argument("-q", "--quiet", help="suppress output", action="store_true")
    parser.add_argument("-v", "--verbose", help="use the headed firefox browser",
                        action="store_true")
    parser.add_argument("-d", "--dryrun", help="perform a dry run by not \
        changing the listserv", action="store_true")
    args = parser.parse_args()

    # headed firefox for --verbose, headless phantomjs otherwise
    browser = splinter.Browser() if args.verbose else splinter.Browser('phantomjs')
    VERSION_NUMBER = "version 2.1.24"

    # collect login data from the .env file
    dotenv.load_dotenv(dotenv.find_dotenv())
    URI = os.getenv('LISTSERV_URI')
    PSWD = os.getenv('PASSWORD')
    HOST = os.getenv('HOST')
    UNAME = os.getenv('UNAME')
    DBPSWD = os.getenv('DBPASSWD')
    NAME = os.getenv('DBNAME')
    # check that every credential is set in the .env
    for value, error in (
            (URI, "No uri in .env\nAborting"),
            (PSWD, "No password in .env\nAborting"),
            (HOST, "No host for database in .env\nAborting"),
            (UNAME, "No database user in .env\nAborting"),
            (DBPSWD, "No database password in .env\nAborting"),
            (NAME, "No database name in .env\nAborting")):
        assert value, error

    if args.dryrun:
        log("\033[93mPerforming dry run\033[0m")
    login_webserv(browser, URI, PSWD)
    # dictionary format: key="email" value="lname fname"
    db_content = get_db_content(HOST, UNAME, DBPSWD, NAME)
    # list format: ["email"]
    web_emails = get_web_emails(browser, URI)
    update_webserv(browser, URI, compare_datasets(web_emails, db_content))
    logout_webserv(browser, URI)