Commit eef1f85 (1 parent: 5670f35)

#73: quick fix to stop logging errors while dealing with non-ASCII characters.
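The change prefixes each logging format string with u''. Under Python 2, %-interpolation that mixes str and unicode falls back on the ASCII codec, so non-ASCII text flowing through a logger call can raise UnicodeDecodeError, which the logging module then reports on stderr as an internal error. Making every format string unicode keeps the interpolated message unicode end to end (feedparser already returns unicode values). A minimal Python 2 sketch of the coercion asymmetry; the URL below is a hypothetical example, not taken from the codebase:

    # Python 2 only: %-interpolation coerces between str and unicode via ASCII.
    link = u'http://example.com/caf\xe9'   # hypothetical non-ASCII value

    msg = u'fetching %s' % link            # unicode format + unicode arg: no coercion
    assert isinstance(msg, unicode)

    raw = link.encode('utf-8')             # the same value as UTF-8 bytes
    try:
        u'fetching %s' % raw               # non-ASCII bytes get decoded as ASCII...
    except UnicodeDecodeError:
        pass                               # ...and this is the error class that polluted the logs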

1 file changed: coldsweat/fetcher.py (+29 −29)
@@ -39,7 +39,7 @@ def get_feed_timestamp(soup_feed, default):
         if value:
             # Fix future dates
             return min(tuple_as_datetime(value), default)
-    logger.debug('no feed timestamp found, using default')
+    logger.debug(u'no feed timestamp found, using default')
     return default

 def get_entry_id(entry, default):
@@ -59,7 +59,7 @@ def get_entry_timestamp(entry, default):
         if value:
             # Fix future dates
             return min(tuple_as_datetime(value), default)
-    logger.debug('no entry timestamp found, using default')
+    logger.debug(u'no entry timestamp found, using default')
     return default

 def get_entry_title(entry, default):
@@ -74,7 +74,7 @@ def get_entry_content(entry, default):

     candidates = entry.get('content', [])
     if 'summary_detail' in entry:
-        #logger.debug('summary found for entry %s' % entry.link)
+        #logger.debug(u'summary found for entry %s' % entry.link)
         candidates.append(entry.summary_detail)
     for c in candidates:
         # Match text/html, application/xhtml+xml
@@ -84,7 +84,7 @@ def get_entry_content(entry, default):
     if candidates:
         return candidates[0].type, candidates[0].value

-    logger.debug('no content found for entry %s' % entry.link)
+    logger.debug(u'no content found for entry %s' % entry.link)
     return default

 # Nullable fields
@@ -127,10 +127,10 @@ def load_plugins():
             fp, pathname, description = imp.find_module(name, [plugin_dir])
             imp.load_module(name, fp, pathname, description)
         except ImportError, ex:
-            logger.warn('could not load %s plugin (%s), ignored' % (name, ex))
+            logger.warn(u'could not load %s plugin (%s), ignored' % (name, ex))
             continue

-        logger.debug('loaded %s plugin' % name)
+        logger.debug(u'loaded %s plugin' % name)
         fp.close()

 def add_feed(feed, fetch_icon=False, add_entries=False):
@@ -143,7 +143,7 @@ def add_feed(feed, fetch_icon=False, add_entries=False):

     try:
         previous_feed = Feed.get(Feed.self_link == feed.self_link)
-        logger.debug('feed %s has been already added to database, skipped' % feed.self_link)
+        logger.debug(u'feed %s has been already added to database, skipped' % feed.self_link)
         return previous_feed
     except Feed.DoesNotExist:
         pass
@@ -155,10 +155,10 @@ def add_subscription(feed, user, group):
     try:
         subscription = Subscription.create(user=user, feed=feed, group=group)
     except IntegrityError:
-        logger.debug('user %s has already feed %s in her subscriptions' % (user.username, feed.self_link))
+        logger.debug(u'user %s has already feed %s in her subscriptions' % (user.username, feed.self_link))
         return None

-    logger.debug('added feed %s for user %s' % (feed.self_link, user.username))
+    logger.debug(u'added feed %s for user %s' % (feed.self_link, user.username))
     return subscription

 # ------------------------------------------------------
@@ -179,7 +179,7 @@ def fetch_url(url, timeout=None, etag=None, modified_since=None):

     try:
         response = requests.get(url, timeout=timeout, headers=request_headers)
-        logger.debug("got status %d" % response.status_code)
+        logger.debug(u"got status %d" % response.status_code)
     except (IOError, RequestException), ex:
         return None

@@ -215,7 +215,7 @@ def add_synthesized_entry(feed, title, content_type, content):
         last_updated_on = now
     )
     entry.save()
-    logger.debug("synthesized entry %s" % guid)
+    logger.debug(u"synthesized entry %s" % guid)
     return entry


@@ -234,15 +234,15 @@ def post_fetch(status, error=False):
         if error_threshold and (feed.error_count > error_threshold):
             feed.is_enabled = False
             feed.last_status = status # Save status code for posterity
-            logger.warn("%s has too many errors, disabled" % netloc)
+            logger.warn(u"%s has too many errors, disabled" % netloc)
             synthesize_entry('Feed has accomulated too many errors (last was %s).' % status_title(status))
         feed.save()

     max_history = config.getint('fetcher', 'max_history')
     interval = config.getint('fetcher', 'min_interval')
     timeout = config.getint('fetcher', 'timeout')

-    logger.debug("fetching %s" % feed.self_link)
+    logger.debug(u"fetching %s" % feed.self_link)

     schema, netloc, path, params, query, fragment = urlparse.urlparse(feed.self_link)

@@ -256,14 +256,14 @@ def post_fetch(status, error=False):
             # No datetime.timedelta since we need to deal with large seconds values
             delta = datetime_as_epoch(now) - datetime_as_epoch(value)
             if delta < interval:
-                logger.debug("%s for %s is below min_interval, skipped" % (fieldname, netloc))
+                logger.debug(u"%s for %s is below min_interval, skipped" % (fieldname, netloc))
                 return

     response = fetch_url(feed.self_link, timeout=timeout, etag=feed.etag, modified_since=feed.last_updated_on)
     if not response:
         # Record as "503 Service unavailable"
         post_fetch(503, error=True)
-        logger.warn("a network error occured while fetching %s" % netloc)
+        logger.warn(u"a network error occured while fetching %s" % netloc)
         return

     feed.last_checked_on = now
@@ -275,33 +275,33 @@ def post_fetch(status, error=False):
             Feed.get(self_link=self_link)
         except Feed.DoesNotExist:
             feed.self_link = self_link
-            logger.info("%s has changed its location, updated to %s" % (netloc, self_link))
+            logger.info(u"%s has changed its location, updated to %s" % (netloc, self_link))
         else:
             feed.is_enabled = False
-            logger.warn("new %s location %s is duplicated, disabled" % (netloc, self_link))
+            logger.warn(u"new %s location %s is duplicated, disabled" % (netloc, self_link))
             synthesize_entry('Feed has a duplicated web address.')
             post_fetch(DuplicatedFeedError.code, error=True)
             return

     if response.status_code == 304: # Not modified
-        logger.debug("%s hasn't been modified, skipped" % netloc)
+        logger.debug(u"%s hasn't been modified, skipped" % netloc)
         post_fetch(response.status_code)
         return
     elif response.status_code == 410: # Gone
         feed.is_enabled = False
-        logger.warn("%s is gone, disabled" % netloc)
+        logger.warn(u"%s is gone, disabled" % netloc)
         synthesize_entry('Feed has been removed from the origin server.')
         post_fetch(response.status_code, error=True)
         return
     elif response.status_code not in POSITIVE_STATUS_CODES: # No good
-        logger.warn("%s replied with status %d, aborted" % (netloc, response.status_code))
+        logger.warn(u"%s replied with status %d, aborted" % (netloc, response.status_code))
         post_fetch(response.status_code, error=True)
         return

     soup = feedparser.parse(response.text)
     # Got parsing error? Log error but do not increment the error counter
     if hasattr(soup, 'bozo') and soup.bozo:
-        logger.info("%s caused a parser error (%s), tried to parse it anyway" % (netloc, soup.bozo_exception))
+        logger.info(u"%s caused a parser error (%s), tried to parse it anyway" % (netloc, soup.bozo_exception))
         post_fetch(response.status_code)

     feed.etag = response.headers.get('ETag', None)
@@ -321,7 +321,7 @@ def post_fetch(status, error=False):
         # point to Feed Burner or similar services
         feed.icon = favicon.fetch(feed.alternate_link or feed.self_link)
         feed.icon_last_updated_on = now
-        logger.debug("saved favicon %s..." % (feed.icon[:70]))
+        logger.debug(u"saved favicon %s..." % (feed.icon[:70]))

     post_fetch(response.status_code)

@@ -334,7 +334,7 @@ def post_fetch(status, error=False):
         guid = get_entry_id(parsed_entry, default=link)

         if not guid:
-            logger.warn('could not find guid for entry from %s, skipped' % netloc)
+            logger.warn(u'could not find guid for entry from %s, skipped' % netloc)
             continue

         author = get_entry_author(parsed_entry, soup.feed)
@@ -345,13 +345,13 @@ def post_fetch(status, error=False):

         # Skip ancient feed items
         if max_history and ((now - timestamp).days > max_history):
-            logger.debug("entry %s from %s is over max_history, skipped" % (guid, netloc))
+            logger.debug(u"entry %s from %s is over max_history, skipped" % (guid, netloc))
             continue

         try:
             # If entry is already in database with same id, then skip it
             Entry.get(guid=guid)
-            logger.debug("duplicated entry %s, skipped" % guid)
+            logger.debug(u"duplicated entry %s, skipped" % guid)
             continue
         except Entry.DoesNotExist:
             pass
@@ -375,7 +375,7 @@ def post_fetch(status, error=False):
 def feed_worker(feed):

     if not feed.subscriptions:
-        logger.debug("feed %s has no subscribers, skipped" % feed.self_link)
+        logger.debug(u"feed %s has no subscribers, skipped" % feed.self_link)
         return

     # Allow each process to open and close its database connection
@@ -396,12 +396,12 @@ def fetch_feeds():

     feeds = list(q)
     if not feeds:
-        logger.debug("no feeds found to refresh, halted")
+        logger.debug(u"no feeds found to refresh, halted")
         return

     load_plugins()

-    logger.debug("starting fetcher")
+    logger.debug(u"starting fetcher")
     trigger_event('fetch_started')

     if config.getboolean('fetcher', 'multiprocessing'):
@@ -417,7 +417,7 @@ def fetch_feeds():

     trigger_event('fetch_done', feeds)

-    logger.info("%d feeds checked in %.2fs" % (len(feeds), time.time() - start))
+    logger.info(u"%d feeds checked in %.2fs" % (len(feeds), time.time() - start))
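A closing note, not part of the commit: with unicode log messages, it can also help to give the handler an explicit encoding so that non-ASCII text is encoded deliberately instead of through the stream's default codec. A hedged sketch using only the standard library logging module; the logger name, file name, and URL are illustrative:

    import logging

    logger = logging.getLogger('coldsweat.fetcher')   # illustrative name
    handler = logging.FileHandler('fetcher.log', encoding='utf-8')
    handler.setFormatter(logging.Formatter(u'%(asctime)s %(levelname)s %(message)s'))
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)

    # Passing args to the logger instead of pre-formatting with % defers
    # interpolation until the record is actually emitted.
    logger.debug(u'fetching %s', u'http://example.com/caf\xe9')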