diff --git a/pypln/backend/celery_task.py b/pypln/backend/celery_task.py
index ed1c957..bced3a2 100644
--- a/pypln/backend/celery_task.py
+++ b/pypln/backend/celery_task.py
@@ -16,11 +16,9 @@
#
# You should have received a copy of the GNU General Public License
# along with PyPLN. If not, see <http://www.gnu.org/licenses/>.
-
+import pymongo
from celery import Task
-from pypln.backend.mongodict_adapter import MongoDictAdapter
-
# This import may look like an unused import, but it is not.
# When our base task class is defined, the Celery app must have already been
# instantiated, otherwise when this code is imported elsewhere (like in a
@@ -33,6 +31,11 @@
from pypln.backend import config
+mongo_client = pymongo.MongoClient(host=config.MONGODB_CONFIG["host"],
+ port=config.MONGODB_CONFIG["port"])
+database = mongo_client[config.MONGODB_CONFIG["database"]]
+document_collection = database[config.MONGODB_CONFIG["collection"]]
+
class PyPLNTask(Task):
"""
A base class for PyPLN tasks. It is in charge of getting the document
@@ -48,16 +51,9 @@ def run(self, document_id):
It will call the `process` method with a dictionary containing all the
document information and will update the database with results.
"""
- document = MongoDictAdapter(doc_id=document_id,
- host=config.MONGODB_CONFIG['host'],
- port=config.MONGODB_CONFIG['port'],
- database=config.MONGODB_CONFIG['database'])
- # Create a dictionary out of our document. We could simply pass
- # it on to the process method, but for now we won't let the user
- # manipulate the MongoDict directly.
- dic = {k: v for k, v in document.iteritems()}
- result = self.process(dic)
- document.update(result)
+ document = document_collection.find_one({"_id": document_id})
+ result = self.process(document)
+ document_collection.update({"_id": document_id}, {"$set": result})
return document_id
def process(self, document):
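
For context, a minimal sketch of the new task flow (assumptions: a running
MongoDB, celery configured to run tasks eagerly as in the test suite, and a
hypothetical WordCounter worker that is not part of this patch):

    from pypln.backend.celery_task import PyPLNTask, document_collection

    class WordCounter(PyPLNTask):  # hypothetical example worker
        def process(self, document):
            # `document` is the plain dict returned by find_one()
            return {'word_count': len(document['text'].split())}

    # run() fetches the document by _id, calls process() and merges the
    # result back into the same document with a $set update.
    doc_id = document_collection.insert({'text': 'a b c'}, w=1)
    WordCounter().delay(doc_id)
    assert document_collection.find_one({'_id': doc_id})['word_count'] == 3
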
diff --git a/pypln/backend/config.py b/pypln/backend/config.py
index f06fb1e..f074c3a 100644
--- a/pypln/backend/config.py
+++ b/pypln/backend/config.py
@@ -5,6 +5,7 @@ def get_store_config():
defaults = {'host': 'localhost',
'port': '27017',
'database': 'pypln_dev',
+ 'collection': 'documents',
'gridfs_collection': 'files',
}
config = ConfigParser.ConfigParser(defaults=defaults)
diff --git a/pypln/backend/mongodict_adapter.py b/pypln/backend/mongodict_adapter.py
deleted file mode 100644
index b57d322..0000000
--- a/pypln/backend/mongodict_adapter.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# coding: utf-8
-#
-# Copyright 2012 NAMD-EMAP-FGV
-#
-# This file is part of PyPLN. You can get more information at: http://pypln.org/.
-#
-# PyPLN is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# PyPLN is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with PyPLN. If not, see <http://www.gnu.org/licenses/>.
-
-from mongodict import MongoDict
-
-
-class MongoDictAdapter(MongoDict):
- #TODO: implement clear, __iter__, __len__ and contains with filters by id
- def __init__(self, doc_id, *args, **kwargs):
- self.doc_id = doc_id
- self.prefix = 'id:{}:'.format(self.doc_id)
- self.prefixed_id_query = {'$regex':
- '^{}'.format(self.prefix)}
- return super(MongoDictAdapter, self).__init__(*args, **kwargs)
-
- def __setitem__(self, key, value):
- key = 'id:{}:{}'.format(self.doc_id, key)
- return super(MongoDictAdapter, self).__setitem__(key, value)
-
- def __getitem__(self, key):
- key = 'id:{}:{}'.format(self.doc_id, key)
- return super(MongoDictAdapter, self).__getitem__(key)
-
- def __delitem__(self, key):
- key = 'id:{}:{}'.format(self.doc_id, key)
- return super(MongoDictAdapter, self).__delitem__(key)
-
- def __contains__(self, key):
- # If this is being called by other methods (like __delitem__)
- # it will already have the prefix
- if not key.startswith('id:'):
- key = 'id:{}:{}'.format(self.doc_id, key)
- return super(MongoDictAdapter, self).__contains__(key)
-
- has_key = __contains__
-
- def __iter__(self):
- query_result = self._collection.find({'_id':
- self.prefixed_id_query}, {'_id': 1})
- keys = (k['_id'].replace(self.prefix, '', 1) for k in query_result)
- return keys
-
- def __len__(self):
- return self._collection.find({'_id': self.prefixed_id_query}).count()
-
- def clear(self):
- self._collection.remove({'_id': self.prefixed_id_query})
diff --git a/pypln/backend/workers/elastic_indexer.py b/pypln/backend/workers/elastic_indexer.py
index faf8119..f5b55c3 100644
--- a/pypln/backend/workers/elastic_indexer.py
+++ b/pypln/backend/workers/elastic_indexer.py
@@ -36,6 +36,12 @@ def process(self, document):
# See `test_regression_indexing_should_not_include_contents` in
# tests/test_elastic_indexer.py for details.
document.pop('contents')
+ # We also need to exclude _id, because ObjectIds won't be JSON
+ # serializable.
+ document.pop("_id")
+
result = ES.index(index=index_name, doc_type=doc_type,
body=document, id=file_id)
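+ # Elasticsearch's response carries its own `_id`; rename it so the
+ # `$set` in PyPLNTask.run does not try to overwrite the immutable
+ # mongo `_id` of the document being processed.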
+ index_id = result.pop("_id")
+ result["index_id"] = index_id
return result
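
Why popping `_id` matters, as a minimal sketch of the failure mode (assumes
only the stdlib json module and the bson package bundled with pymongo):

    import json
    from bson import ObjectId

    doc = {'_id': ObjectId(), 'text': 'hello'}
    try:
        json.dumps(doc)  # the Elasticsearch client JSON-encodes the body
    except TypeError:
        pass  # ObjectId is not JSON serializable
    doc.pop('_id')
    json.dumps(doc)  # fine once _id is removed
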
diff --git a/pypln/backend/workers/extractor.py b/pypln/backend/workers/extractor.py
index 09e1b32..110730b 100644
--- a/pypln/backend/workers/extractor.py
+++ b/pypln/backend/workers/extractor.py
@@ -17,6 +17,7 @@
# You should have received a copy of the GNU General Public License
# along with PyPLN. If not, see <http://www.gnu.org/licenses/>.
+import base64
import shlex
from HTMLParser import HTMLParser
@@ -169,15 +170,16 @@ class Extractor(PyPLNTask):
#TODO: should 'replace_with' be '' when extracting from HTML?
def process(self, file_data):
+ contents = base64.b64decode(file_data['contents'])
with magic.Magic(flags=magic.MAGIC_MIME_TYPE) as m:
- file_mime_type = m.id_buffer(file_data['contents'])
+ file_mime_type = m.id_buffer(contents)
metadata = {}
if file_mime_type == 'text/plain':
- text = file_data['contents']
+ text = contents
elif file_mime_type == 'text/html':
- text = parse_html(file_data['contents'], True, ['script', 'style'])
+ text = parse_html(contents, True, ['script', 'style'])
elif file_mime_type == 'application/pdf':
- text, metadata = extract_pdf(file_data['contents'])
+ text, metadata = extract_pdf(contents)
else:
# If we can't detect the mimetype we add a flag that can be read by
# the frontend to provide more information on why the document
diff --git a/pypln/backend/workers/gridfs_data_retriever.py b/pypln/backend/workers/gridfs_data_retriever.py
index 268cb21..68ed916 100644
--- a/pypln/backend/workers/gridfs_data_retriever.py
+++ b/pypln/backend/workers/gridfs_data_retriever.py
@@ -16,6 +16,7 @@
#
# You should have received a copy of the GNU General Public License
# along with PyPLN. If not, see <http://www.gnu.org/licenses/>.
+import base64
from bson import ObjectId
from gridfs import GridFS
import pymongo
@@ -31,9 +32,17 @@ def process(self, document):
gridfs = GridFS(database, config.MONGODB_CONFIG['gridfs_collection'])
file_data = gridfs.get(ObjectId(document['file_id']))
+
+ # We decided to store 'contents' as a base64-encoded string in the
+ # database to avoid possible corruption of files. For example, when
+ # the file is a PDF, storing the raw data as UTF-8 in mongo might
+ # corrupt it. This wasn't a problem before, because MongoDict
+ # pickled everything before storing.
+ contents = base64.b64encode(file_data.read())
+
result = {'length': file_data.length,
'md5': file_data.md5,
'filename': file_data.filename,
'upload_date': file_data.upload_date,
- 'contents': file_data.read()}
+ 'contents': contents}
return result
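
The retriever now encodes and the extractor decodes, so the two changes form
a plain base64 round trip; a minimal sketch with a hypothetical byte string:

    import base64

    raw = '%PDF-1.4\x00\xff'  # hypothetical binary header; not valid UTF-8
    encoded = base64.b64encode(raw)  # plain ASCII, safe to store in mongo
    assert base64.b64decode(encoded) == raw
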
diff --git a/pypln/backend/workers/gridfs_file_deleter.py b/pypln/backend/workers/gridfs_file_deleter.py
index c1dc15f..36ea082 100644
--- a/pypln/backend/workers/gridfs_file_deleter.py
+++ b/pypln/backend/workers/gridfs_file_deleter.py
@@ -25,9 +25,9 @@
class GridFSFileDeleter(PyPLNTask):
def process(self, document):
- database = pymongo.MongoClient(host=config.MONGODB_CONFIG['host'],
- port=config.MONGODB_CONFIG['port']
- )[config.MONGODB_CONFIG['database']]
+ mongo_client = pymongo.MongoClient(host=config.MONGODB_CONFIG["host"],
+ port=config.MONGODB_CONFIG["port"])
+ database = mongo_client[config.MONGODB_CONFIG["database"]]
gridfs = GridFS(database, config.MONGODB_CONFIG['gridfs_collection'])
gridfs.delete(ObjectId(document['file_id']))
diff --git a/pypln/backend/workers/tokenizer.py b/pypln/backend/workers/tokenizer.py
index d6f30d0..fd5e37a 100644
--- a/pypln/backend/workers/tokenizer.py
+++ b/pypln/backend/workers/tokenizer.py
@@ -16,7 +16,6 @@
#
# You should have received a copy of the GNU General Public License
# along with PyPLN. If not, see <http://www.gnu.org/licenses/>.
-from mongodict import MongoDict
from nltk import word_tokenize, sent_tokenize
from pypln.backend.celery_task import PyPLNTask
diff --git a/pypln/backend/workers/trigrams.py b/pypln/backend/workers/trigrams.py
index d1972c2..4ad46ef 100644
--- a/pypln/backend/workers/trigrams.py
+++ b/pypln/backend/workers/trigrams.py
@@ -42,6 +42,14 @@ def process(self, document):
tr = defaultdict(lambda: [])
for m in metrics:
for res in trigram_finder.score_ngrams(getattr(trigram_measures,m)):
- tr[res[0]].append(res[1])
+ # We cannot store the trigram as a tuple (mongo keys need to be
+ # strings). We decided to join tokens using spaces since a
+ # space will never be in a token.
+ key = u' '.join(res[0])
+ # Mongo cannot have `.` or `$` in key names. Unfortunately
+ # this means we need to replace them with placeholders.
+ key = key.replace(u'$', u'\dollarsign')
+ key = key.replace(u'.', u'\dot')
+ tr[key].append(res[1])
- return {'trigram_rank': dict(tr), 'metrics':metrics}
+ return {'trigram_rank': tr, 'metrics':metrics}
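
A sketch of the key sanitization above (Python 2 string semantics, as in the
rest of the codebase; note that `\d` is not a recognized escape sequence, so
u'\dollarsign' keeps a literal backslash):

    tokens = (u'U.S.', u'$', u'dollars')  # hypothetical trigram
    key = u' '.join(tokens)  # tokens never contain spaces
    key = key.replace(u'$', u'\dollarsign')
    key = key.replace(u'.', u'\dot')
    assert key == u'U\dotS\dot \dollarsign dollars'
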
diff --git a/requirements/production.txt b/requirements/production.txt
index 2c80c43..e19f0fe 100644
--- a/requirements/production.txt
+++ b/requirements/production.txt
@@ -1,7 +1,6 @@
# Common
celery
pymongo==2.8.1
-mongodict
# The newest pyparsing (2.0) only supports python 3,
# so we explicitly install 1.5.7 (the last version that
diff --git a/tests/test_celery_task.py b/tests/test_celery_task.py
new file mode 100644
index 0000000..fd1adde
--- /dev/null
+++ b/tests/test_celery_task.py
@@ -0,0 +1,39 @@
+# coding: utf-8
+#
+# Copyright 2015 NAMD-EMAP-FGV
+#
+# This file is part of PyPLN. You can get more information at: http://pypln.org/.
+#
+# PyPLN is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# PyPLN is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with PyPLN. If not, see <http://www.gnu.org/licenses/>.
+from pypln.backend.celery_task import PyPLNTask
+from utils import TaskTest
+
+class FakeTask(PyPLNTask):
+ def process(self, document):
+ return {'result': document['input']}
+
+class TestCeleryTask(TaskTest):
+ def test_task_should_get_the_correct_document(self):
+ """This is a regression test. PyPLNTask was not filtering by _id. It
+ was getting the first document it found. """
+
+ # This is just preparing the expected input in the database
+ wrong_doc_id = self.collection.insert({'input': 'wrong'}, w=1)
+ correct_doc_id = self.collection.insert({'input': 'correct'}, w=1)
+
+ FakeTask().delay(correct_doc_id)
+
+ refreshed_doc = self.collection.find_one({'_id': correct_doc_id})
+
+ self.assertEqual(refreshed_doc['result'], 'correct')
diff --git a/tests/test_elastic_indexer.py b/tests/test_elastic_indexer.py
index a35dd23..faaafab 100644
--- a/tests/test_elastic_indexer.py
+++ b/tests/test_elastic_indexer.py
@@ -32,9 +32,10 @@ def test_indexing_go_through(self):
'contents': 'raw_file_contents',
}
- self.document.update(doc)
- ElasticIndexer().delay(self.fake_id)
- assert self.document['created'] # must be True
+ doc_id = self.collection.insert(doc, w=1)
+ ElasticIndexer().delay(doc_id)
+ refreshed_document = self.collection.find_one({'_id': doc_id})
+ self.assertTrue(refreshed_document['created'])
@patch('pypln.backend.workers.elastic_indexer.ES')
def test_regression_indexing_should_not_include_contents(self, ES):
@@ -54,11 +55,12 @@ def test_regression_indexing_should_not_include_contents(self, ES):
'contents': 'raw_file_contents',
}
- self.document.update(doc)
- ElasticIndexer().delay(self.fake_id)
+ doc_id = self.collection.insert(doc, w=1)
+ ElasticIndexer().delay(doc_id)
# remove properties that won't be indexed
index_name = doc.pop("index_name")
doc_type = doc.pop('doc_type')
doc.pop('contents')
+ doc.pop('_id')
ES.index.assert_called_with(body=doc, id=doc['file_id'],
doc_type=doc_type, index=index_name)
diff --git a/tests/test_mongodict_adapter.py b/tests/test_mongodict_adapter.py
deleted file mode 100644
index af1880d..0000000
--- a/tests/test_mongodict_adapter.py
+++ /dev/null
@@ -1,120 +0,0 @@
-# coding: utf-8
-#
-# Copyright 2012 NAMD-EMAP-FGV
-#
-# This file is part of PyPLN. You can get more information at: http://pypln.org/.
-#
-# PyPLN is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# PyPLN is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with PyPLN. If not, see <http://www.gnu.org/licenses/>.
-
-import pickle
-import unittest
-
-from bson import Binary
-import pymongo
-
-from pypln.backend.mongodict_adapter import MongoDictAdapter
-
-
-
-class TestMongoDictAdapter(unittest.TestCase):
- db_name = 'test_mongodictbyid'
-
- def setUp(self):
- self.fake_id = '1234'
- self.document = MongoDictAdapter(self.fake_id, database=self.db_name)
- self.db = pymongo.Connection()[self.db_name]
-
- def tearDown(self):
- self.db.main.remove({})
-
- @classmethod
- def tearDownClass(cls):
- pymongo.MongoClient().drop_database(cls.db_name)
-
- def test_creating_a_new_key_should_saved_the_information(self):
- self.document['new_key'] = 'value'
- stored_value = self.db.main.find_one(
- {'_id': 'id:{}:new_key'.format(self.fake_id)})
- self.assertIsNotNone(stored_value)
- # This decodes the value with the defaults for MongoDict
- decoded_value = pickle.loads(str(stored_value['v']))
- self.assertEqual(decoded_value, 'value')
-
- def test_reading_an_existing_key_should_read_saved_information(self):
- encoded_value = Binary(pickle.dumps(
- 'value', protocol=pickle.HIGHEST_PROTOCOL))
-
- self.db.main.insert(
- {'_id': 'id:{}:key'.format(self.fake_id), 'v': encoded_value})
-
- self.assertEqual(self.document['key'], 'value')
-
- def test_deleting_an_existing_key_should_delete_saved_information(self):
- encoded_value = Binary(pickle.dumps(
- 'value', protocol=pickle.HIGHEST_PROTOCOL))
-
- self.db.main.insert(
- {'_id': 'id:{}:key'.format(self.fake_id), 'v': encoded_value})
-
- self.assertEqual(self.document['key'], 'value')
- del self.document['key']
-
- stored_value = self.db.main.find_one(
- {'_id': 'id:{}:key'.format(self.fake_id)})
- self.assertIsNone(stored_value)
-
- def test_iterating_through_keys_does_not_bring_keys_from_other_docs(self):
- self.document['key_1'] = 1
- self.document['key_2'] = 2
- other_document = MongoDictAdapter('other_id', database=self.db_name)
- other_document['other_key'] = 3
- keys = [k for k in self.document]
-
- self.assertIn('key_1', keys)
- self.assertIn('key_2', keys)
- self.assertNotIn('key_3', keys)
-
- self.assertEquals(['key_1', 'key_2'], self.document.keys())
-
- def test_clear_should_not_remove_keys_for_other_docs(self):
- self.document['key_1'] = 1
- self.document['key_2'] = 2
- other_document = MongoDictAdapter('other_id', database=self.db_name)
- other_document['other_key'] = 3
-
- self.document.clear()
-
- with self.assertRaises(KeyError):
- self.document['key_1']
- self.document['key_2']
-
- self.assertEqual(other_document['other_key'], 3)
-
- def test_return_correct_length(self):
- self.document['key_1'] = 1
- self.document['key_2'] = 2
- other_document = MongoDictAdapter('other_id', database=self.db_name)
- other_document['other_key'] = 3
-
- self.assertEquals(len(self.document), 2)
-
- def test_contains(self):
- self.document['key'] = 1
- self.assertIn('key', self.document)
- self.assertNotIn('inexistent_key', self.document)
-
- def test_has_key(self):
- self.document['key'] = 1
- self.assertTrue(self.document.has_key('key'))
- self.assertFalse(self.document.has_key('inexistent_key'))
diff --git a/tests/test_worker_bigrams.py b/tests/test_worker_bigrams.py
index 91334b9..de605e2 100644
--- a/tests/test_worker_bigrams.py
+++ b/tests/test_worker_bigrams.py
@@ -28,16 +28,31 @@
class TestBigramWorker(TaskTest):
def test_bigrams_should_return_correct_score(self):
# We need this list comprehension because we need to save the word list
- # in MongoDict (thus, it needs to be pickleable). Also, a list is what
- # will be available to the worker in real situations.
+ # in mongo (thus, it needs to be json serializable). Also, a list is
+ # what will be available to the worker in real situations.
tokens = [w for w in
nltk.corpus.genesis.words('english-web.txt')]
- self.document['tokens'] = tokens
- bigram_finder = nltk.collocations.BigramCollocationFinder.from_words(tokens)
- expected = bigram_finder.score_ngram(bigram_measures.chi_sq, u',', u'which')
+ doc_id = self.collection.insert({'tokens': tokens}, w=1)
- Bigrams().delay(self.fake_id)
- bigram_rank = self.document['bigram_rank']
+ Bigrams().delay(doc_id)
+ refreshed_document = self.collection.find_one({'_id': doc_id})
+ bigram_rank = refreshed_document['bigram_rank']
result = bigram_rank[0][1][0]
- self.assertEqual(result, expected)
+ # This is the value of the chi_sq measure for this bigram in this
+ # collocation
+ expected_chi_sq = 95.59393417173634
+ self.assertEqual(result, expected_chi_sq)
+
+ def test_bigrams_could_contain_dollar_signs_and_dots(self):
+ tokens = ['$', '.']
+ doc_id = self.collection.insert({'tokens': tokens}, w=1)
+
+ Bigrams().delay(doc_id)
+ refreshed_document = self.collection.find_one({'_id': doc_id})
+ bigram_rank = refreshed_document['bigram_rank']
+ result = bigram_rank[0][1][0]
+ # 2.0 is the value of the chi_sq measure for this bigram in this
+ # collocation
+ expected_chi_sq = 2.0
+ self.assertEqual(result, expected_chi_sq)
diff --git a/tests/test_worker_extractor.py b/tests/test_worker_extractor.py
index 489bc02..d7819a5 100644
--- a/tests/test_worker_extractor.py
+++ b/tests/test_worker_extractor.py
@@ -17,6 +17,7 @@
# You should have received a copy of the GNU General Public License
# along with PyPLN. If not, see <http://www.gnu.org/licenses/>.
+import base64
import os
from textwrap import dedent
from pypln.backend.workers import Extractor
@@ -28,54 +29,64 @@ class TestExtractorWorker(TaskTest):
def test_extraction_from_text_file(self):
expected = "This is a test file.\nI'm testing PyPLN extractor worker!"
filename = os.path.join(DATA_DIR, 'test.txt')
- self.document.update({'filename': filename,
- 'contents': open(filename).read()})
- Extractor().delay(self.fake_id)
- self.assertEqual(self.document['text'], expected)
- self.assertEqual(self.document['file_metadata'], {})
- self.assertEqual(self.document['mimetype'], 'text/plain')
+ doc_id = self.collection.insert({'filename': filename,
+ 'contents': base64.b64encode(open(filename).read())}, w=1)
+ Extractor().delay(doc_id)
+ refreshed_document = self.collection.find_one({'_id': doc_id})
+ self.assertEqual(refreshed_document['text'], expected)
+ self.assertEqual(refreshed_document['file_metadata'], {})
+ self.assertEqual(refreshed_document['mimetype'], 'text/plain')
def test_extraction_from_html_file(self):
expected = "This is a test file. I'm testing PyPLN extractor worker!"
filename = os.path.join(DATA_DIR, 'test.html')
- data = {'filename': filename, 'contents': open(filename).read()}
- self.document.update(data)
- Extractor().delay(self.fake_id)
- self.assertEqual(self.document['text'], expected)
- self.assertEqual(self.document['file_metadata'], {})
- self.assertEqual(self.document['mimetype'], 'text/html')
+ # When saving directly to mongodb we always get everything back from
+ # the database as unicode. Because of that, the extractor had problems
+ # when there was a non-ascii character in the content. This wasn't a
+ # problem before, because mongodict used to keep a pickled
+ # representation of the data.
+ data = {'filename': filename,
+ 'contents': base64.b64encode(open(filename).read())}
+ doc_id = self.collection.insert(data, w=1)
+ Extractor().delay(doc_id)
+ refreshed_document = self.collection.find_one({'_id': doc_id})
+ self.assertEqual(refreshed_document['text'], expected)
+ self.assertEqual(refreshed_document['file_metadata'], {})
+ self.assertEqual(refreshed_document['mimetype'], 'text/html')
def test_extraction_from_pdf_file(self):
expected = "This is a test file.\nI'm testing PyPLN extractor worker!"
filename = os.path.join(DATA_DIR, 'test.pdf')
- data = {'filename': filename, 'contents': open(filename).read()}
- self.document.update(data)
- Extractor().delay(self.fake_id)
- self.assertEqual(self.document['text'], expected)
+ data = {'filename': filename,
+ 'contents': base64.b64encode(open(filename).read())}
+ doc_id = self.collection.insert(data, w=1)
+ Extractor().delay(doc_id)
+ refreshed_document = self.collection.find_one({'_id': doc_id})
+ self.assertEqual(refreshed_document['text'], expected)
# Check that the expected metadata is a subset of what
# our Extractor found (it may have found more details
# depending on the toolset used to extract metadata)
metadata_expected = {
- 'Author': 'Álvaro Justen',
- 'Creator': 'Writer',
- 'Producer': 'LibreOffice 3.5',
- 'CreationDate': 'Fri Jun 1 17:07:57 2012',
- 'Tagged': 'no',
- 'Pages': '1',
- 'Encrypted': 'no',
- 'Page size': '612 x 792 pts (letter)',
- 'Optimized': 'no',
- 'PDF version': '1.4',
+ u'Author': u'Álvaro Justen',
+ u'Creator': u'Writer',
+ u'Producer': u'LibreOffice 3.5',
+ u'CreationDate': u'Fri Jun 1 17:07:57 2012',
+ u'Tagged': u'no',
+ u'Pages': u'1',
+ u'Encrypted': u'no',
+ u'Page size': u'612 x 792 pts (letter)',
+ u'Optimized': u'no',
+ u'PDF version': u'1.4',
}
metadata_expected_set = set(metadata_expected.iteritems())
- metadata = self.document['file_metadata']
+ metadata = refreshed_document['file_metadata']
metadata_set = set(metadata.iteritems())
diff_set = metadata_expected_set - metadata_set
self.assertTrue(metadata_expected_set.issubset(metadata_set),
("Extracted metadata is not a subset of the expected metadata. "
"Items missing or with different values: {}").format(
u", ".join(unicode(item) for item in diff_set)))
- self.assertEqual(self.document['mimetype'], 'application/pdf')
+ self.assertEqual(refreshed_document['mimetype'], 'application/pdf')
def test_extraction_from_html(self):
contents = dedent('''
@@ -101,9 +112,10 @@ def test_extraction_from_html(self):