-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmainDatabase.py
More file actions
83 lines (57 loc) · 2.31 KB
/
mainDatabase.py
File metadata and controls
83 lines (57 loc) · 2.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from collections import Counter, defaultdict

import pymongo

from credentials import Credentials
class MainDatabase:
    """Thin wrapper around the collections of the ``search_engine`` MongoDB database.

    Exposes bulk/single insert helpers and simple lookups for the single-term
    index, its content collection, and the two-gram equivalents.
    """

    def __init__(self):
        """Connect to MongoDB and bind the collections used by the other methods."""
        # NOTE(review): this file imports `Credentials` but never defines or
        # imports `getCredentials`, so this call raises NameError unless that
        # name is injected elsewhere -- confirm the intended accessor on
        # `Credentials` and switch to it.
        client = pymongo.MongoClient(getCredentials())
        self.db = client.search_engine

        # Single-term collections. These used to be assigned twice in a row;
        # only the last assignment ever took effect, so just that one is kept.
        self.collection = self.db.final_index
        self.collection_content = self.db.final_final_content

        # Twogram collections
        self.twogram_collection = self.db.final_final_twogram_index
        self.twogram_content_collection = self.db.final_final_twogram_content

    def bulk_insert(self, docs):
        """Insert every document in ``docs`` into the index collection."""
        self.collection.insert_many(docs)

    def bulk_insert_content(self, docs):
        """Insert every document in ``docs`` into the content collection."""
        self.collection_content.insert_many(docs)

    def twogram_bulk_insert(self, docs):
        """Insert every document in ``docs`` into the two-gram index collection."""
        self.twogram_collection.insert_many(docs)

    def twogram_bulk_insert_content(self, docs):
        """Insert every document in ``docs`` into the two-gram content collection."""
        self.twogram_content_collection.insert_many(docs)

    def insert_mongo_document(self, document):
        """Insert a single document into the index collection.

        Returns:
            The ``inserted_id`` reported by ``insert_one``.
        """
        return self.collection.insert_one(document).inserted_id

    def find_mongo_documents(self, key, value):
        """Return the ``find`` result for index documents where ``key == value``.

        The result is returned as-is (not materialised into a list).
        """
        return self.collection.find({key: value})

    def find_mongo_documents_by_key(self, key):
        """Return a list of all index documents that contain the field ``key``."""
        return list(self.collection.find({key: {'$exists': True}}))

    def count_words(self, words=None):
        """Count word occurrences and store one ``{word: count}`` document per word.

        Args:
            words: Iterable of words to count. When omitted, ``self.words`` is
                used, which keeps the original zero-argument call working for
                callers that set that attribute themselves.

        Raises:
            AttributeError: If ``words`` is omitted and ``self.words`` is not set.
        """
        if words is None:
            words = getattr(self, 'words', None)
            if words is None:
                raise AttributeError(
                    "count_words() needs a 'words' argument or a 'words' "
                    "attribute set on the instance"
                )

        # Counter keeps first-seen order, so documents are written in the same
        # order the hand-rolled dict loop produced.
        documents = [{word: count} for word, count in Counter(words).items()]

        # insert_many rejects an empty list, so skip the call when there is
        # nothing to store.
        if documents:
            self.collection.insert_many(documents)

    def update_database(self, indexDictionary: dict, batch_size: int = 1000):
        """Write an in-memory index to the index collection in batches.

        Args:
            indexDictionary: Maps each token to a dict with the keys ``'urls'``
                and ``'amountOfURLs'``.
            batch_size: Maximum number of documents sent per ``insert_many``
                call.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
            KeyError: If an entry lacks ``'urls'`` or ``'amountOfURLs'``.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        batch = []
        for token, entry in indexDictionary.items():
            url_count = entry['amountOfURLs']
            batch.append({
                'word': token,
                'urls': entry['urls'],
                'amountOfURLs': url_count,
                # 'df' is stored as a copy of 'amountOfURLs'.
                'df': url_count,
            })
            if len(batch) >= batch_size:
                self.collection.insert_many(batch)
                batch = []

        # Flush the final partial batch; never call insert_many with no docs.
        if batch:
            self.collection.insert_many(batch)
# Script entry point: running this module directly currently does nothing;
# the file only defines MainDatabase.
if __name__ == "__main__":
    pass