Skip to content

Commit

Permalink
fix: improve efficiency of querying headword counts
Browse files Browse the repository at this point in the history
  • Loading branch information
Crissium committed Mar 2, 2024
1 parent 0fb04fb commit ea0cc0b
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 10 deletions.
37 changes: 32 additions & 5 deletions server/app/db_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def get_cursor() -> sqlite3.Cursor:
return local_storage.cursor


def create_table_entries() -> None:
def init_db() -> None:
cursor = get_cursor()
cursor.execute('''create table if not exists entries (
key text, -- the entry in lowercase and without accents
Expand All @@ -60,6 +60,13 @@ def create_table_entries() -> None:
offset integer, -- offset of the entry in the dictionary file
size integer -- size of the definition in bytes
)''')
cursor.execute('''create table if not exists headword_counts (
dictionary_name text primary key,
count integer
)''')
# Note: we shouldn't use triggers to update the table automatically,
# because it is statement-level rather than transaction-level,
# i.e., it would run after each insertion, which is not efficient.


def dictionary_exists(dictionary_name: str) -> bool:
Expand All @@ -80,8 +87,15 @@ def add_entry(key: str,
cursor.execute('insert into entries values (?, ?, ?, ?, ?)', (key, dictionary_name, word, offset, size))


def commit() -> None:
get_connection().commit()
def commit_new_entries(dictionary_name: str) -> None:
	"""Commit newly added entries and refresh the cached headword count.

	:param dictionary_name: name of the dictionary whose entries were just inserted

	The count is maintained here explicitly rather than by a trigger,
	because a trigger fires per statement and would run after every
	single insertion, which is inefficient for bulk loads.
	"""
	cursor = get_cursor()
	# Recompute the headword count for this dictionary and upsert it into the
	# cache table. The SELECT runs on the same connection as the pending entry
	# inserts, so it already sees the uncommitted rows — one commit at the end
	# covers both the entries and the refreshed count atomically (the previous
	# intermediate commit was redundant and doubled the fsync cost).
	cursor.execute('''insert or replace into headword_counts (dictionary_name, count)
	select ?, count(*) from entries
	where dictionary_name = ?''', (dictionary_name, dictionary_name))
	get_connection().commit()


def create_ngram_table(stores_keys: bool) -> None:
Expand Down Expand Up @@ -138,10 +152,22 @@ def get_entries(key: str, dictionary_name: str) -> list[tuple[str, int, int]]:
return cursor.fetchall()


# Serialises writers of the headword_counts cache table.
lock_headword_counts = threading.Lock()

def headword_count_of_dictionary(dictionary_name: str) -> int:
	"""Return the number of headwords in the given dictionary.

	Serves the value from the ``headword_counts`` cache table when present;
	otherwise falls back to counting rows in ``entries`` and stores the
	result in the cache for subsequent calls.

	:param dictionary_name: name of the dictionary to count
	:return: the headword count (0 when the dictionary has no entries)
	"""
	cursor = get_cursor()
	cursor.execute('select count from headword_counts where dictionary_name = ?', (dictionary_name,))
	cached_count = cursor.fetchone()
	if cached_count:
		return cached_count[0]
	cursor.execute('select count(*) from entries where dictionary_name = ?', (dictionary_name,))
	count = cursor.fetchone()[0]
	with lock_headword_counts:
		# Use 'insert or replace' (matching commit_new_entries): the cache
		# check above happens outside the lock, so two threads can both miss
		# the cache and both reach this point — a plain 'insert' would then
		# raise sqlite3.IntegrityError on the dictionary_name primary key
		# for the second writer.
		cursor.execute('insert or replace into headword_counts (dictionary_name, count) values (?, ?)',
			(dictionary_name, count))
		get_connection().commit()
	return count


def get_entries_with_headword(word: str, dictionary_name: str) -> list[tuple[int, int]]:
Expand All @@ -167,6 +193,7 @@ def get_entries_all(dictionary_name: str) -> list[tuple[str, str, int, int]]:
def delete_dictionary(dictionary_name: str) -> None:
	"""Remove every trace of a dictionary from the database.

	Deletes the dictionary's rows from both the ``entries`` table and the
	``headword_counts`` cache, then commits.

	:param dictionary_name: name of the dictionary to purge
	"""
	cursor = get_cursor()
	for table in ('entries', 'headword_counts'):
		cursor.execute(f'delete from {table} where dictionary_name = ?', (dictionary_name,))
	get_connection().commit()


Expand Down
2 changes: 1 addition & 1 deletion server/app/dictionaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def __init__(self, app: Flask) -> None:

self.settings = Settings()

db_manager.create_table_entries()
db_manager.init_db()

self._dictionaries: dict[str, BaseReader] = dict()
# on HDD it would confuse the I/O scheduler to load the dictionaries in parallel
Expand Down
2 changes: 1 addition & 1 deletion server/app/dicts/dsl_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def __init__(self,
for headword in headwords:
db_manager.add_entry(self.simplify(headword), self.name, headword, offset, size)
headwords.clear()
db_manager.commit()
db_manager.commit_new_entries(self.name)
db_manager.create_index()
logger.info(f'Entries of dictionary {self.name} added to database')
# Whether compressed originally or not, we need to compress it now
Expand Down
2 changes: 1 addition & 1 deletion server/app/dicts/mdict_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def __init__(self,
key.decode('UTF-8'),
offset,
length)
db_manager.commit()
db_manager.commit_new_entries(self.name)
db_manager.create_index()
logger.info(f'Entries of dictionary {self.name} added to database')

Expand Down
2 changes: 1 addition & 1 deletion server/app/dicts/stardict_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def __init__(self,
word_decoded = word_str.decode('utf-8')
for offset, size in spans:
db_manager.add_entry(self.simplify(word_decoded), self.name, word_decoded, offset, size)
db_manager.commit()
db_manager.commit_new_entries(self.name)
db_manager.create_index()
logger.info(f'Entries of dictionary {self.name} added to database')

Expand Down
2 changes: 1 addition & 1 deletion server/updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
project_directory = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
windows_save_path = os.path.join(os.path.dirname(project_directory), 'SilverDict-windows.zip')
unix_save_path = os.path.join(project_directory, 'SilverDict.zip')
current_version = 'v1.1.0'
current_version = 'v1.1.1'


def _get_latest_version_and_release_note() -> tuple[str, str]:
Expand Down

0 comments on commit ea0cc0b

Please sign in to comment.