Skip to content

update: Rewrite update script #372

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 39 additions & 12 deletions elixir/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,16 @@
import berkeleydb
import re
from . import lib
from .lib import autoBytes
import os
import os.path
import errno

# Cache size used by the update script for the largest databases. Tuple of (gigabytes, bytes).
# https://docs.oracle.com/database/bdb181/html/api_reference/C/dbset_cachesize.html
# https://docs.oracle.com/database/bdb181/html/programmer_reference/general_am_conf.html#am_conf_cachesize
CACHESIZE = (2,0)

# Matches one packed definition entry: (id)(type)(line)(family), optionally comma-terminated.
# Raw literals (rb'...'/r'...') fix the invalid-escape-sequence warning that the
# original non-raw b'(\d*)...' patterns trigger (SyntaxWarning since Python 3.12).
deflist_regex = re.compile(rb'(\d*)(\w)(\d*)(\w),?')
deflist_macro_regex = re.compile(r'\dM\d+(\w)')

Expand Down Expand Up @@ -72,6 +78,14 @@ def iter(self, dummy=False):
if dummy:
yield maxId, None, None, None

def exists(self, idx, line_num):
    """Return True if a definition entry with id *idx* is recorded at *line_num*."""
    # Each packed entry decodes to (id, type, line, family); only id and line matter here.
    return any(
        entry_id == idx and int(entry_line) == line_num
        for entry_id, _, entry_line, _ in deflist_regex.findall(self.data)
    )

def append(self, id, type, line, family):
if type not in defTypeD:
return
Expand Down Expand Up @@ -145,11 +159,14 @@ def pack(self):
return self.data

class BsdDB:
def __init__(self, filename, readonly, contentType, shared=False):
def __init__(self, filename, readonly, contentType, shared=False, cachesize=None):
self.filename = filename
self.db = berkeleydb.db.DB()
flags = berkeleydb.db.DB_THREAD if shared else 0

if cachesize is not None:
self.db.set_cachesize(cachesize[0], cachesize[1])

if readonly:
flags |= berkeleydb.db.DB_RDONLY
self.db.open(filename, flags=flags)
Expand All @@ -159,40 +176,50 @@ def __init__(self, filename, readonly, contentType, shared=False):
self.ctype = contentType

def exists(self, key):
    """Return True if *key* is present in the underlying Berkeley DB."""
    return self.db.exists(autoBytes(key))

def get(self, key):
    """Fetch *key* and decode the stored value with the content-type
    constructor; return None when the key is absent."""
    raw = self.db.get(autoBytes(key))
    if raw is None:
        return None
    return self.ctype(raw)

def get_keys(self):
    # Return all keys currently stored in the underlying Berkeley DB.
    return self.db.keys()

def put(self, key, val, sync=False):
    """Store *val* under *key*; when sync is True, flush the write to disk."""
    key_bytes = autoBytes(key)
    payload = autoBytes(val)
    # Values autoBytes could not convert are content objects exposing pack().
    # (Exact-type check on purpose: bytes subclasses are still pack()ed.)
    if type(payload) is not bytes:
        payload = payload.pack()
    self.db.put(key_bytes, payload)
    if sync:
        self.db.sync()

def sync(self):
    # Flush any buffered database writes to disk.
    self.db.sync()

def close(self):
    # Close the underlying Berkeley DB handle.
    self.db.close()

def __len__(self):
    # Number of unique keys, as reported by Berkeley DB statistics.
    return self.db.stat()["nkeys"]

class DB:
def __init__(self, dir, readonly=True, dtscomp=False, shared=False):
def __init__(self, dir, readonly=True, dtscomp=False, shared=False, update_cache=False):
if os.path.isdir(dir):
self.dir = dir
else:
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), dir)

ro = readonly
cachesize = None

if update_cache:
cachesize = CACHESIZE

self.vars = BsdDB(dir + '/variables.db', ro, lambda x: int(x.decode()), shared=shared)
# Key-value store of basic information
Expand All @@ -203,20 +230,20 @@ def __init__(self, dir, readonly=True, dtscomp=False, shared=False):
self.file = BsdDB(dir + '/filenames.db', ro, lambda x: x.decode(), shared=shared)
# Map serial number to filename
self.vers = BsdDB(dir + '/versions.db', ro, PathList, shared=shared)
self.defs = BsdDB(dir + '/definitions.db', ro, DefList, shared=shared)
self.defs = BsdDB(dir + '/definitions.db', ro, DefList, shared=shared, cachesize=cachesize)
self.defs_cache = {}
NOOP = lambda x: x
self.defs_cache['C'] = BsdDB(dir + '/definitions-cache-C.db', ro, NOOP, shared=shared)
self.defs_cache['K'] = BsdDB(dir + '/definitions-cache-K.db', ro, NOOP, shared=shared)
self.defs_cache['D'] = BsdDB(dir + '/definitions-cache-D.db', ro, NOOP, shared=shared)
self.defs_cache['M'] = BsdDB(dir + '/definitions-cache-M.db', ro, NOOP, shared=shared)
assert sorted(self.defs_cache.keys()) == sorted(lib.CACHED_DEFINITIONS_FAMILIES)
self.refs = BsdDB(dir + '/references.db', ro, RefList, shared=shared)
self.docs = BsdDB(dir + '/doccomments.db', ro, RefList, shared=shared)
self.refs = BsdDB(dir + '/references.db', ro, RefList, shared=shared, cachesize=cachesize)
self.docs = BsdDB(dir + '/doccomments.db', ro, RefList, shared=shared, cachesize=cachesize)
self.dtscomp = dtscomp
if dtscomp:
self.comps = BsdDB(dir + '/compatibledts.db', ro, RefList, shared=shared)
self.comps_docs = BsdDB(dir + '/compatibledts_docs.db', ro, RefList, shared=shared)
self.comps = BsdDB(dir + '/compatibledts.db', ro, RefList, shared=shared, cachesize=cachesize)
self.comps_docs = BsdDB(dir + '/compatibledts_docs.db', ro, RefList, shared=shared, cachesize=cachesize)
# Use a RefList in case there are multiple doc comments for an identifier

def close(self):
Expand Down
3 changes: 2 additions & 1 deletion elixir/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import sys
import logging
import subprocess, os
from typing import List

logger = logging.getLogger(__name__)

Expand All @@ -46,7 +47,7 @@ def run_cmd(*args, env=None):
# Invoke ./script.sh with the given arguments
# Returns the list of output lines

def scriptLines(*args, env=None):
def scriptLines(*args, env=None) -> List[bytes]:
p = script(*args, env=env)
p = p.split(b'\n')
del p[-1]
Expand Down
Loading