Commit f5bc0cbb authored by Todd Gamblin, committed by GitHub

Merge pull request #1535 from LLNL/bugfix/faster-install-db-gh1521

[WIP] Faster database loading, faster in-memory hashing
parents 8d755c01 69b68153
@@ -164,6 +164,9 @@ def __init__(self, root, db_dir=None):
         self.lock = Lock(self._lock_path)
         self._data = {}
 
+        # whether there was an error at the start of a read transaction
+        self._error = None
+
     def write_transaction(self, timeout=_db_lock_timeout):
         """Get a write lock context manager for use in a `with` block."""
         return WriteTransaction(self.lock, self._read, self._write, timeout)
@@ -198,7 +201,7 @@ def _write_to_yaml(self, stream):
         except YAMLError as e:
             raise SpackYAMLError("error writing YAML database:", str(e))
 
-    def _read_spec_from_yaml(self, hash_key, installs, parent_key=None):
+    def _read_spec_from_yaml(self, hash_key, installs):
         """Recursively construct a spec from a hash in a YAML database.
 
         Does not do any locking.
@@ -212,19 +215,27 @@ def _read_spec_from_yaml(self, hash_key, installs, parent_key=None):
         # Build spec from dict first.
         spec = Spec.from_node_dict(spec_dict)
+        return spec
 
+    def _assign_dependencies(self, hash_key, installs, data):
         # Add dependencies from other records in the install DB to
         # form a full spec.
+        spec = data[hash_key].spec
+        spec_dict = installs[hash_key]['spec']
+
         if 'dependencies' in spec_dict[spec.name]:
             yaml_deps = spec_dict[spec.name]['dependencies']
             for dname, dhash, dtypes in Spec.read_yaml_dep_specs(yaml_deps):
-                child = self._read_spec_from_yaml(dhash, installs, hash_key)
-                spec._add_dependency(child, dtypes)
+                if dhash not in data:
+                    tty.warn("Missing dependency not in database: ",
+                             "%s needs %s-%s" % (
+                                 spec.format('$_$#'), dname, dhash[:7]))
+                    continue
 
-        # Specs from the database need to be marked concrete because
-        # they represent actual installations.
-        spec._mark_concrete()
-        return spec
+                # defensive copy (not sure everything handles extra
+                # parent links yet)
+                child = data[dhash].spec
+                spec._add_dependency(child, dtypes)
     def _read_from_yaml(self, stream):
         """
@@ -248,7 +259,8 @@ def _read_from_yaml(self, stream):
         def check(cond, msg):
             if not cond:
-                raise CorruptDatabaseError(self._index_path, msg)
+                raise CorruptDatabaseError(
+                    "Spack database is corrupt: %s" % msg, self._index_path)
 
         check('database' in yfile, "No 'database' attribute in YAML.")
@@ -267,22 +279,28 @@ def check(cond, msg):
             self.reindex(spack.install_layout)
 
         installs = dict((k, v.to_dict()) for k, v in self._data.items())
 
-        # Iterate through database and check each record.
+        def invalid_record(hash_key, error):
+            msg = ("Invalid record in Spack database: "
+                   "hash: %s, cause: %s: %s")
+            msg %= (hash_key, type(error).__name__, str(error))
+            raise CorruptDatabaseError(msg, self._index_path)
+
+        # Build up the database in three passes:
+        #
+        #   1. Read in all specs without dependencies.
+        #   2. Hook dependencies up among specs.
+        #   3. Mark all specs concrete.
+        #
+        # The database is built up so that ALL specs in it share nodes
+        # (i.e., its specs are a true Merkle DAG, unlike most specs.)
 
+        # Pass 1: Iterate through database and build specs w/o dependencies
         data = {}
         for hash_key, rec in installs.items():
             try:
                 # This constructs a spec DAG from the list of all installs
                 spec = self._read_spec_from_yaml(hash_key, installs)
 
-                # Validate the spec by ensuring the stored and actual
-                # hashes are the same.
-                spec_hash = spec.dag_hash()
-                if not spec_hash == hash_key:
-                    tty.warn(
-                        "Hash mismatch in database: %s -> spec with hash %s" %
-                        (hash_key, spec_hash))
-                    continue    # TODO: is skipping the right thing to do?
-
                 # Insert the brand new spec in the database.  Each
                 # spec has its own copies of its dependency specs.
                 # TODO: would a more immutable spec implementation simplify
@@ -290,11 +308,22 @@ def check(cond, msg):
                 data[hash_key] = InstallRecord.from_dict(spec, rec)
             except Exception as e:
-                tty.warn("Invalid database reecord:",
-                         "file: %s" % self._index_path,
-                         "hash: %s" % hash_key,
-                         "cause: %s: %s" % (type(e).__name__, str(e)))
-                raise
+                invalid_record(hash_key, e)
+
+        # Pass 2: Assign dependencies once all specs are created.
+        for hash_key in data:
+            try:
+                self._assign_dependencies(hash_key, installs, data)
+            except Exception as e:
+                invalid_record(hash_key, e)
+
+        # Pass 3: Mark all specs concrete.  Specs representing real
+        # installations must be explicitly marked.  We do this *after* all
+        # dependencies are connected because doing it *while* constructing
+        # specs causes hashes to be cached prematurely.
+        for hash_key, rec in data.items():
+            rec.spec._mark_concrete()
 
         self._data = data
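
The three passes above follow a general pattern for deserializing a DAG with shared nodes: create every node first, wire edges second, and finalize (here, mark concrete, which caches hashes) only once the graph is complete. A minimal standalone sketch of the same pattern, with illustrative names rather than Spack's API:

    # Sketch only: `installs` maps a hash to a record shaped like
    # {'name': ..., 'dependencies': [dep_hashes]}.
    class Node(object):
        def __init__(self, name):
            self.name = name
            self.deps = []
            self.concrete = False

    def build_dag(installs):
        # Pass 1: create every node, no edges yet.
        nodes = dict((h, Node(rec['name'])) for h, rec in installs.items())
        # Pass 2: wire edges only after all nodes exist, so each
        # dependency resolves to the single shared node for its hash.
        for h, rec in installs.items():
            nodes[h].deps = [nodes[d] for d in rec.get('dependencies', ())
                             if d in nodes]
        # Pass 3: finalize last, so nothing is computed (or cached)
        # against a node whose edges are still incomplete.
        for node in nodes.values():
            node.concrete = True
        return nodes
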
@@ -304,7 +333,26 @@ def reindex(self, directory_layout):
 
         Locks the DB if it isn't locked already.
 
         """
-        with self.write_transaction():
+        # Special transaction to avoid recursive reindex calls and to
+        # ignore errors if we need to rebuild a corrupt database.
+        def _read_suppress_error():
+            try:
+                if os.path.isfile(self._index_path):
+                    self._read_from_yaml(self._index_path)
+            except CorruptDatabaseError as e:
+                self._error = e
+                self._data = {}
+
+        transaction = WriteTransaction(
+            self.lock, _read_suppress_error, self._write, _db_lock_timeout)
+
+        with transaction:
+            if self._error:
+                tty.warn(
+                    "Spack database was corrupt. Will rebuild. Error was:",
+                    str(self._error))
+                self._error = None
+
             old_data = self._data
             try:
                 self._data = {}
@@ -313,10 +361,15 @@ def reindex(self, directory_layout):
                 for spec in directory_layout.all_specs():
                     # Create a spec for each known package and add it.
                     path = directory_layout.path_for_spec(spec)
-                    old_info = old_data.get(spec.dag_hash())
 
+                    # Try to recover explicit value from old DB, but
+                    # default it to False if DB was corrupt.
                     explicit = False
-                    if old_info is not None:
-                        explicit = old_info.explicit
+                    if old_data is not None:
+                        old_info = old_data.get(spec.dag_hash())
+                        if old_info is not None:
+                            explicit = old_info.explicit
+
                     self._add(spec, path, directory_layout, explicit=explicit)
 
             self._check_ref_counts()
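
The custom transaction exists because the stock read path reindexes on a corrupt or missing index, which would recurse back into reindex(). Reading through _read_suppress_error instead records the error and starts from an empty index, so the rebuild happens exactly once under a single lock. Assuming the WriteTransaction(lock, read_fn, write_fn, timeout) interface used above, the intended semantics are roughly:

    # Assumed semantics of WriteTransaction (sketch, not the real class):
    # on entry: acquire the lock, then call read_fn() to load state
    # on exit:  call write_fn() to persist state, then release the lock
    with WriteTransaction(self.lock, _read_suppress_error, self._write,
                          _db_lock_timeout):
        pass  # body sees self._data / self._error as left by read_fn
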
@@ -601,11 +654,7 @@ def missing(self, spec):
 class CorruptDatabaseError(SpackError):
-    def __init__(self, path, msg=''):
-        super(CorruptDatabaseError, self).__init__(
-            "Spack database is corrupt: %s. %s." % (path, msg),
-            "Try running `spack reindex` to fix.")
+    """Raised when errors are found while reading the database."""
 
 
 class InvalidDatabaseVersionError(SpackError):
...
@@ -504,6 +504,7 @@ def __init__(self, spec_like, *dep_like, **kwargs):
             self.variants.spec = self
             self.namespace = other.namespace
             self._hash = other._hash
+            self._cmp_key_cache = other._cmp_key_cache
 
             # Specs are by default not assumed to be normal, but in some
             # cases we've read them from a file and want to assume normal.
@@ -858,9 +859,10 @@ def return_val(res):
             # Edge traversal yields but skips children of visited nodes
             if not (key in visited and cover == 'edges'):
+                # This code determines direction and yields the children/parents
                 successors = deps
                 if direction == 'parents':
-                    successors = self.dependents_dict()
+                    successors = self.dependents_dict()  # TODO: deptype?
 
                 visited.add(key)
                 for name in sorted(successors):
@@ -1278,15 +1280,15 @@ def concretize(self):
         # Mark everything in the spec as concrete, as well.
         self._mark_concrete()
 
-    def _mark_concrete(self):
+    def _mark_concrete(self, value=True):
         """Mark this spec and its dependencies as concrete.
 
         Only for internal use -- client code should use "concretize"
         unless there is a need to force a spec to be concrete.
         """
         for s in self.traverse(deptype_query=alldeps):
-            s._normal = True
-            s._concrete = True
+            s._normal = value
+            s._concrete = value
 
     def concretized(self):
         """This is a non-destructive version of concretize().  First clones,
@@ -1533,6 +1535,10 @@ def normalize(self, force=False):
         if self._normal and not force:
             return False
 
+        # avoid any assumptions about concreteness when forced
+        if force:
+            self._mark_concrete(False)
+
         # Ensure first that all packages & compilers in the DAG exist.
         self.validate_names()
 
         # Get all the dependencies into one DependencyMap
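
Paired with the parameterized _mark_concrete above, this lets a forced normalize strip stale concreteness (and any comparison keys cached against it) before re-resolving the DAG, instead of trusting flags set by an earlier concretize. A hedged illustration of the effect (these are internal Spack APIs, and 'libelf' is only an example package name):

    # Illustration only -- internal APIs, example package name.
    s = Spec('libelf')
    s.concretize()           # every node now has _normal/_concrete True
    s.normalize(force=True)  # first runs s._mark_concrete(False), so the
                             # re-normalization starts with no assumptions
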
@@ -1865,7 +1871,7 @@ def virtual_dependencies(self):
         """Return list of any virtual deps in this spec."""
         return [spec for spec in self.traverse() if spec.virtual]
 
-    def _dup(self, other, **kwargs):
+    def _dup(self, other, deps=True, cleardeps=True):
         """Copy the spec other into self.  This is an overwriting
         copy.  It does not copy any dependents (parents), but by default
         copies dependencies.
@@ -1896,7 +1902,7 @@ def _dup(self, other, **kwargs):
         self.versions = other.versions.copy()
         self.architecture = other.architecture
         self.compiler = other.compiler.copy() if other.compiler else None
-        if kwargs.get('cleardeps', True):
+        if cleardeps:
             self._dependents = DependencyMap()
             self._dependencies = DependencyMap()
         self.compiler_flags = other.compiler_flags.copy()
@@ -1906,19 +1912,15 @@ def _dup(self, other, **kwargs):
         self.external_module = other.external_module
         self.namespace = other.namespace
         self._hash = other._hash
+        self._cmp_key_cache = other._cmp_key_cache
 
         # If we copy dependencies, preserve DAG structure in the new spec
-        if kwargs.get('deps', True):
+        if deps:
             # This copies the deps from other using _dup(deps=False)
-            # XXX(deptype): We can keep different instances of specs here iff
-            # it is only a 'build' dependency (from its parent).
-            # All other instances must be shared (due to symbol
-            # and PATH contention). These should probably search
-            # for any existing installation which can satisfy the
-            # build and latch onto that because if 3 things need
-            # the same build dependency and it is *not*
-            # available, we only want to build it once.
-            new_nodes = other.flat_dependencies(deptype_query=alldeps)
+            deptypes = alldeps
+            if isinstance(deps, (tuple, list)):
+                deptypes = deps
+            new_nodes = other.flat_dependencies(deptypes=deptypes)
             new_nodes[self.name] = self
 
             stack = [other]
@@ -1927,6 +1929,9 @@ def _dup(self, other, **kwargs):
                 new_spec = new_nodes[cur_spec.name]
 
                 for depspec in cur_spec._dependencies.values():
+                    if not any(d in deptypes for d in depspec.deptypes):
+                        continue
+
                     stack.append(depspec.spec)
 
                     # XXX(deptype): add any new deptypes that may have appeared
self.external_module = other.external_module self.external_module = other.external_module
return changed return changed
def copy(self, **kwargs): def copy(self, deps=True):
"""Return a copy of this spec. """Return a copy of this spec.
By default, returns a deep copy. Supply dependencies=False
to get a shallow copy. By default, returns a deep copy. To control how dependencies are
copied, supply:
deps=True: deep copy
deps=False: shallow copy (no dependencies)
deps=('link', 'build'):
only build and link dependencies. Similar for other deptypes.
""" """
clone = Spec.__new__(Spec) clone = Spec.__new__(Spec)
clone._dup(self, **kwargs) clone._dup(self, deps=deps)
return clone return clone
@property @property
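
A quick usage summary of the new signature (mirroring the docstring above; the deptype tuple is the same one the test changes below use):

    full = spec.copy()                      # deep copy, all dependencies
    node = spec.copy(deps=False)            # shallow copy, no dependencies
    rt   = spec.copy(deps=('link', 'run'))  # restrict to link and run deps
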
@@ -2059,10 +2073,17 @@ def _cmp_key(self):
         1. A tuple describing this node in the DAG.
         2. The hash of each of this node's dependencies' cmp_keys.
         """
-        dep_dict = self.dependencies_dict(deptype=('link', 'run'))
-        return self._cmp_node() + (
-            tuple(hash(dep_dict[name])
-                  for name in sorted(dep_dict)),)
+        if self._cmp_key_cache:
+            return self._cmp_key_cache
+
+        dep_tuple = tuple(
+            (d.spec.name, hash(d.spec), tuple(sorted(d.deptypes)))
+            for name, d in sorted(self._dependencies.items()))
+
+        key = (self._cmp_node(), dep_tuple)
+        if self._concrete:
+            self._cmp_key_cache = key
+        return key
 
     def colorized(self):
         return colorize_spec(self)
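
Caching the key only when self._concrete is set is the classic memoize-once-immutable guard: a spec that can still mutate could serve a stale key from the cache, but a concrete spec's identity is fixed. A generic standalone sketch of the same guard:

    # Generic sketch of the cache-only-when-frozen pattern used by _cmp_key.
    class Keyed(object):
        def __init__(self, fields):
            self.fields = fields
            self.frozen = False        # analogous to Spec._concrete
            self._key_cache = None     # analogous to Spec._cmp_key_cache

        def key(self):
            if self._key_cache is not None:
                return self._key_cache
            key = tuple(sorted(self.fields.items()))  # the expensive part
            if self.frozen:            # memoize only once immutable
                self._key_cache = key
            return key
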
@@ -2457,6 +2478,7 @@ def spec(self, name, check_valid_token=False):
             spec._dependencies = DependencyMap()
             spec.namespace = spec_namespace
             spec._hash = None
+            spec._cmp_key_cache = None
 
             spec._normal = False
             spec._concrete = False

...
@@ -91,22 +91,33 @@ def test_read_and_write_spec(self):
         # Make sure spec file can be read back in to get the original spec
         spec_from_file = self.layout.read_spec(spec_path)
-        self.assertEqual(spec, spec_from_file)
-        self.assertTrue(spec.eq_dag, spec_from_file)
+
+        # currently we don't store build dependency information when
+        # we write out specs to the filesystem.
+        # TODO: fix this when we can concretize more loosely based on
+        # TODO: what is installed.  We currently omit these to
+        # TODO: increase reuse of build dependencies.
+        stored_deptypes = ('link', 'run')
+        expected = spec.copy(deps=stored_deptypes)
+        self.assertEqual(expected, spec_from_file)
+        self.assertTrue(expected.eq_dag(spec_from_file))
         self.assertTrue(spec_from_file.concrete)
 
         # Ensure that specs that come out "normal" are really normal.
         with open(spec_path) as spec_file:
             read_separately = Spec.from_yaml(spec_file.read())
 
-            read_separately.normalize()
-            self.assertEqual(read_separately, spec_from_file)
+            # TODO: revise this when build deps are in dag_hash
+            norm = read_separately.normalized().copy(deps=stored_deptypes)
+            self.assertEqual(norm, spec_from_file)
 
-            read_separately.concretize()
-            self.assertEqual(read_separately, spec_from_file)
+            # TODO: revise this when build deps are in dag_hash
+            conc = read_separately.concretized().copy(deps=stored_deptypes)
+            self.assertEqual(conc, spec_from_file)
 
         # Make sure the hash of the read-in spec is the same
-        self.assertEqual(spec.dag_hash(), spec_from_file.dag_hash())
+        self.assertEqual(expected.dag_hash(), spec_from_file.dag_hash())
 
         # Ensure directories are properly removed
         self.layout.remove_install_directory(spec)