Skip to content
Snippets Groups Projects
Commit 409e7a2e authored by Todd Gamblin's avatar Todd Gamblin
Browse files

Faster database loading.

- use a 3-pass algorithm to load the installed package DAG.

- avoid redundant hashing/comparing on load.
parent a8aad95d
No related branches found
No related tags found
No related merge requests found
...@@ -198,7 +198,7 @@ def _write_to_yaml(self, stream): ...@@ -198,7 +198,7 @@ def _write_to_yaml(self, stream):
except YAMLError as e: except YAMLError as e:
raise SpackYAMLError("error writing YAML database:", str(e)) raise SpackYAMLError("error writing YAML database:", str(e))
def _read_spec_from_yaml(self, hash_key, installs, parent_key=None): def _read_spec_from_yaml(self, hash_key, installs):
"""Recursively construct a spec from a hash in a YAML database. """Recursively construct a spec from a hash in a YAML database.
Does not do any locking. Does not do any locking.
...@@ -212,19 +212,27 @@ def _read_spec_from_yaml(self, hash_key, installs, parent_key=None): ...@@ -212,19 +212,27 @@ def _read_spec_from_yaml(self, hash_key, installs, parent_key=None):
# Build spec from dict first. # Build spec from dict first.
spec = Spec.from_node_dict(spec_dict) spec = Spec.from_node_dict(spec_dict)
return spec
def _assign_dependencies(self, hash_key, installs, data):
# Add dependencies from other records in the install DB to # Add dependencies from other records in the install DB to
# form a full spec. # form a full spec.
spec = data[hash_key].spec
spec_dict = installs[hash_key]['spec']
if 'dependencies' in spec_dict[spec.name]: if 'dependencies' in spec_dict[spec.name]:
yaml_deps = spec_dict[spec.name]['dependencies'] yaml_deps = spec_dict[spec.name]['dependencies']
for dname, dhash, dtypes in Spec.read_yaml_dep_specs(yaml_deps): for dname, dhash, dtypes in Spec.read_yaml_dep_specs(yaml_deps):
child = self._read_spec_from_yaml(dhash, installs, hash_key) if dhash not in data:
spec._add_dependency(child, dtypes) tty.warn("Missing dependency not in database: ",
"%s needs %s-%s" % (
spec.format('$_$#'), dname, dhash[:7]))
continue
# Specs from the database need to be marked concrete because # defensive copy (not sure everything handles extra
# they represent actual installations. # parent links yet)
spec._mark_concrete() child = data[dhash].spec
return spec spec._add_dependency(child, dtypes)
def _read_from_yaml(self, stream): def _read_from_yaml(self, stream):
""" """
...@@ -267,22 +275,22 @@ def check(cond, msg): ...@@ -267,22 +275,22 @@ def check(cond, msg):
self.reindex(spack.install_layout) self.reindex(spack.install_layout)
installs = dict((k, v.to_dict()) for k, v in self._data.items()) installs = dict((k, v.to_dict()) for k, v in self._data.items())
# Iterate through database and check each record. # Build up the database in three passes:
#
# 1. Read in all specs without dependencies.
# 2. Hook dependencies up among specs.
# 3. Mark all specs concrete.
#
# The database is built up so that ALL specs in it share nodes
# (i.e., its specs are a true Merkle DAG, unlike most specs.)
# Pass 1: Iterate through database and build specs w/o dependencies
data = {} data = {}
for hash_key, rec in installs.items(): for hash_key, rec in installs.items():
try: try:
# This constructs a spec DAG from the list of all installs # This constructs a spec DAG from the list of all installs
spec = self._read_spec_from_yaml(hash_key, installs) spec = self._read_spec_from_yaml(hash_key, installs)
# Validate the spec by ensuring the stored and actual
# hashes are the same.
spec_hash = spec.dag_hash()
if not spec_hash == hash_key:
tty.warn(
"Hash mismatch in database: %s -> spec with hash %s" %
(hash_key, spec_hash))
continue # TODO: is skipping the right thing to do?
# Insert the brand new spec in the database. Each # Insert the brand new spec in the database. Each
# spec has its own copies of its dependency specs. # spec has its own copies of its dependency specs.
# TODO: would a more immutable spec implementation simplify # TODO: would a more immutable spec implementation simplify
...@@ -296,6 +304,18 @@ def check(cond, msg): ...@@ -296,6 +304,18 @@ def check(cond, msg):
"cause: %s: %s" % (type(e).__name__, str(e))) "cause: %s: %s" % (type(e).__name__, str(e)))
raise raise
# Pass 2: Assign dependencies once all specs are created.
for hash_key in data:
self._assign_dependencies(hash_key, installs, data)
# Pass 3: Mark all specs concrete. Specs representing real
# installations must be explicitly marked.
# We do this *after* all dependencies are connected because if we
# do it *while* we're constructing specs, it causes hashes to be
# cached prematurely.
for hash_key, rec in data.items():
rec.spec._mark_concrete()
self._data = data self._data = data
def reindex(self, directory_layout): def reindex(self, directory_layout):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment