diff --git a/var/spack/repos/builtin/packages/kaldi/openfst-1.4.1.patch b/var/spack/repos/builtin/packages/kaldi/openfst-1.4.1.patch
new file mode 100644
index 0000000000000000000000000000000000000000..62e0c7544bde8ee4932231687ef8ecb923de5e15
--- /dev/null
+++ b/var/spack/repos/builtin/packages/kaldi/openfst-1.4.1.patch
@@ -0,0 +1,45 @@
+From ef1c38844fd4c40304176f4e672189010805a3d9 Mon Sep 17 00:00:00 2001
+From: Jianwen WEI <weijianwen@gmail.com>
+Date: Thu, 13 Apr 2017 15:17:12 +0800
+Subject: [PATCH 1/1] Add a patched openfst@1.4.1-patch for Kaldi.
+
+---
+ src/configure  | 4 ++--
+ tools/Makefile | 4 ++--
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/src/configure b/src/configure
+index c90e9ba4e..ff328772c 100755
+--- a/src/configure
++++ b/src/configure
+@@ -793,9 +793,9 @@ echo "CONFIGURE_VERSION := $CONFIGURE_VERSION" >> kaldi.mk
+ echo "FSTROOT = $FSTROOT" >> kaldi.mk
+ 
+ # Check installed OpenFst version and add C++11 flags if OpenFst >= 1.4
+-OPENFST_VER=`grep 'PACKAGE_VERSION' $FSTROOT/Makefile | sed -e 's:.*= ::'`
++OPENFST_VER=1.4.1
+ echo "OPENFST_VER = $OPENFST_VER" >> kaldi.mk
+-OPENFST_VER_NUM=`echo $OPENFST_VER | sed 's/\./ /g' | xargs printf "%d%02d%02d"`
++OPENFST_VER_NUM=104001
+ if [ $OPENFST_VER_NUM -ge 10400 ]; then
+   echo "OPENFST_GE_10400 = 1" >> kaldi.mk
+   echo "EXTRA_CXXFLAGS += -DHAVE_OPENFST_GE_10400 -std=c++0x" >> kaldi.mk
+diff --git a/tools/Makefile b/tools/Makefile
+index cb3a35b2a..b868e600a 100644
+--- a/tools/Makefile
++++ b/tools/Makefile
+@@ -3,9 +3,9 @@
+ CXX = g++
+ # CXX = clang++  # Uncomment this line to build with Clang.
+ 
+-OPENFST_VERSION = 1.3.4
++# OPENFST_VERSION = 1.3.4
+ # Uncomment the next line to build with OpenFst-1.4.1.
+-# OPENFST_VERSION = 1.4.1
++OPENFST_VERSION = 1.4.1
+ # Note: OpenFst >= 1.4 requires C++11 support, hence you will need to use a
+ # relatively recent C++ compiler, e.g. gcc >= 4.6, clang >= 3.0.
+ 
+-- 
+2.11.1
+
diff --git a/var/spack/repos/builtin/packages/kaldi/package.py b/var/spack/repos/builtin/packages/kaldi/package.py
index 3cf25df46b7c92dfaf18f6de2ddce59838219c93..d2344d2838845ab6547ea5e7d4544e16cfdbdd54 100644
--- a/var/spack/repos/builtin/packages/kaldi/package.py
+++ b/var/spack/repos/builtin/packages/kaldi/package.py
@@ -38,6 +38,8 @@ class Kaldi(Package):    # Does not use Autotools
     url      = "https://github.com/kaldi-asr/kaldi/archive/master.zip"
 
     version('master', git='https://github.com/kaldi-asr/kaldi.git')
+    version('c024e8', git='https://github.com/kaldi-asr/kaldi.git',
+            commit='c024e8aa0a727bf76c91a318f76a1f8b0b59249e')
 
     variant('shared', default=True,
             description='build shared libraries')
@@ -47,19 +49,22 @@ class Kaldi(Package):    # Does not use Autotools
             description='build with CUDA')
 
     depends_on('blas')
-    depends_on('speex')
-    depends_on('openfst@1.6:')
     depends_on('cuda', when='+cuda')
     depends_on('sph2pipe', type='run')
     depends_on('sctk', type='run')
+    depends_on('speex', type='run')
+    depends_on('openfst@1.4.1-patch', when='@c024e8')
+    depends_on('openfst')
+
+    patch('openfst-1.4.1.patch', when='@c024e8')
 
     def install(self, spec, prefix):
-        configure_args = [
-            '--threaded-math',
-            '--speex-root=' + spec['speex'].prefix,
-            '--fst-root=' + spec['openfst'].prefix,
-            '--fst-version=' + str(spec['openfst'].version)
-        ]
+        configure_args = ['--fst-root=' + spec['openfst'].prefix]
+
+        if spec.satisfies('c024e8'):
+            configure_args.append('--speex-root=' + spec['speex'].prefix)
+            configure_args.append('--fst-version=' +
+                                  str(spec['openfst'].version))
 
         if '~shared' in spec:
             configure_args.append('--static')
@@ -70,7 +75,7 @@ def install(self, spec, prefix):
             configure_args.append('--mathlib=OPENBLAS')
             configure_args.append('--openblas-root=' + spec['blas'].prefix)
             if '+openmp' in spec['blas'].variants:
-                configure_args.append('--threaded-math')
+                configure_args.append('--threaded-math=yes')
         elif '^atlas' in spec:
             configure_args.append('--mathlib=ATLAS')
             configure_args.append('--atlas-root=' + spec['blas'].prefix)
diff --git a/var/spack/repos/builtin/packages/openfst/openfst-1.4.1.patch b/var/spack/repos/builtin/packages/openfst/openfst-1.4.1.patch
new file mode 100644
index 0000000000000000000000000000000000000000..1037b242319f17c5f50115c0dae75661c0ff71b9
--- /dev/null
+++ b/var/spack/repos/builtin/packages/openfst/openfst-1.4.1.patch
@@ -0,0 +1,227 @@
+*** openfst-1.4.1/src/include/fst/minimize.h
+***************
+*** 134,140 ****
+    typedef typename A::Weight Weight;
+    typedef ReverseArc<A> RevA;
+  
+!   CyclicMinimizer(const ExpandedFst<A>& fst) {
+      Initialize(fst);
+      Compute(fst);
+    }
+--- 134,147 ----
+    typedef typename A::Weight Weight;
+    typedef ReverseArc<A> RevA;
+  
+!   CyclicMinimizer(const ExpandedFst<A>& fst):
+!       // tell the Partition data-member to expect multiple repeated
+!       // calls to SplitOn with the same element if we are non-deterministic.
+!       P_(fst.Properties(kIDeterministic, true) == 0) {
+!     if(fst.Properties(kIDeterministic, true) == 0)
+!       CHECK(Weight::Properties() & kIdempotent); // this minimization
+!     // algorithm for non-deterministic FSTs can only work with idempotent
+!     // semirings.
+      Initialize(fst);
+      Compute(fst);
+    }
+***************
+*** 315,321 ****
+    typedef typename A::StateId ClassId;
+    typedef typename A::Weight Weight;
+  
+!   AcyclicMinimizer(const ExpandedFst<A>& fst) {
+      Initialize(fst);
+      Refine(fst);
+    }
+--- 322,334 ----
+    typedef typename A::StateId ClassId;
+    typedef typename A::Weight Weight;
+  
+!   AcyclicMinimizer(const ExpandedFst<A>& fst):
+!       // tell the Partition data-member to expect multiple repeated
+!       // calls to SplitOn with the same element if we are non-deterministic.
+!       partition_(fst.Properties(kIDeterministic, true) == 0) {
+!     if(fst.Properties(kIDeterministic, true) == 0)
+!       CHECK(Weight::Properties() & kIdempotent); // minimization for
+!     // non-deterministic FSTs can only work with idempotent semirings.
+      Initialize(fst);
+      Refine(fst);
+    }
+***************
+*** 531,543 ****
+  void Minimize(MutableFst<A>* fst,
+                MutableFst<A>* sfst = 0,
+                float delta = kDelta) {
+!   uint64 props = fst->Properties(kAcceptor | kIDeterministic|
+!                                  kWeighted | kUnweighted, true);
+!   if (!(props & kIDeterministic)) {
+!     FSTERROR() << "FST is not deterministic";
+!     fst->SetProperties(kError, kError);
+!     return;
+!   }
+  
+    if (!(props & kAcceptor)) {  // weighted transducer
+      VectorFst< GallicArc<A, STRING_LEFT> > gfst;
+--- 544,550 ----
+  void Minimize(MutableFst<A>* fst,
+                MutableFst<A>* sfst = 0,
+                float delta = kDelta) {
+!   uint64 props = fst->Properties(kAcceptor | kWeighted | kUnweighted, true);
+  
+    if (!(props & kAcceptor)) {  // weighted transducer
+      VectorFst< GallicArc<A, STRING_LEFT> > gfst;
+*** openfst-1.4.1/src/include/fst/partition.h
+***************
+*** 43,50 ****
+    friend class PartitionIterator<T>;
+  
+    struct Element {
+!    Element() : value(0), next(0), prev(0) {}
+!    Element(T v) : value(v), next(0), prev(0) {}
+  
+     T        value;
+     Element* next;
+--- 43,50 ----
+    friend class PartitionIterator<T>;
+  
+    struct Element {
+!     Element() : value(0), next(0), prev(0) {}
+!     Element(T v) : value(v), next(0), prev(0) {}
+  
+     T        value;
+     Element* next;
+***************
+*** 52,60 ****
+    };
+  
+   public:
+!   Partition() {}
+  
+!   Partition(T num_states) {
+      Initialize(num_states);
+    }
+  
+--- 52,62 ----
+    };
+  
+   public:
+!   Partition(bool allow_repeated_split):
+!       allow_repeated_split_(allow_repeated_split) {}
+  
+!   Partition(bool allow_repeated_split, T num_states):
+!       allow_repeated_split_(allow_repeated_split) {
+      Initialize(num_states);
+    }
+  
+***************
+*** 137,152 ****
+      if (class_size_[class_id] == 1) return;
+  
+      // first time class is split
+!     if (split_size_[class_id] == 0)
+        visited_classes_.push_back(class_id);
+! 
+      // increment size of split (set of element at head of chain)
+      split_size_[class_id]++;
+! 
+      // update split point
+!     if (class_split_[class_id] == 0)
+!       class_split_[class_id] = classes_[class_id];
+!     if (class_split_[class_id] == elements_[element_id])
+        class_split_[class_id] = elements_[element_id]->next;
+  
+      // move to head of chain in same class
+--- 139,154 ----
+      if (class_size_[class_id] == 1) return;
+  
+      // first time class is split
+!     if (split_size_[class_id] == 0) { 
+        visited_classes_.push_back(class_id);
+!       class_split_[class_id] = classes_[class_id];
+!     }
+      // increment size of split (set of element at head of chain)
+      split_size_[class_id]++;
+!     
+      // update split point
+!     if (class_split_[class_id] != 0
+!         && class_split_[class_id] == elements_[element_id])
+        class_split_[class_id] = elements_[element_id]->next;
+  
+      // move to head of chain in same class
+***************
+*** 157,180 ****
+    // class indices of the newly created class. Returns the new_class id
+    // or -1 if no new class was created.
+    T SplitRefine(T class_id) {
+      // only split if necessary
+!     if (class_size_[class_id] == split_size_[class_id]) {
+!       class_split_[class_id] = 0;
+        split_size_[class_id] = 0;
+        return -1;
+      } else {
+- 
+        T new_class = AddClass();
+        size_t remainder = class_size_[class_id] - split_size_[class_id];
+        if (remainder < split_size_[class_id]) {  // add smaller
+-         Element* split_el   = class_split_[class_id];
+          classes_[new_class] = split_el;
+-         class_size_[class_id] = split_size_[class_id];
+-         class_size_[new_class] = remainder;
+          split_el->prev->next = 0;
+          split_el->prev = 0;
+        } else {
+-         Element* split_el   = class_split_[class_id];
+          classes_[new_class] = classes_[class_id];
+          class_size_[class_id] = remainder;
+          class_size_[new_class] = split_size_[class_id];
+--- 159,189 ----
+    // class indices of the newly created class. Returns the new_class id
+    // or -1 if no new class was created.
+    T SplitRefine(T class_id) {
++ 
++     Element* split_el = class_split_[class_id];
+      // only split if necessary
+!     //if (class_size_[class_id] == split_size_[class_id]) {
+!     if(split_el == NULL) { // we split on everything...
+        split_size_[class_id] = 0;
+        return -1;
+      } else {
+        T new_class = AddClass();
++ 
++       if(allow_repeated_split_) { // split_size_ is possibly
++         // inaccurate, so work it out exactly.
++         size_t split_count;  Element *e;
++         for(split_count=0,e=classes_[class_id];
++             e != split_el; split_count++, e=e->next);
++         split_size_[class_id] = split_count;
++       }
+        size_t remainder = class_size_[class_id] - split_size_[class_id];
+        if (remainder < split_size_[class_id]) {  // add smaller
+          classes_[new_class] = split_el;
+          split_el->prev->next = 0;
+          split_el->prev = 0;
++         class_size_[class_id] = split_size_[class_id];
++         class_size_[new_class] = remainder;
+        } else {
+          classes_[new_class] = classes_[class_id];
+          class_size_[class_id] = remainder;
+          class_size_[new_class] = split_size_[class_id];
+***************
+*** 245,254 ****
+--- 254,269 ----
+    vector<T> class_size_;
+  
+    // size of split for each class
++   // in the nondeterministic case, split_size_ is actually an upper
++   // bound on the size of split for each class.
+    vector<T> split_size_;
+  
+    // set of visited classes to be used in split refine
+    vector<T> visited_classes_;
++ 
++   // true if the input fst was non-deterministic: SplitOn may then be called
++   // repeatedly with the same element, so split_size_ is only an upper bound.
++   bool allow_repeated_split_;
+  };
+  
+  
+
diff --git a/var/spack/repos/builtin/packages/openfst/openfst_gcc41up.patch b/var/spack/repos/builtin/packages/openfst/openfst_gcc41up.patch
new file mode 100644
index 0000000000000000000000000000000000000000..0e3eb87e8b904ddf456166a34ccd7c783f0c6c77
--- /dev/null
+++ b/var/spack/repos/builtin/packages/openfst/openfst_gcc41up.patch
@@ -0,0 +1,28 @@
+*** openfst-1.4.1/src/include/fst/lock.h
+***************
+*** 78,85 ****
+    RefCounter() : count_(1) {}
+
+    int count() const { return count_; }
+!   int Incr() const { return ++count_; }
+!   int Decr() const {  return --count_; }
+
+   private:
+    mutable int count_;
+--- 78,93 ----
+    RefCounter() : count_(1) {}
+
+    int count() const { return count_; }
+!
+! // below lines are modifications of openfst for multi-thread support,
+! // from tools/extras/openfst_gcc41up.patch, applied by tools/Makefile,
+! // applicable to gcc 4.1 or above
+!   // int Incr() const { return ++count_; }
+!   // int Decr() const {  return --count_; }
+!
+!   int Incr() const { return __sync_add_and_fetch(&count_, 1); }
+!   int Decr() const { return __sync_sub_and_fetch(&count_, 1); }
+! // end modifications
+
+   private:
+    mutable int count_;
diff --git a/var/spack/repos/builtin/packages/openfst/package.py b/var/spack/repos/builtin/packages/openfst/package.py
index 9711605462f5e2e142ec411f082ec415f7f40198..7a555f5b4c60bb2ab1cd5e0150eb47a15d8e09a4 100644
--- a/var/spack/repos/builtin/packages/openfst/package.py
+++ b/var/spack/repos/builtin/packages/openfst/package.py
@@ -42,8 +42,15 @@ class Openfst(AutotoolsPackage):
     version('1.5.2',  'e9d43874f7cadf791394caab3925eee4')
     version('1.5.1',  '8869e44c5a4af65409ae78b9f482b40e')
     version('1.5.0',  'a24fee5ffe28744c6fb7b1a49e0006c4')
+    version('1.4.1-patch',  'ca8f1730b9b9b281e515611fa9ae23c0',
+            url='http://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.4.1.tar.gz')
     version('1.4.1',  'ca8f1730b9b9b281e515611fa9ae23c0')
     version('1.4.0',  '662367ec91084ffab48ee9b5716de39c')
 
     conflicts('%intel@16:')
     conflicts('%gcc@6:')
+
+    # Patch openfst-1.4.1 for kaldi@c024e8
+    # See https://github.com/kaldi-asr/kaldi/blob/c024e8aa0a727bf76c91a318f76a1f8b0b59249e/tools/Makefile#L82-L88
+    patch('openfst-1.4.1.patch', when='@1.4.1-patch')
+    patch('openfst_gcc41up.patch', when='@1.4.1-patch')