diff --git a/src/Floquet/Floq2Op.cc b/src/Floquet/Floq2Op.cc
index eb0c9fd..5500eb9 100644
--- a/src/Floquet/Floq2Op.cc
+++ b/src/Floquet/Floq2Op.cc
@@ -803,6 +803,18 @@ floq2_op exp(floq2_op &Op1)
   floq2_op EXpOp1(Op1.N1, Op1.N2, Op1.hs, Op1._omega1, Op1._omega2);
    EXpOp1.gen_op::operator= (exp((gen_op&) Op1));  // calculate exp(Op1)
   return EXpOp1;
+  }
+
+
+	// Input		Op1   : Floquet operator
+        // Return		Op    : exponential of Op1 using Pade
+	//				Op = exp(Op1)
+
+floq2_op expm(floq2_op &Op1)
+  {  
+  floq2_op EXpOp1(Op1.N1, Op1.N2, Op1.hs, Op1._omega1, Op1._omega2);
+   EXpOp1.gen_op::operator= (Op1.gen_op::expm());  // calculate exp(Op1)
+  return EXpOp1;
   }
 
 /*
diff --git a/src/Floquet/Floq2Op.h b/src/Floquet/Floq2Op.h
index 5e6e028..3cf5ad3 100644
--- a/src/Floquet/Floq2Op.h
+++ b/src/Floquet/Floq2Op.h
@@ -243,13 +243,19 @@ MSVCDLL        void     operator /= (      double   d);
         // Output          gen_op    : Operator containing trace(s) over    
         //                             Photon space
 
-MSVCDLL friend floq2_op exp (floq2_op &Op1);
+MSVCDLL friend floq2_op exp(floq2_op &Op1);
 	
         // Input		Op1  : Floquet operator
         // Return		Op   : exponential of Op1
 	//			       Op = exp(Op1)
         // Note			     : Computed in EBR of Op1
 
+MSVCDLL friend floq2_op expm(floq2_op &Op1);
+	
+        // Input		Op1  : Floquet operator
+        // Return		Op   : exponential of Op1 using Pade
+	//			       Op = exp(Op1)
+
 /*  friend floq2_op prop (floq2_op &FLOQHAM , double &time);
  
 	// Input		Op1   : Floquet Hamiltonian
diff --git a/src/Floquet/FloqOp.cc b/src/Floquet/FloqOp.cc
index 032ca2a..6972f72 100644
--- a/src/Floquet/FloqOp.cc
+++ b/src/Floquet/FloqOp.cc
@@ -653,6 +653,32 @@ floq_op floq_op::exp()
   return ExpFOp;
   }
 
+        // Input		FOp	: Floquet operator
+        // Return		ExpFOp	: Exponential of FOp using Pade
+	//				   ExpFOp = exp(FOp)
+
+floq_op expm(const floq_op& FOp)
+  {
+  floq_op ExpFOp(FOp.N, FOp.hs, FOp.Om);	// Allocate return operator
+  ExpFOp.gen_op::operator= (FOp.gen_op::expm());	// Set exponential array
+  return ExpFOp;				// Return exponentiated FOp
+  }
+
+
+floq_op floq_op::expm()
+
+        // Input                FlOp    : Floquet operator (*this)
+        // Return               ExpFlOp : Exponential of Op1 using Pade
+        //                                   Op = exp(Op1)
+	// sosi				: Be nice to set this up as in
+	//				  class gen_op.....
+
+  {
+  floq_op ExpFOp(N, hs, Om);	// Allocate return operator
+  ExpFOp=*this;
+  ExpFOp.gen_op::operator= (ExpFOp.gen_op::expm());	// Set exponential array
+  return ExpFOp;
+  }
 
 floq_op prop(floq_op& FH, double time)
 
diff --git a/src/Floquet/FloqOp.h b/src/Floquet/FloqOp.h
index 8979443..9d390e1 100644
--- a/src/Floquet/FloqOp.h
+++ b/src/Floquet/FloqOp.h
@@ -268,6 +268,18 @@ MSVCDLL floq_op exp();
         // Return               ExpFlOp : Exponential of Op1
         //                                   Op = exp(Op1)
 
+MSVCDLL friend floq_op expm(const floq_op& Op1);
+	
+        // Input		Op1  : Floquet operator
+        // Return		Op   : exponential of Op1 using Pade
+	//			       Op = exp(Op1)
+
+MSVCDLL floq_op expm();
+
+        // Input                FlOp    : Floquet operator (*this)
+        // Return               ExpFlOp : Exponential of Op1 using Pade
+        //                                   Op = exp(Op1)
+
 MSVCDLL friend floq_op prop(floq_op& FLOQHAM, double time);
  
 	// Input		Op1   : Floquet Hamiltonian
diff --git a/src/HSLib/GenOp.cc b/src/HSLib/GenOp.cc
index b895df6..36512e9 100644
--- a/src/HSLib/GenOp.cc
+++ b/src/HSLib/GenOp.cc
@@ -1165,13 +1165,14 @@ complex gen_op::proj(const gen_op &Op2, int normf) const
   }
 
 
+	// Input		Op   : General operator (this)
+        // Return		int  : Op Liouville space dimension
+
 int gen_op::dim()    const { return (!WBR)?0:WBR->RepMx.cols(); }
 int gen_op::HS()     const { return (!WBR)?0:WBR->RepMx.cols(); }
 int gen_op::LS()     const { return (!WBR)?0:WBR->RepBs.dim_LS(); }
 int gen_op::dim_LS() const { return (!WBR)?0:WBR->RepBs.dim_LS(); }
 
-	// Input		Op   : General operator (this)
-        // Return		int  : Op Liouville space dimension
 
 
 	// Input		Op1   : General operator (this)
@@ -1228,6 +1229,52 @@ gen_op gen_op::exp(const complex& t, double cutoff) const
   return ExpOp;
   }
  
+	// Input		Op   : General operator (this)
+        // Return		int  : Op Liouville space dimension
+
+
+	// Input		Op1   : General operator (this)
+        // Return		Op    : exponential of Op1
+	//				Op = exp(Op1)
+        // Note			      : Computed in same base as Op1
+        // Note			      : Returns I matrix for Null Op
+
+gen_op gen_op::expm() const
+  {
+  int hs = dim();				// Operator Hilbert space
+  if(!WBR)					// If we have no reps then
+    {						// return an identity matrix
+    if(!hs) GenOpfatality(3, "exp"); 		// or Op exponential error
+    return gen_op(matrix(hs,hs,i_matrix_type)); // if there is no dimension
+    }
+  gen_op ExpOp(*this); 				// Copy Op in diagonal form
+  matrix mx1 = this->get_mx();
+  ExpOp.put_mx(mx1.expm());
+  return ExpOp;
+  }     
+
+
+
+        // Input                Op    : Operator (this)
+        //                      t     : Exponential factor
+        //                      cutoff: Exponential factor roundoff
+        // Return               ExpOp : Exponential of Op
+        //                              ExpOp = exp(t*Op)
+        // Note                       : Exponential output in same base as Op
+        // Note                       : Value of t is considered 0 if
+        //                              it's magnituded is less than cutoff
+
+gen_op gen_op::expm(const complex& t, double cutoff) const
+  {
+  int hs = dim();				// Operator Hilbert space
+  if(!WBR && !hs) GenOpfatality(3,"exp"); 	// Op exponential error
+  if(!WBR || norm(t) < fabs(cutoff))
+    return gen_op(matrix(hs, hs, i_matrix_type));
+  gen_op ExpOp(*this); 
+  matrix mx1 = this->get_mx()*t;
+  ExpOp.put_mx(mx1.expm());
+  return ExpOp;
+  }
  
 
         // Input                Op      : Operator (this)
@@ -1682,9 +1729,14 @@ void gen_op::status(int pf) const
       case i_matrix_type:
       case d_matrix_type:
       case n_matrix_type:
-        if((REP->RepMx).test_hermitian()) std::cout << ", Hermitian"; 
-        else                              std::cout << ", Non-Hermitian"; break;
-      default:                            std::cout << ", Non-Hermitian"; break;
+        if((REP->RepMx).test_hermitian())
+	  std::cout << ", Hermitian"; 
+        else                              
+	  std::cout << ", Non-Hermitian";
+	break;
+      default:                            
+	std::cout << ", Non-Hermitian";
+	break;
       }
 
     std::cout << "\n\t     - basis  = " << (REP->RepBs).refs() << " refs";
@@ -1702,9 +1754,14 @@ void gen_op::status(int pf) const
       case i_matrix_type:
       case d_matrix_type:
       case n_matrix_type:
-        if((REP->RepMx).test_hermitian()) std::cout << ", Hermitian";
-        else                              std::cout << ", Non-Hermitian"; break;
-      default:                            std::cout << ", Non-Hermitian"; break;
+        if((REP->RepMx).test_hermitian())
+	  std::cout << ", Hermitian";
+        else                             
+	  std::cout << ", Non-Hermitian";
+	break;
+      default:                           
+	std::cout << ", Non-Hermitian";
+	break;
       }
     REP++;
     item++;
@@ -1904,8 +1961,7 @@ void gen_op::SetLimits(int limit) const
   std::cout << "\n\tLimiting No. Of ";
   if(OpName.length()) std::cout << OpName << " ";
   std::cout << "Operator Representations";
-  std::cout << "\n\tCurrent Representations: " 
-            << size();
+  std::cout << "\n\tCurrent Representations: " << size();
   std::cout << ", Maximum Allowed: " << limit; 
 # endif
   gen_op* OpTmp = const_cast<gen_op*>(this);	// Cast away const (mutable!)
diff --git a/src/HSLib/GenOp.h b/src/HSLib/GenOp.h
index 7dbed89..22abb17 100644
--- a/src/HSLib/GenOp.h
+++ b/src/HSLib/GenOp.h
@@ -429,6 +429,24 @@ MSVCDLL gen_op exp(const complex& t, double cutoff=1.e-12) const;
         // Note                       : Value of t is considered 0 if
         //                              it's magnituded is less than cutoff
 
+MSVCDLL gen_op expm() const;
+
+	// Input		Op1  : General operator (this)
+        // Return		Op   : exponential of Op1
+	//			       Op = exp(Op1) using pade
+        // Note			     : Computed in the same base as Op1
+
+
+MSVCDLL gen_op expm(const complex& t, double cutoff=1.e-12) const;
+
+        // Input                Op    : Operator (this)
+        //                      t     : Exponential factor
+        //                      cutoff: Exponential factor roundoff
+        // Return               ExpOp : Exponential of Op
+        //                              ExpOp = exp(t*Op) using Pade
+        // Note                       : Exponential output in same base as Op
+        // Note                       : Value of t is considered 0 if
+        //                              it's magnituded is less than cutoff
 
 MSVCDLL gen_op Pow(int power) const;
 
diff --git a/src/LSLib/SuperOp.cc b/src/LSLib/SuperOp.cc
index a19b4b1..badc9f9 100644
--- a/src/LSLib/SuperOp.cc
+++ b/src/LSLib/SuperOp.cc
@@ -1327,6 +1327,124 @@ super_op exp(const super_op& LOp1, const complex& t)
   return LOp;
   }
 
+// ----------------------------------------------------------------------------
+//                 Exponential Superoperators using Pade Method
+// ----------------------------------------------------------------------------
+
+
+	// Input		LOp   : Superoperator (this)
+        // Return		ExpLOp: Exponential of LOp
+	//				ExpLOp = exp(LOp) (Pade method)
+        // Note			      : Exponential output in same base as LOp
+
+super_op super_op::expm() const
+  {
+  if(!HSp)				// Check for NULL LOp
+    {
+    LOperror(5, "exp", 1);		// Error during LOp exp function
+    LOpfatal(7);			// Accessing Null LOp
+    }
+  super_op ExpLOp(*this);		// Copy LOp in its current base
+  matrix mx1 = this->mx;
+  
+  ExpLOp.mx = mx1.expm();
+  
+  return ExpLOp;
+  }
+
+
+super_op super_op::expm(const complex& t, double cutoff) const
+
+        // Input                LOp   : Superoperator (this)
+        //                      t     : Exponential factor
+        //                      cutoff: Exponential factor roundoff
+        // Return               ExpLOp: Exponential of LOp
+        //                              ExpLOp = exp(t*LOp)
+        // Note                       : Exponential output in EBR of LOp
+        // Note                       : L0p's EBR is generated herein
+        // Note                       : Value of t is considered 0 if
+        //                              it's magnituded is less than cutoff
+
+  {
+  if(!HSp)				// Check for NULL LOp
+    {
+    LOperror(5, "exp", 1);		// Error during LOp exp function
+    LOpfatal(7);			// Accessing NULL Lop
+    }
+  super_op ExpLOp(*this);		// Copy LOp in its current base
+  if(norm(t) < fabs(cutoff))
+    {
+    matrix mx(LSp, LSp, i_matrix_type);	// ExpLOp is the identity superop
+    ExpLOp.mx = mx;
+    }
+  else
+    {
+    matrix mx1 = this->mx*t;
+    ExpLOp.mx = mx1.expm();
+    }
+  return ExpLOp;
+  }
+
+  
+
+
+	// Input		LOp1  : Superoperator
+        // Return		LOp   : Exponential of LOp1
+	//				LOp = exp(LOp1)
+        // Note			      : Computed in EBR of LOp1
+        // Note			      : Superoperator output in EBR of LOp1
+
+super_op expm(const super_op& LOp1)
+  {
+  super_op LOp = LOp1;
+  if(LOp1.HSp)				// Check for NULL LOp
+    {
+    matrix m1 = LOp.get_mx();
+    LOp.put_mx(m1.expm());
+    return LOp;
+    }
+  else
+    {
+    LOp1.LOperror(5, "exp", 1);	// Error during LOp exp function
+    LOp1.LOpfatal(7);		// Accessing Null LOp
+    }
+  return LOp;
+  }
+
+  
+super_op expm(const super_op& LOp1, const complex& t)
+//return LOp(matrix(LOp1.LSp, LOp1.LSp, d_matrix_type));
+
+	// Input		LOp1  : Superoperator
+	//			t     : A time
+        // Return		LOp   : Exponential of LOp1
+	//				LOp = exp(LOp1*t)
+        // Note			      : Computed in EBR of LOp1
+
+  {
+  super_op LOp = LOp1;
+  if(LOp.HSp) 				// Check for NULL LOp
+    {
+    if(t != complex0)
+      {
+      matrix m1 = LOp.get_mx()*t;
+      LOp.put_mx(m1.expm());
+      return LOp;
+      }
+    else
+      {
+      matrix mx(LOp1.LSp, LOp1.LSp, i_matrix_type);	// LOp is the identity superoperator
+      LOp.mx = mx;
+      }
+    }
+  else
+    {
+    LOp1.LOperror(5, "exp", 1);	// Error during LOp exp function
+    LOp1.LOpfatal(7);		// Accessing Null LOp
+    }
+  return LOp;
+  }
+
 
 // ----------------------------------------------------------------------------
 //                      Superoperators Taken To A Power
diff --git a/src/LSLib/SuperOp.h b/src/LSLib/SuperOp.h
index a6bca2f..45ec76c 100644
--- a/src/LSLib/SuperOp.h
+++ b/src/LSLib/SuperOp.h
@@ -398,6 +398,41 @@ MSVCDLL friend super_op exp(const super_op& LOp1, const complex& t);
 	//				LOp = exp(LOp1*t)
         // Note			      : Computed in EBR of LOp1
   
+MSVCDLL super_op expm() const;
+
+        // Input                LOp   : Superoperator (this)
+        // Return               ExpLOp: Exponential of LOp
+        //                              ExpLOp = exp(LOp) (Pade method)
+        // Note                       : Exponential output in same base as LOp
+
+
+MSVCDLL super_op expm(const complex& t, double cutoff=1.e-12) const;
+
+        // Input                LOp   : Superoperator (this)
+	//			t     : Exponential factor
+	//			cutoff: Exponential factor roundoff
+        // Return               ExpLOp: Exponential of LOp
+        //                              ExpLOp = exp(t*LOp) (Pade method)
+        // Note                       : Exponential output in same base as LOp
+	// Note			      : Value of t is considered 0 if
+	//				it's magnituded is less than cutoff
+
+  
+MSVCDLL friend super_op expm(const super_op& LOp1);
+
+	// Input		LOp1  : Superoperator
+        // Return		LOp   : Exponential of LOp1
+	//				LOp = exp(LOp1) (Pade method)
+        // Note			      : Computed in same base as LOp1
+
+  
+MSVCDLL friend super_op expm(const super_op& LOp1, const complex& t);
+
+	// Input		LOp1  : Superoperator
+        // Return		LOp   : Exponential of LOp1
+	//				LOp = exp(LOp1*t) (Pade method)
+        // Note			      : Computed in same base as LOp1
+  
 
 MSVCDLL friend super_op pow(const super_op& LOp, int power);
 
diff --git a/src/Matrix/_matrix.cc b/src/Matrix/_matrix.cc
index 9c0629b..2cde493 100644
--- a/src/Matrix/_matrix.cc
+++ b/src/Matrix/_matrix.cc
@@ -376,7 +376,8 @@ _matrix* _matrix::IM()        {_MxFatal(5, "Imaginary");         return this; }
 _matrix* _matrix::conjugate() {_MxFatal(5, "Complex Conjugate"); return this; }
 _matrix* _matrix::transpose() {_MxFatal(5, "Transpose");         return this; }
 _matrix* _matrix::adjoint()   {_MxFatal(5, "Adjoint");           return this; }
-_matrix* _matrix::mxexp()     {_MxFatal(5, "Exponential");       return this; }
+_matrix* _matrix::mxexp()     {_MxFatal(5, "Exponential Diag");  return this; }
+_matrix* _matrix::mxpade()    {_MxFatal(5, "Exponential Pade");  return this; }
 complex  _matrix::trace()     {_MxFatal(5, "Trace");         return complex0; }
 
 _matrix* _matrix::swaprows(int i,int j) {_MxFatal(5,"swaprows"); return this;}
diff --git a/src/Matrix/_matrix.h b/src/Matrix/_matrix.h
index 5ceeb73..04bcbab 100644
--- a/src/Matrix/_matrix.h
+++ b/src/Matrix/_matrix.h
@@ -340,6 +340,7 @@ virtual _matrix* IM();
 virtual _matrix* conjugate();
 virtual _matrix* transpose();
 virtual _matrix* mxexp();
+virtual _matrix* mxpade();
 virtual _matrix* adjoint();
 virtual complex  trace();
 
diff --git a/src/Matrix/d_matrix.cc b/src/Matrix/d_matrix.cc
index defa3c0..8fb0a95 100644
--- a/src/Matrix/d_matrix.cc
+++ b/src/Matrix/d_matrix.cc
@@ -1260,6 +1260,14 @@ _matrix* d_matrix::mxexp()
   return mx;
   }
 
+_matrix* d_matrix::mxpade()
+  {
+  d_matrix* mx = new d_matrix(cols_, rows_);	// If complex, construct new d_matrix
+  for(int i=0; i<rows_; i++)			// Loop over all non_zero elements
+    mx->data[i] = exp(data[i]);			// <i|mx|i> = exp(<i|dmx*|i>)
+  return mx;
+  }
+
 complex d_matrix::trace()
   {
   complex z(0);
diff --git a/src/Matrix/d_matrix.h b/src/Matrix/d_matrix.h
index 5827e98..6928815 100644
--- a/src/Matrix/d_matrix.h
+++ b/src/Matrix/d_matrix.h
@@ -297,6 +297,7 @@ virtual _matrix* conjugate();
 virtual _matrix* transpose();
 virtual _matrix* adjoint();
 virtual _matrix* mxexp();
+virtual _matrix* mxpade();
 virtual complex  trace();
 
 /*     Function    Output                       Description
diff --git a/src/Matrix/h_matrix.cc b/src/Matrix/h_matrix.cc
index aec302f..1b70f33 100644
--- a/src/Matrix/h_matrix.cc
+++ b/src/Matrix/h_matrix.cc
@@ -80,6 +80,16 @@ extern "C"
                      const int*, __CLPK_doublecomplex*, const int*, double*, int*); 
   extern void zheev_(const char*, const char*, const int*, __CLPK_doublecomplex*, const int*,
                      double*, __CLPK_doublecomplex*, const int*, double*, int*);
+  extern void zgetri_(const int*, __CLPK_doublecomplex *, const int*, const int*, __CLPK_doublecomplex *,
+                      const int*, const int*);
+  extern void zhetri_(const char*, const int*, __CLPK_doublecomplex *, const int*, const int*, __CLPK_doublecomplex *,
+                      const int*);
+  extern void zgetrf_(const int*, const int*, __CLPK_doublecomplex *, const int*, int*, int*);
+  extern void zhetrf_(const char*, const int*, __CLPK_doublecomplex *, const int*, int*, __CLPK_doublecomplex *,
+                      const int*, int*);
+  extern void zgesv_(const int*, const int*, __CLPK_doublecomplex *, const int*, 
+                      int*, __CLPK_doublecomplex*, const int*, int*);
+
 }
 #endif
 
@@ -1452,6 +1462,348 @@ _matrix* h_matrix::transpose()
 
 _matrix* h_matrix::adjoint() { return this; }
   
+
+/*  Matrix exponential using the Pade method. This is a lot faster than the
+    standard exp() function. Uses the same algorithm as MATLAB expm() and is
+    roughly as fast as the MATLAB implementation. 
+
+    Higham, N. J., The Scaling and Squaring Method for the Matrix Exponential Revisited,
+    SIAM J. Matrix Anal. Appl., 26(4) (2005), pp. 1179-1193.
+    Al-Mohy, A. H. and N. J. Higham, A new scaling and squaring algorithm for the matrix
+    exponential, SIAM J. Matrix Anal. Appl., 31(3) (2009), pp. 970-989.
+
+    This seems to be the fastest method currently.
+
+*/
+
+_matrix* h_matrix::mxpade()
+
+{ 
+#ifdef _USING_BLAS_
+
+// Pade needs a linear equation solver so it will only work if the BLAS libraries 
+// are installed. Otherwise we will just use the standard mxexp() which uses diagonalization.
+
+  double L1, n_squ, temp, maxnorm;
+  int k1;
+
+  if(rows_ != cols_)
+  { HMxerror(14,1);			//    Bad rectangular array use
+    HMxfatal(28);			//    Unable to diagonalize
+  }
+//std::cerr << "At Point 01 h_matrix.cc\n";
+  L1=0;
+  for(k1=0;k1<size;++k1)
+  { temp = norm(data[k1]);
+    if(temp > L1)
+      L1=temp;
+  }
+  _matrix *U;
+  _matrix *V;
+  _matrix *temp1;
+  _matrix *temp2;
+  _matrix *temp3;
+  n_squ=0;
+  _matrix* ident = new i_matrix(rows_,rows_);
+  _matrix* A2=this->multiply(this);
+//std::cerr << "At Point 02\n";
+  if(L1 < 1.495585217958292e-2)
+  { const double b[]={120.0, 60.0, 12.0, 1.0};
+//  std::cerr << "At Point 02 - 01\n";
+    temp1 = ident->multiply(b[1]);
+    temp2 = A2->multiply(b[3]);
+    temp3=temp1->add(temp2);
+    delete temp1;
+//  delete temp2;							// Do not delete temp12since b[3]=1 and the same buffer is returned
+    U = this->multiply(temp3);
+    delete temp3;
+//  U = this*(b[3]*A2+b[1]*ident);
+    temp1 = ident->multiply(b[0]);
+    temp2 = A2->multiply(b[2]);
+    V = temp1->add(temp2);
+    delete temp1;
+    delete temp2;
+//  V = b[2]*A2+b[0]*ident;
+  }
+  else
+  { _matrix *A4=A2->multiply(A2);
+    if(L1 < 2.539398330063230e-1)
+    { const double b[]={30240., 15120., 3360., 420., 30., 1.};
+//    std::cerr << "At Point 02 - 02\n";
+      temp1 = ident->multiply(b[1]);
+      temp2 = A2->multiply(b[3]);
+      temp3=temp1->add(temp2);
+      delete temp1;
+      delete temp2;
+      temp1 = A4->multiply(b[5]);
+      temp2=temp1->add(temp3);
+//    delete temp1;							// Do not delete temp1 since b[5]=1 and the same buffer is returned
+      delete temp3;
+      U = this->multiply(temp2);
+      delete temp2;
+//    U = this*(b[5]*A4+b[3]*A2+b[1]*ident);
+      temp1 = ident->multiply(b[0]);
+      temp2 = A2->multiply(b[2]);
+      temp3=temp1->add(temp2);
+      delete temp1;
+      delete temp2;
+      temp1 = A4->multiply(b[4]);
+      V=temp1->add(temp3);
+      delete temp1;
+      delete temp3;
+//    V = b[4]*A4+b[2]*A2+b[0]*ident;
+    }
+    else
+    { _matrix* A6=A4->multiply(A2);
+      if(L1 < 9.504178996162932e-1)
+      { const double b[]={17297280., 8648640., 1995840., 277200., 25200., 1512., 56., 1.};
+//      std::cerr << "At Point 02 - 03\n";
+        temp1 = ident->multiply(b[1]);
+        temp2 = A2->multiply(b[3]);
+        temp3=temp1->add(temp2);
+	delete temp1;
+	delete temp2;
+        temp1 = A4->multiply(b[5]);
+        temp2=temp1->add(temp3);
+	delete temp1;
+	delete temp3;
+        temp1 = A6->multiply(b[7]);
+        temp3=temp1->add(temp2);
+//      delete temp1;							// Do not delete temp1 since b[7]=1 and the same buffer is returned
+	delete temp2;
+        U = this->multiply(temp3);
+	delete temp3;
+//      U = this*(b[7]*A6+b[5]*A4+b[3]*A2+b[1]*ident);
+        temp1 = ident->multiply(b[0]);
+        temp2 = A2->multiply(b[2]);
+        temp3=temp1->add(temp2);
+	delete temp1;
+	delete temp2;
+        temp1 = A4->multiply(b[4]);
+        temp2=temp1->add(temp3);
+	delete temp1;
+	delete temp3;
+        temp1 = A6->multiply(b[6]);
+        V=temp1->add(temp2);
+	delete temp1;
+	delete temp2;
+//      V = b[6]*A6+b[4]*A4+b[2]*A2+b[0]*ident;
+      }
+      else
+      { _matrix* A8=A6->multiply(A2);
+        if(L1 < 2.097847961257068e0)
+        { const double b[]={17643225600., 8821612800., 2075673600., 302702400., 30270240.,
+	                    2162160., 110880., 3960., 90., 1.};
+//        std::cerr << "At Point 02 - 04\n";
+          temp1 = ident->multiply(b[1]);
+          temp2 = A2->multiply(b[3]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+          temp1 = A4->multiply(b[5]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+          temp1 = A6->multiply(b[7]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+          temp1 = A8->multiply(b[9]);
+          temp2=temp1->add(temp3);
+//        delete temp1;								// Do not delete temp1 since b[9]=1 and the same buffer is returned
+	  delete temp3;
+          U = this->multiply(temp2);
+	  delete temp2;
+//        U = this*(b[9]*A8+b[7]*A6+b[5]*A4+b[3]*A2+b[1]*ident);
+          temp1 = ident->multiply(b[0]);
+          temp2 = A2->multiply(b[2]);
+          temp3 = temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+          temp1 = A4->multiply(b[4]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+          temp1 = A6->multiply(b[6]);
+          temp3 = temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+          temp1 = A8->multiply(b[8]);
+          V=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+//        V = b[8]*A8+b[6]*A6+b[4]*A4+b[2]*A2+b[0]*ident;
+	}
+        else
+        { maxnorm = 5.371920351148152e0;
+          n_squ = std::max(0, int(std::ceil(log2(L1/maxnorm))));
+//        std::cerr << "n_squ = " << n_squ << "\n";
+          const double b[]={64764752532480000., 32382376266240000., 7771770303897600.,
+                            1187353796428800., 129060195264000., 10559470521600.,
+                            670442572800., 33522128640., 1323241920., 40840800., 960960.,
+                            16380., 182., 1.};
+//        std::cerr << "At Point 02 - 05\n";
+	  A2=A2->multiply_two(std::pow(2.0,-n_squ*2));
+	  A4=A4->multiply_two(std::pow(2.0,-n_squ*4));
+	  A6=A6->multiply_two(std::pow(2.0,-n_squ*6));
+          temp1 = A2->multiply(b[9]);
+          temp2 = A4->multiply(b[11]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+	  temp1 = A6->multiply(b[13]);
+          temp2=temp1->add(temp3);
+//        delete temp1;                                           // Do not delete temp1 since b[13]=1 and the same buffer is returned!
+	  delete temp3;
+          temp3 = A6->multiply(temp2);
+          delete temp2;
+	  temp1 = ident->multiply(b[1]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+	  temp1 = A2->multiply(b[3]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+	  temp1 = A4->multiply(b[5]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+	  temp1 = A6->multiply(b[7]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+          U = this->multiply(temp3);
+	  delete temp3;
+	  U=U->multiply_two(std::pow(2.0,-n_squ));
+//        U = mx1*(A6*(b[13]*A6+b[11]*A4+b[9]*A2)+b[7]*A6+b[5]*A4+b[3]*A2+b[1]*ident);
+          temp1 = A2->multiply(b[8]);
+          temp2 = A4->multiply(b[10]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+	  temp1 = A6->multiply(b[12]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+          temp3 = A6->multiply(temp2);
+	  delete temp2;
+	  temp1 = ident->multiply(b[0]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+	  temp1 = A2->multiply(b[2]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+	  temp1 = A4->multiply(b[4]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+	  temp1 = A6->multiply(b[6]);
+          V=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+//        V = A6*(b[12]*A6+b[10]*A4+b[8]*A2)+b[6]*A6+b[4]*A4+b[2]*A2+b[0]*ident;
+        }
+	delete A8;
+      }
+     delete A6;
+    }
+    delete A4;
+  }
+  delete A2;
+//std::cerr << "At Point 03\n";
+//std::cerr << "This is U \n";
+//U->print(std::cerr,MxPrDefs);
+//std::cerr << "This is V \n";
+//V->print(std::cerr,MxPrDefs);
+  _matrix* P = U->add(V);
+  _matrix* Q = V->subtract(U);
+  n_matrix* P2 = (n_matrix *) P;
+  n_matrix* Q2 = (n_matrix *) Q;
+//std::cerr << "This is P \n";
+//P->print(std::cerr,MxPrDefs);
+//std::cerr << "This is Q \n";
+//Q->print(std::cerr,MxPrDefs);
+#ifdef _USING_SUNPERFLIB_
+  int N    = rows_;
+  int NHRS = rows_;
+  int LDA  = rows_;
+  int *ipiv = new int[rows_];
+  int LDB   = rows_;
+  int info  = -5555;
+  zgesv(N, NHRS, (doublecomplex *) Q2->data, LDA, ipiv, (doublecomplex *) P2->data, LDB, &info);
+#endif
+#ifdef _USING_LAPACK_
+#if defined(__LP64__) /* this is needed on MacOS CLAPACK to make types match*/
+  int N    = rows_;
+  int NHRS = rows_;
+  int LDA  = rows_;
+  int *ipiv = new int[rows_];
+  int LDB   = rows_;
+  int info  = -5555;
+#else
+  long int N    = rows_;
+  long int NHRS = rows_;
+  long int LDA  = rows_;
+  long int *ipiv = new long int[rows_];
+  long int LDB   = rows_;
+  long int info  = -5555;
+#endif
+//std::cerr << "At Point 04\n";
+  zgesv_(&N, &NHRS, (__CLPK_doublecomplex *) Q2->data, &LDA, ipiv, (__CLPK_doublecomplex *) P2->data, &LDB, &info);
+#endif
+  if(info == 0)
+  { // everything looks good
+  }
+  else
+  { HMxfatal(28);			//    Unable to diagonalize
+  }
+//std::cerr << "At Point 05\n";
+  _matrix* R = P2;
+  delete [] ipiv;
+  delete ident;
+//std::cerr << "At Point 06\n";
+  for(k1=0;k1<n_squ;++k1)
+  { temp1 = R->multiply(R);
+    delete R;
+    R=temp1;
+  }
+  delete U;
+  delete V;
+  delete Q2;
+//std::cerr << "At Point 07\n";
+  return(R);
+
+#else          // BLAS NOT available
+
+// Without BLAS, we just use the standard mxexp() function here
+// This will be much slower. But Pade needs a linear equation solver.
+
+  _matrix* dmx;                                 // For eigenvalues
+  _matrix* emx;                                 // For eigenvectors
+  diag(dmx, emx);                               // Diagonalize mx to dmx & emx 
+  complex *d00 = ((d_matrix*)dmx)->data;        // Start of dmx: d00 = <0|dmx|0>
+  complex *dii, *dend = d00 + cols_;            // Indexing on dmx diagonal
+  for(dii=d00; dii<dend; dii++)                 // Effective loop over i
+    (*dii) = exp(*dii);                         // Set <i|dmx|i> to exp(<i|dmx|i>
+//  _matrix* pdt1  = dmx->multiply(emx);                // Perform exp(D)*E
+//  _matrix* emxi  = emx->inv();                        // Get inverse of emx
+//  _matrix* expmx = emxi->multiply(pdt1);      // Perform inv(E)*exp(D)*E
+
+  _matrix* emxi  = emx->inv();                  // Get inverse of emx
+  _matrix* pdt1  = dmx->multiply(emxi);
+  _matrix* expmx = emx->multiply(pdt1);
+  delete dmx;                                   // Clean up the dmx matrix
+  delete emx;                                   // Clean up the emx matrix
+  delete pdt1;                                  // Clean up the pdt1 matrix
+  delete emxi;                                  // Clean up the emxi matrix
+  return expmx;
+#endif
+}
+
+
 _matrix* h_matrix::mxexp()
   {
   _matrix* dmx;                                 // For eigenvalues
@@ -3169,17 +3521,102 @@ void h_matrix::SymDiag(_matrix* (&D), _matrix* (&U)) { tqli(U, D, 1); }
 
 _matrix* h_matrix::inv()
   {
-// sosi - this will use up memory
-  int hd = rows_;			// Matrix dimension
-  i_matrix* I = new i_matrix(hd,hd);	// Construct an Identity matrix
-  int *indx;
-  indx = new int[hd];
-//  int indx[hd];				// Array for any row permutations
-  n_matrix* hLU = (n_matrix*)LU(indx);	// Perform LU decomposition of hmx
-  _matrix* hinvmx = I->LUinv(indx, hLU);
-  delete [] indx;
+#if defined(_USING_SUNPERFLIB_) || defined(_USING_LAPACK_)
+    int nrows = this->rows();
+    int ncols = this->cols();
+    _matrix* hinvmx;
+    if(nrows < 32)  // then do it the old fashioned gamma way.
+    {
+      int hd = rows_;			// Matrix dimension
+      i_matrix* I = new i_matrix(hd,hd);	// Construct an Identity matrix
+      int *indx;
+      indx = new int[hd];
+      n_matrix* hLU = (n_matrix*)LU(indx);	// Perform LU decomposition of hmx
+      hinvmx = I->LUinv(indx, hLU);
+      delete [] indx;
+    }
+    else
+    {
+        hinvmx = new n_matrix(nrows, ncols);
+        this->convert(hinvmx);
+        n_matrix* hinvmx1 = (n_matrix *)hinvmx->transpose();
+#ifdef _USING_SUNPERFLIB_
+        char uplo = 'U';  // Upper triangular...
+        int  N    = nrows;
+        int  lda  = nrows;
+        int *ipiv = new int[N];
+        int info = -55555;
+        zhetrf(uplo, N, (doublecomplex *)hinvmx1->data, lda, ipiv , &info);
+        zhetri(uplo, N, (doublecomplex *)hinvmx1->data, lda, ipiv , &info);
+#endif
+#ifdef _USING_LAPACK_
+        char uplo = 'U';  // Upper triangular...
+#if defined(__LP64__) /* this is needed on MacOS CLAPACK to make types match*/
+        int  N    = nrows;
+        int  lda  = nrows;
+        int *ipiv = new int[N];
+        int lwork = N*N;
+        int info = -55555;
+#else
+        long int  N    = nrows;
+        long int  lda  = nrows;
+        long int *ipiv = new long int[N];
+        long int lwork = N*N;
+        long int info = -55555;
+#endif
+        complex *work= new complex [2*lwork];
+        zhetrf_(&uplo, &N, (__CLPK_doublecomplex *) hinvmx1->data, &lda, ipiv, (__CLPK_doublecomplex *) work, &lwork, &info); 
+        if(info == 0)
+        { // all is well.
+        }
+        else if (info > 0)
+        { std::cerr << "\nh_matrix: zhetrf failed to converge\n";
+        }
+        else if(info == -55555)
+        { std::cerr << "\nReturn value, 'info', does not appear to have been set\n";
+        }
+        else
+        { std::cerr << "\ninfo = " << info << "\n";
+        }
+        zhetri_(&uplo, &N, (__CLPK_doublecomplex *) hinvmx1->data, &lda, ipiv, (__CLPK_doublecomplex *) work, &info); 
+#endif
+        if(info == 0)
+        { // all is well.
+        }
+        else if (info > 0)
+        { std::cerr << "\nh_matrix: Inversion failed to converge\n";
+        }
+        else if(info == -55555)
+        { std::cerr << "\nReturn value, 'info', does not appear to have been set\n";
+        }
+        else
+        { std::cerr << "\ninfo = " << info << "\n";
+        }
+
+        // Copy results back into D and U.
+        complex *hmxp = hinvmx1->data;
+        for(int i=0; i<nrows; i++)
+        { for(int j=0; j<ncols; j++)
+          { hinvmx->put(hmxp[j*nrows+i], i, j);
+          }
+        }
+        delete hinvmx1;
+        delete [] ipiv;
+#ifdef _USING_LAPACK_
+        delete [] work;
+#endif
+//      std::cerr << "h_matrix diag: using sunperflib code\n";
+    } 
+#else
+    int hd = rows_;			// Matrix dimension
+    i_matrix* I = new i_matrix(hd,hd);	// Construct an Identity matrix
+    int *indx;
+    indx = new int[hd];
+    n_matrix* hLU = (n_matrix*)LU(indx);	// Perform LU decomposition of hmx
+    _matrix* hinvmx = I->LUinv(indx, hLU);
+    delete [] indx;
+#endif
   return hinvmx; 
-//  return I->LUinv(indx, hLU);		// Return the inverse via LU*hinv=I;
   }
 
 _matrix* h_matrix::LU(int *indx)
diff --git a/src/Matrix/h_matrix.h b/src/Matrix/h_matrix.h
index b37aacf..365500e 100644
--- a/src/Matrix/h_matrix.h
+++ b/src/Matrix/h_matrix.h
@@ -290,6 +290,7 @@ virtual _matrix* conjugate();
 virtual _matrix* transpose();
 virtual _matrix* adjoint();
 virtual _matrix* mxexp();
+virtual _matrix* mxpade();
 virtual complex  trace();
 
 /*     Function    Output                       Description
diff --git a/src/Matrix/i_matrix.cc b/src/Matrix/i_matrix.cc
index a756bb3..d0a5bab 100644
--- a/src/Matrix/i_matrix.cc
+++ b/src/Matrix/i_matrix.cc
@@ -285,7 +285,9 @@ hermitian_type i_matrix::test_hermitian(double d) const
 
 matrix_type i_matrix::stored_type( ) const { return i_matrix_type; }
 matrix_type i_matrix::test_type(const matrix_type m, const double d) const	
-                                           { return m; double dtmp; dtmp=d; }
+                                           { return m;}
+// 10/11/2024 changed by MAER to eliminate compiler complaints
+//                                           { return m; double dtmp; dtmp=d; }
 std::string i_matrix::mxtype()        const { return std::string("Identity"); }
 std::string i_matrix::mxtype(bool pf) const { return std::string("Identity"); }
 
@@ -500,7 +502,7 @@ _matrix* i_matrix::divide(_matrix* mx)
     IMxfatal(3, "divide");			//   Fail in divide function
     }
   else
-    switch(mx->stored_type())
+  { switch(mx->stored_type())
       {
       case i_matrix_type: return this; break;   // Divide imx into imx
       case d_matrix_type:                       // Divide imx by dmx
@@ -513,6 +515,7 @@ _matrix* i_matrix::divide(_matrix* mx)
         return mx->inv();			// Return inv(mx)
         break;
       }   
+  }
     IMxerror(6, " mx1 / mx2", 1);		//   Matrix divide problems
     IMxerror(3, " divide", 1);			//   Matrix divide problems
     IMxfatal(25);				//   Not fully implemented
@@ -745,6 +748,7 @@ _matrix* i_matrix::adjoint()   { return this; }		    // imx self adjoint
 _matrix* i_matrix::negate()    { return new d_matrix(rows_,cols_,-complex1); } 
 _matrix* i_matrix::IM()        { return new d_matrix(rows_,cols_, complex0); }
 _matrix* i_matrix::mxexp()     { return new d_matrix(rows_,cols_, exp(1.0)); }
+_matrix* i_matrix::mxpade()    { return new d_matrix(rows_,cols_, exp(1.0)); }
 complex  i_matrix::trace()     { return complex(rows_); }
 
 /*     Function    Output                       Description
@@ -1030,9 +1034,11 @@ void i_matrix::print(std::ostream& ostr, const MxPrint& PFlgs) const
   for(i=0; i<rows_; i++ )		// Loop rows
     {
     ostr << sp;
-    for(j=0; j<i; j++)       ostr << "0 ";
-                             ostr << "1 ";
-    for(j=i+1; j<cols_; j++) ostr << "0 ";
+    for(j=0; j<i; j++)  
+      ostr << "0 ";
+    ostr << "1 ";
+    for(j=i+1; j<cols_; j++)
+      ostr << "0 ";
     ostr << "\n";
     }
   }
diff --git a/src/Matrix/i_matrix.h b/src/Matrix/i_matrix.h
index f7a71c0..1dde843 100644
--- a/src/Matrix/i_matrix.h
+++ b/src/Matrix/i_matrix.h
@@ -296,6 +296,7 @@ virtual        _matrix* conjugate();		// Return is this
 virtual inline _matrix* transpose();		// Return is this
 virtual inline _matrix* adjoint();		// Return is this
 virtual inline _matrix* mxexp();		// Return is this
+virtual inline _matrix* mxpade();		// Return is this
 virtual inline complex  trace();		// Return is size
 
 /*     Function    Output                       Description
diff --git a/src/Matrix/matrix.cc b/src/Matrix/matrix.cc
index f1df37b..26ec2df 100644
--- a/src/Matrix/matrix.cc
+++ b/src/Matrix/matrix.cc
@@ -891,7 +891,8 @@ matrix& matrix::operator /= (double d)	// Implements this /= d
           conj	           mx1 = mx* 	  conjugate <i|mx1|j>=<i|mx*|j>
         transpose 	   mx1 = mxt	  transpose <i|mx1|j>=<j|mx|i>
         adjoint	           mx1 = mxt*	  adjoint   <i|mx1|j>=<j|mx*|i>
-	 exp               mx1 = exp(mx)  exponentiation 
+	 exp               mx1 = exp(mx)  exponentiation by diagonalization
+	 expm              mx1 = expm(mx) exponentiation by Pade approximation
 	 trace		     z = Tr{mx}	  trace     z = sum <i|mx|i>         */
 
 //matrix operator- (const matrix& mx) { return matrix(mx.m->negate());    }
@@ -909,6 +910,7 @@ matrix  matrix::conj()       const  { return matrix(m->conjugate()); }
 matrix  matrix::transpose()  const  { return matrix(m->transpose()); }
 matrix  matrix::adjoint()    const  { return matrix(m->adjoint());   }
 matrix  matrix::exp()        const  { return matrix(m->mxexp());     }
+matrix  matrix::expm()       const  { return matrix(m->mxpade());     }
 complex matrix::trace()      const  { return        m->trace();      }
 
 /*     Function    Output                       Description
diff --git a/src/Matrix/matrix.h b/src/Matrix/matrix.h
index 6e7a894..f1ccfe4 100644
--- a/src/Matrix/matrix.h
+++ b/src/Matrix/matrix.h
@@ -366,7 +366,8 @@ MSVCDLL matrix& operator /= (double d);
           conj                mx1 = mx*         conjugate   <i|mx1|j>=<i|mx*|j>
         transpose             mx1 = mxt         transpose   <i|mx1|j>=<j|mx|i>
         adjoint               mx1 = mxt*        adjoint     <i|mx1|j>=<j|mx*|i>
-          exp                 mx1 = exp(mx)     exponential <i|mx1|j>=<j|mx*|i>
+          exp                 mx1 = exp(mx)     exponential by diagonalization
+          expm                mx1 = expm(mx)    exponential by Pade approximation
          trace                  z = Tr{mx}      trace       z = sum <i|mx|i> */
 
 MSVCDLL friend matrix  Re(const matrix& mx);
@@ -385,6 +386,7 @@ MSVCDLL matrix  conj()       const;
 MSVCDLL matrix  transpose()  const; 
 MSVCDLL matrix  adjoint()    const;
 MSVCDLL matrix  exp()        const;
+MSVCDLL matrix  expm()       const;
 MSVCDLL complex trace()      const;    
 
 /*     Function    Output                       Description
diff --git a/src/Matrix/n_matrix.cc b/src/Matrix/n_matrix.cc
index 7728a38..b978aee 100644
--- a/src/Matrix/n_matrix.cc
+++ b/src/Matrix/n_matrix.cc
@@ -77,6 +77,15 @@ extern "C"
                      const int*, __CLPK_doublecomplex*, const int*, double*, int*);
   extern void zheev_(const char*, const char*, const int*, __CLPK_doublecomplex*, const int*,
                      double*, __CLPK_doublecomplex*, const int*, double*, int*);
+  extern void zgetri_(const int*, __CLPK_doublecomplex *, const int*, int*, __CLPK_doublecomplex *,
+                      const int*, int*);
+  extern void zhetri_(const char*, const int*, __CLPK_doublecomplex *, const int*, int*, __CLPK_doublecomplex *,
+                      int*);
+  extern void zgetrf_(const int*, const int*, __CLPK_doublecomplex *, const int*, int*, int*);
+  extern void zhetrf_(const char*, const int*, __CLPK_doublecomplex *, const int*, int*, __CLPK_doublecomplex *,
+                      int*);
+  extern void zgesv_(const int*, const int*, __CLPK_doublecomplex *, const int*, 
+                       int*, __CLPK_doublecomplex*, const int*, int*);
 }
 #endif
 
@@ -1361,7 +1370,350 @@ _matrix* n_matrix::adjoint()
   return amx;
   }
 
-/*  This curretly uses an eigensystem method for generating the exponential.
+
+/*  Matrix exponential using the Pade method. This is a lot faster than the
+    standard exp() function. Uses the same algorithm as MATLAB expm() and is
+    roughly as fast as the MATLAB implementation. 
+
+    Higham, N. J., The Scaling and Squaring Method for the Matrix Exponential Revisited,
+    SIAM J. Matrix Anal. Appl., 26(4) (2005), pp. 1179-1193.
+    Al-Mohy, A. H. and N. J. Higham, A new scaling and squaring algorithm for the matrix
+    exponential, SIAM J. Matrix Anal. Appl., 31(3) (2009), pp. 970-989.
+
+    This seems to be the fastest method currently.
+
+*/
+
+
+_matrix* n_matrix::mxpade()
+
+{ 
+#ifdef _USING_BLAS_
+
+// Pade needs a linear equation solver so it will only work if the BLAS libraries 
+// are installed. Otherwise we will just use the standard mxexp() which uses diagonalization.
+
+  double L1, n_squ, temp, maxnorm;
+  int k1;
+
+  if(rows_ != cols_)
+  { NMxerror(14,1);			//    Bad rectangular array use
+    NMxfatal(28);			//    Unable to diagonalize
+  }
+//std::cerr << "At Point 01 n_matrix.cc\n";
+  L1=0;
+  for(k1=0;k1<size;++k1)
+  { temp = norm(data[k1]);
+    if(temp > L1)
+      L1=temp;
+  }
+  _matrix *U;
+  _matrix *V;
+  _matrix *temp1;
+  _matrix *temp2;
+  _matrix *temp3;
+  n_squ=0;
+  _matrix* ident = new i_matrix(rows_,rows_);
+  _matrix* A2=this->multiply(this);
+//std::cerr << "At Point 02\n";
+  if(L1 < 1.495585217958292e-2)
+  { const double b[]={120.0, 60.0, 12.0, 1.0};
+//  std::cerr << "At Point 02 - 01\n";
+    temp1 = ident->multiply(b[1]);
+    temp2 = A2->multiply(b[3]);
+    temp3=temp1->add(temp2);
+    delete temp1;
+//  delete temp2;							// Do not delete temp12since b[3]=1 and the same buffer is returned
+    U = this->multiply(temp3);
+    delete temp3;
+//  U = this*(b[3]*A2+b[1]*ident);
+    temp1 = ident->multiply(b[0]);
+    temp2 = A2->multiply(b[2]);
+    V = temp1->add(temp2);
+    delete temp1;
+    delete temp2;
+//  V = b[2]*A2+b[0]*ident;
+  }
+  else
+  { _matrix *A4=A2->multiply(A2);
+    if(L1 < 2.539398330063230e-1)
+    { const double b[]={30240., 15120., 3360., 420., 30., 1.};
+//    std::cerr << "At Point 02 - 02\n";
+      temp1 = ident->multiply(b[1]);
+      temp2 = A2->multiply(b[3]);
+      temp3=temp1->add(temp2);
+      delete temp1;
+      delete temp2;
+      temp1 = A4->multiply(b[5]);
+      temp2=temp1->add(temp3);
+//    delete temp1;							// Do not delete temp1 since b[5]=1 and the same buffer is returned
+      delete temp3;
+      U = this->multiply(temp2);
+      delete temp2;
+//    U = this*(b[5]*A4+b[3]*A2+b[1]*ident);
+      temp1 = ident->multiply(b[0]);
+      temp2 = A2->multiply(b[2]);
+      temp3=temp1->add(temp2);
+      delete temp1;
+      delete temp2;
+      temp1 = A4->multiply(b[4]);
+      V=temp1->add(temp3);
+      delete temp1;
+      delete temp3;
+//    V = b[4]*A4+b[2]*A2+b[0]*ident;
+    }
+    else
+    { _matrix* A6=A4->multiply(A2);
+      if(L1 < 9.504178996162932e-1)
+      { const double b[]={17297280., 8648640., 1995840., 277200., 25200., 1512., 56., 1.};
+//      std::cerr << "At Point 02 - 03\n";
+        temp1 = ident->multiply(b[1]);
+        temp2 = A2->multiply(b[3]);
+        temp3=temp1->add(temp2);
+	delete temp1;
+	delete temp2;
+        temp1 = A4->multiply(b[5]);
+        temp2=temp1->add(temp3);
+	delete temp1;
+	delete temp3;
+        temp1 = A6->multiply(b[7]);
+        temp3=temp1->add(temp2);
+//      delete temp1;							// Do not delete temp1 since b[7]=1 and the same buffer is returned
+	delete temp2;
+        U = this->multiply(temp3);
+	delete temp3;
+//      U = this*(b[7]*A6+b[5]*A4+b[3]*A2+b[1]*ident);
+        temp1 = ident->multiply(b[0]);
+        temp2 = A2->multiply(b[2]);
+        temp3=temp1->add(temp2);
+	delete temp1;
+	delete temp2;
+        temp1 = A4->multiply(b[4]);
+        temp2=temp1->add(temp3);
+	delete temp1;
+	delete temp3;
+        temp1 = A6->multiply(b[6]);
+        V=temp1->add(temp2);
+	delete temp1;
+	delete temp2;
+//      V = b[6]*A6+b[4]*A4+b[2]*A2+b[0]*ident;
+      }
+      else
+      { _matrix* A8=A6->multiply(A2);
+        if(L1 < 2.097847961257068e0)
+        { const double b[]={17643225600., 8821612800., 2075673600., 302702400., 30270240.,
+	                    2162160., 110880., 3960., 90., 1.};
+//        std::cerr << "At Point 02 - 04\n";
+          temp1 = ident->multiply(b[1]);
+          temp2 = A2->multiply(b[3]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+          temp1 = A4->multiply(b[5]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+          temp1 = A6->multiply(b[7]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+          temp1 = A8->multiply(b[9]);
+          temp2=temp1->add(temp3);
+//        delete temp1;								// Do not delete temp1 since b[9]=1 and the same buffer is returned
+	  delete temp3;
+          U = this->multiply(temp2);
+	  delete temp2;
+//        U = this*(b[9]*A8+b[7]*A6+b[5]*A4+b[3]*A2+b[1]*ident);
+          temp1 = ident->multiply(b[0]);
+          temp2 = A2->multiply(b[2]);
+          temp3 = temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+          temp1 = A4->multiply(b[4]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+          temp1 = A6->multiply(b[6]);
+          temp3 = temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+          temp1 = A8->multiply(b[8]);
+          V=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+//        V = b[8]*A8+b[6]*A6+b[4]*A4+b[2]*A2+b[0]*ident;
+	}
+        else
+        { maxnorm = 5.371920351148152e0;
+          n_squ = std::max(0, int(std::ceil(std::log(L1/maxnorm))));
+//        std::cerr << "n_squ = " << n_squ << "\n";
+          const double b[]={64764752532480000., 32382376266240000., 7771770303897600.,
+                            1187353796428800., 129060195264000., 10559470521600.,
+                            670442572800., 33522128640., 1323241920., 40840800., 960960.,
+                            16380., 182., 1.};
+//        std::cerr << "At Point 02 - 05\n";
+	  A2=A2->multiply_two(std::pow(2.0,-n_squ*2));
+	  A4=A4->multiply_two(std::pow(2.0,-n_squ*4));
+	  A6=A6->multiply_two(std::pow(2.0,-n_squ*6));
+          temp1 = A2->multiply(b[9]);
+          temp2 = A4->multiply(b[11]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+	  temp1 = A6->multiply(b[13]);
+          temp2=temp1->add(temp3);
+//        delete temp1;                                           // Do not delete temp1 since b[13]=1 and the same buffer is returned!
+	  delete temp3;
+          temp3 = A6->multiply(temp2);
+          delete temp2;
+	  temp1 = ident->multiply(b[1]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+	  temp1 = A2->multiply(b[3]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+	  temp1 = A4->multiply(b[5]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+	  temp1 = A6->multiply(b[7]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+          U = this->multiply(temp3);
+	  delete temp3;
+	  U=U->multiply_two(std::pow(2.0,-n_squ));
+//        U = mx1*(A6*(b[13]*A6+b[11]*A4+b[9]*A2)+b[7]*A6+b[5]*A4+b[3]*A2+b[1]*ident);
+          temp1 = A2->multiply(b[8]);
+          temp2 = A4->multiply(b[10]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+	  temp1 = A6->multiply(b[12]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+          temp3 = A6->multiply(temp2);
+	  delete temp2;
+	  temp1 = ident->multiply(b[0]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+	  temp1 = A2->multiply(b[2]);
+          temp3=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+	  temp1 = A4->multiply(b[4]);
+          temp2=temp1->add(temp3);
+	  delete temp1;
+	  delete temp3;
+	  temp1 = A6->multiply(b[6]);
+          V=temp1->add(temp2);
+	  delete temp1;
+	  delete temp2;
+//        V = A6*(b[12]*A6+b[10]*A4+b[8]*A2)+b[6]*A6+b[4]*A4+b[2]*A2+b[0]*ident;
+        }
+	delete A8;
+      }
+     delete A6;
+    }
+    delete A4;
+  }
+  delete A2;
+//std::cerr << "At Point 03\n";
+//std::cerr << "This is U \n";
+//U->print(std::cerr,MxPrDefs);
+//std::cerr << "This is V \n";
+//V->print(std::cerr,MxPrDefs);
+  _matrix* P = U->add(V);
+  _matrix* Q = V->subtract(U);
+  n_matrix* P2 = (n_matrix *) P;
+  n_matrix* Q2 = (n_matrix *) Q;
+//std::cerr << "This is P \n";
+//P->print(std::cerr,MxPrDefs);
+//std::cerr << "This is Q \n";
+//Q->print(std::cerr,MxPrDefs);
+#ifdef _USING_SUNPERFLIB_
+  int N    = rows_;
+  int NHRS = rows_;
+  int LDA  = rows_;
+  int *ipiv = new int[rows_];
+  int LDB   = rows_;
+  int info  = -5555;
+  zgesv(N, NHRS, (doublecomplex *) Q2->data, LDA, ipiv, (doublecomplex *) P2->data, LDB, &info);
+#endif
+#ifdef _USING_LAPACK_
+#if defined(__LP64__) /* this is needed on MacOS CLAPACK to make types match*/
+  int N    = rows_;
+  int NHRS = rows_;
+  int LDA  = rows_;
+  int *ipiv = new int[rows_];
+  int LDB   = rows_;
+  int info  = -5555;
+#else
+  long int N    = rows_;
+  long int NHRS = rows_;
+  long int LDA  = rows_;
+  long int *ipiv = new long int[rows_];
+  long int LDB   = rows_;
+  long int info  = -5555;
+#endif
+//std::cerr << "At Point 04\n";
+  zgesv_(&N, &NHRS, (__CLPK_doublecomplex *) Q2->data, &LDA, ipiv, (__CLPK_doublecomplex *) P2->data, &LDB, &info);
+#endif
+  if(info == 0)
+  { // everything looks good
+  }
+  else
+  { NMxfatal(28);			//    Unable to diagonalize
+  }
+//std::cerr << "At Point 05\n";
+  _matrix* R = P2;
+  delete [] ipiv;
+  delete ident;
+//std::cerr << "At Point 06\n";
+  for(k1=0;k1<n_squ;++k1)
+  { temp1 = R->multiply(R);
+    delete R;
+    R=temp1;
+  }
+  delete U;
+  delete V;
+  delete Q2;
+//std::cerr << "At Point 07\n";
+  return(R);
+
+#else          // BLAS NOT available
+
+// Without BLAS, we just use the standard mxexp() function here
+// This will be much slower. But Pade needs a linear equation solver.
+  
+  _matrix* dmx;					// For eigenvalues
+  _matrix* emx;					// For eigenvectors
+  diag(dmx, emx);				// Diagonalize mx to dmx & emx 
+  complex *d00 = ((d_matrix*)dmx)->data;	// Start of dmx: d00 = <0|dmx|0>
+  complex *dii, *dend = d00 + cols_;		// Indexing on dmx diagonal
+  for(dii=d00; dii<dend; dii++)			// Effective loop over i
+    (*dii) = exp(*dii);				// Set <i|dmx|i> to exp(<i|dmx|i>
+//  _matrix* pdt1  = dmx->multiply(emx);		// Perform exp(D)*E
+//  _matrix* emxi  = emx->inv();			// Get inverse of emx
+//  _matrix* expmx = emxi->multiply(pdt1);	// Perform inv(E)*exp(D)*E
+
+  _matrix* emxi  = emx->inv();			// Get inverse of emx
+  _matrix* pdt1  = dmx->multiply(emxi);
+  _matrix* expmx = emx->multiply(pdt1);
+  delete dmx;                                   // Clean up the dmx matrix
+  delete emx;                                   // Clean up the emx matrix
+  delete pdt1;					// Clean up the pdt1 matrix
+  delete emxi;					// Clean up the emxi matrix
+  return expmx;
+#endif
+}
+
+
+/*  This currently uses an eigensystem method for generating the exponential.
 
                    exp(A) = exp[E*D*inv(E)] = E*exp(D)*inv(E)
 
@@ -2304,14 +2656,14 @@ std::vector<std::string> n_matrix::printStrings(const MxPrint& PFlgs) const
     if(is_real())           ptype = 1;          // Real elements output
     else if(is_imaginary()) ptype = 2;          // Imag elements output
     }
-  int    elen;                                  // A single element length
-  switch(ptype)                                 // Get the element length
-    {                                           // from class complex. This 
-    default:                                    // depends on the output
-    case 0: elen = complex::zlength(); break;   // format
-    case 1:
-    case 2: elen = complex::dlength(); break;
-    }
+//int    elen;                                  // A single element length
+//switch(ptype)                                 // Get the element length
+//  {                                           // from class complex. This 
+//  default:                                    // depends on the output
+//  case 0: elen = complex::zlength(); break;   // format
+//  case 1:
+//  case 2: elen = complex::dlength(); break;
+//  }
   std::string pline;
   std::string efmt = complex::dformat();        // Real/Imag element format
   int i,j,pos;
@@ -2690,22 +3042,112 @@ _matrix* n_matrix::inv()
 
   {
   if(unitary) return adjoint();			// This is real new (sosi)
-  int nd = rows_;				// Matrix dimension
-  if(nd != cols_)
-    {
+  int nrows = rows_;				// Matrix dimension
+  if(nrows != cols_)
+  {
     NMxerror(14,1);				//    Bad rectangular array use
     NMxfatal(25);				//    Cannot take inverse
-    }
+  }
+  #if defined(_USING_SUNPERFLIB_) || defined(_USING_LAPACK_)
+  _matrix *mxinv;
+  if(nrows < 16)  // then do it the old fashioned gamma way.
+  { int *indx;
+    indx = new int[nrows];
+    n_matrix* nLU = new n_matrix(*this);          // Copy input array for LU decomp
+    nLU->LU_decomp(indx);                         // LU decomposition of nLU
+    n_matrix* I = new n_matrix(nrows,nrows,complex0);   // Constuct a new n_matrix
+    for(int i=0; i<nrows; i++) I->put(complex1,i,i); // Make it the identity matrix
+    mxinv = I->LUinv(indx,nLU);          // Back solve for inverse
+    delete nLU;                                   // Clean up the nLU matrix
+    delete I;                                     // Clean up the I matrix
+    delete [] indx;
+  }
+  else
+  {
+        mxinv = new n_matrix(nrows, nrows, complex0);
+        n_matrix* mxinv1 = (n_matrix *)this->transpose();
+
+#ifdef _USING_SUNPERFLIB_
+        int  N    = nrows;
+        int  lda  = nrows;
+        int *ipiv = new int[N];
+        int info = -55555;
+        zgetrf(N, N, (doublecomplex *) mxinv1->data, lda, ipiv, &info);
+        zgetri(N, (doublecomplex *)mxinv1->data, lda, ipiv , &info);
+#endif
+#ifdef _USING_LAPACK_
+#if defined(__LP64__) /* this is needed on MacOS CLAPACK to make types match*/
+        int  N    = nrows;
+        int  lda  = nrows;
+        int *ipiv = new int[N];
+        int info = -55555;
+        int lwork = N*N;
+#else
+        long int  N    = nrows;
+        long int  lda  = nrows;
+        long int *ipiv = new long int[N];
+        long int info = -55555;
+        long int lwork = N*N;
+#endif
+        complex *work= new complex [2*lwork];
+        zgetrf_(&N, &N, (__CLPK_doublecomplex *) mxinv1->data, &lda, ipiv, &info);
+        if(info == 0)
+        { // all is well.
+        }
+        else if (info > 0)
+        { std::cerr << "\nn_matrix: zgetrf failed to converge\n";
+          std::cerr << "info = " << info << "\n";
+        }
+        else if(info == -55555)
+        { std::cerr << "\nReturn value, 'info', does not appear to have been set\n";
+        }
+        else
+        { std::cerr << "\ninfo = " << info << "\n";
+        }
+        zgetri_(&N, (__CLPK_doublecomplex *) mxinv1->data, &lda, ipiv, (__CLPK_doublecomplex *) work, &lwork, &info);
+#endif
+        if(info == 0)
+        { // all is well.
+        }
+        else if (info > 0)
+        { std::cerr << "\nn_matrix: Inversion failed to converge\n";
+          std::cerr << "info = " << info << "\n";
+        }
+        else if(info == -55555)
+        { std::cerr << "\nReturn value, 'info', does not appear to have been set\n";
+        }
+        else
+        { std::cerr << "\ninfo = " << info << "\n";
+        }
+   
+        // Copy results back into D and U.
+        complex *hmxp = mxinv1->data;
+        for(int i=0; i<nrows; i++)
+        { for(int j=0; j<nrows; j++)
+          { mxinv->put(hmxp[j*nrows+i], i, j);
+          }
+        }
+        delete mxinv1;
+        delete [] ipiv;
+#ifdef _USING_LAPACK_
+        delete [] work;
+#endif
+//      std::cerr << "n_matrix inv: using sunperflib code\n";
+
+
+  }
+  #else
   int *indx;
-  indx = new int[nd];
+  indx = new int[nrows];
   n_matrix* nLU = new n_matrix(*this);          // Copy input array for LU decomp
   nLU->LU_decomp(indx);                         // LU decomposition of nLU
-  n_matrix* I = new n_matrix(nd,nd,complex0);   // Constuct a new n_matrix
-  for(int i=0; i<nd; i++) I->put(complex1,i,i); // Make it the identity matrix
+  n_matrix* I = new n_matrix(nrows,nrows,complex0);   // Constuct a new n_matrix
+  for(int i=0; i<nrows; i++) I->put(complex1,i,i); // Make it the identity matrix
   _matrix* mxinv = I->LUinv(indx,nLU);          // Back solve for inverse
   delete nLU;                                   // Clean up the nLU matrix
   delete I;                                     // Clean up the I matrix
   delete [] indx;
+#endif
   return mxinv;                                 // Return the inverse of nmx
   }
 
@@ -4027,7 +4469,7 @@ void n_matrix::diag(_matrix* (&mxd), _matrix* (&mxev))
   else						// Matrix isnt Hermitian
     {
 #if defined(_USING_SUNPERFLIB_) || defined(_USING_LAPACK_)
-    if(nrows < 128)  // then do it the old fashioned gamma way.
+    if(nrows < 32)  // then do it the old fashioned gamma way.
     {
       n_matrix mx2(*this);			// Workspace to diagonalization
       complex *tmp = mx2.corth(0,nrows-1);		// To upper Hessenberg form
@@ -4038,7 +4480,7 @@ void n_matrix::diag(_matrix* (&mxd), _matrix* (&mxev))
         NMxfatal(28);				// Unable to diagonalize
       }						// Almost surely bad input
       delete [] tmp;				// Delete tmp array from corth
-//    std::cerr << "n_matrix diag: using gamma code\n";
+//    std::cerr << "n_matrix diag: using gamma code: dimension = " << nrows << "\n";
     }
     else
     {
diff --git a/src/Matrix/n_matrix.h b/src/Matrix/n_matrix.h
index 4fe44b4..1955f95 100644
--- a/src/Matrix/n_matrix.h
+++ b/src/Matrix/n_matrix.h
@@ -366,6 +366,7 @@ virtual _matrix* conjugate();
 virtual _matrix* transpose();
 virtual _matrix* adjoint();
 virtual _matrix* mxexp();
+virtual _matrix* mxpade();
 virtual complex  trace();
 
 /*     Function    Output                       Description
diff --git a/src/MultiSys/ExProcess.cc b/src/MultiSys/ExProcess.cc
index 332a0c2..c74a11c 100644
--- a/src/MultiSys/ExProcess.cc
+++ b/src/MultiSys/ExProcess.cc
@@ -213,7 +213,7 @@ bool ExchProc::parseExch(string& Exval,
   int len = Exval.length();			// Length of input string
   int nc  = Exval.find("<=>");			// Number of chars before <=>
   string SLHS = Exval.substr(1, nc-1);		// Get string between ( & <=>
-  string SRHS = Exval.substr(nc+3, len-6);	// Get string between <=> & )
+  string SRHS = Exval.substr(nc+3, len-(5+SLHS.length()));	// Get string between <=> & )
   if(!SLHS.length())				// If there isn't anything on
     { 						// the left of <=>, then we don't
     XPerror(33, 1);				// know left hand side components