diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d9be4e5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,67 @@
+*_wrapper.cxx
+*.a
+*.class
+*.d
+*.dep
+*.o
+*.orig
+*.py[co]
+*.so
+*.swp
+
+/SParseval
+/tmp
+
+evalb/evalb
+
+build*
+dist*
+
+MANIFEST
+regression-test-*
+tags
+TAGS
+
+python/bllipparser/CharniakParser.py
+python/bllipparser/JohnsonReranker.py
+
+first-stage/PARSE/evalTree
+first-stage/PARSE/parseAndEval
+first-stage/PARSE/parseIt
+first-stage/PARSE/parser_wrapper.C
+first-stage/PARSE/swig/*/build/*
+first-stage/PARSE/swig/*/lib/*
+first-stage/TRAIN/iScale
+first-stage/TRAIN/kn3Counts
+first-stage/TRAIN/pSfgT
+first-stage/TRAIN/pSgT
+first-stage/TRAIN/pTgNt
+first-stage/TRAIN/pUgT
+first-stage/TRAIN/rCounts
+first-stage/TRAIN/selFeats
+first-stage/TRAIN/trainRs
+
+/second-stage/nbest
+
+second-stage/programs/*/read-tree.cc
+second-stage/programs/eval-beam/main
+second-stage/programs/eval-weights/eval-weights
+second-stage/programs/features/best-*parses
+second-stage/programs/features/count-*features
+second-stage/programs/features/extract-*features
+second-stage/programs/features/oracle-score
+second-stage/programs/features/parallel-extract-nfeatures
+second-stage/programs/features/parallel-extract-spfeatures
+second-stage/programs/features/reranker_wrapper.C
+second-stage/programs/features/swig/*/build/*
+second-stage/programs/features/swig/*/lib/*
+second-stage/programs/prepare-data/copy-trees-ss
+second-stage/programs/prepare-data/prepare-ec-data
+second-stage/programs/prepare-data/prepare-ec-data100
+second-stage/programs/prepare-data/prepare-new-data
+second-stage/programs/prepare-data/ptb
+second-stage/programs/wlle/avper
+second-stage/programs/wlle/cvlm
+second-stage/programs/wlle/cvlm-lbfgs
+second-stage/programs/wlle/gavper
+second-stage/programs/wlle/oracle
diff --git a/Makefile b/Makefile
index 4c0c390..0e14311 100644
--- a/Makefile
+++ b/Makefile
@@ -31,7 +31,7 @@
 #
 # The following high-level goals may also be useful:
 #
-# make nbestrain-clean # removes temporary files used in nbesttrain
+# make nbesttrain-clean # removes temporary files used in nbesttrain
 # make nbest-oracle    # oracle evaluation of n-best results 
 # make features        # extracts features from 20-fold parses
 # make train-reranker  # trains reranker model
@@ -68,12 +68,12 @@
 # Version 4.1 and later gcc permit -march=native, but older
 # versions will need -march=pentium4 or -march=opteron
 #
-# GCCFLAGS = -march=native -mfpmath=sse -msse2 -mmmx -m32
+# GCCFLAGS ?= -march=native -mfpmath=sse -msse2 -mmmx -m32
 
 # CFLAGS is used for all C and C++ compilation
 #
 CFLAGS = -MMD -O3 -Wall -ffast-math -finline-functions -fomit-frame-pointer -fstrict-aliasing $(GCCFLAGS)
-LDFLAGS = $(GCCLDFLAGS)
+
 EXEC = time
 
 # for SWIG wrappers, use these flags instead
@@ -88,11 +88,16 @@ EXEC = time
 # LDFLAGS = -g -Wall $(GCCLDFLAGS)
 # EXEC = valgrind
 
-CXXFLAGS = $(CFLAGS) -Wno-deprecated
+CXXFLAGS ?= $(CFLAGS) -Wno-deprecated
 export CFLAGS
 export CXXFLAGS
 export LDFLAGS
 
+CC ?= gcc
+CXX ?= g++
+export CC
+export CXX
+
 # Building the 20-fold training data with nbesttrain 
 # --------------------------------------------------
 
@@ -101,7 +106,7 @@ export LDFLAGS
 #
 # PENNWSJTREEBANK must be set to the base directory of the Penn WSJ Treebank
 #
-PENNWSJTREEBANK=/usr/local/data/Penn3/parsed/mrg/wsj/
+PENNWSJTREEBANK ?= /usr/local/data/Penn3/parsed/mrg/wsj/
 
 # NPARSES is the number of alternative parses to consider for each sentence
 #
@@ -190,6 +195,9 @@ FEATURESNICKNAME=sp
 # of these variable values can be found in the train-eval-reranker.sh
 # script.
 #
+
+ifndef USE_OLD_NONLBFGS_ESTIMATOR
+# Newer estimator that depends on liblbfgs:
 ESTIMATOR=second-stage/programs/wlle/cvlm-lbfgs
 
 # ESTIMATORFLAGS are flags given to the estimator
@@ -200,6 +208,22 @@ ESTIMATORFLAGS=-l 1 -c 10 -F 1 -n -1 -p 2
 #
 ESTIMATORNICKNAME=lbfgs-l1c10F1n1p2
 
+else
+# Older estimator that doesn't depend on liblbfgs:
+ESTIMATOR=second-stage/programs/wlle/cvlm-owlqn
+
+# ESTIMATORFLAGS are flags given to the estimator
+#
+# These flags are for cvlm-owlqn:
+ESTIMATORFLAGS=-l 1 -c 10 -F 1 -d 10 -n -1
+# The equivalent ESTIMATORFLAGS for cvlm:
+#   ESTIMATORFLAGS=-l 1 -c0 10 -Pyx_factor 1 -debug 10 -ns -1
+
+# ESTIMATORNICKNAME is used to name the feature weights file
+#
+ESTIMATORNICKNAME=cvlm-l1c10P1
+endif
+
 # ESTIMATORSTACKSIZE is the size (in KB) of the per-thread stacks
 # used during estimation
 #
@@ -517,11 +541,14 @@ train-reranker: $(WEIGHTSFILEGZ)
 # This goal estimates the reranker feature weights (i.e., trains the
 # reranker).
 #
+# Don't use auto-renaming as in "gzip foo" because it fails if there is
+# more than one hardlink on the file (I'm looking at you Time Machine!).
+#
 # $(WEIGHTSFILEGZ): $(ESTIMATOR)
 $(WEIGHTSFILEGZ): $(ESTIMATOR) $(MODELDIR)/features.gz $(FEATDIR)/train.gz $(FEATDIR)/dev.gz $(FEATDIR)/test1.gz
 	$(ESTIMATORENV) $(ZCAT) $(FEATDIR)/train.gz | $(EXEC) $(ESTIMATOR) $(ESTIMATORFLAGS) -e $(FEATDIR)/dev.gz -f $(MODELDIR)/features.gz -o $(WEIGHTSFILE) -x $(FEATDIR)/test1.gz
-	rm -f $(WEIGHTSFILEGZ)
-	gzip $(WEIGHTSFILE)
+	gzip -c $(WEIGHTSFILE) >$(WEIGHTSFILEGZ)
+	rm -f $(WEIGHTSFILE)
 
 ########################################################################
 #                                                                      #
diff --git a/Makefile.here b/Makefile.here
new file mode 100644
index 0000000..c2485d5
--- /dev/null
+++ b/Makefile.here
@@ -0,0 +1,6 @@
+# export MAKEFILES=/projects/SSTP/MYBLLIP/bllip-parser/Makefile.here
+
+PENNWSJTREEBANK=/corpora/LDC/LDC99T42/RAW/parsed/mrg/wsj
+
+USE_OLD_NONLBFGS_ESTIMATOR=1
+
diff --git a/Makefile.mac b/Makefile.mac
new file mode 100644
index 0000000..5c455bd
--- /dev/null
+++ b/Makefile.mac
@@ -0,0 +1,68 @@
+# To use these defaults set the MAKEFILES environment variable when calling make.
+# export MAKEFILES=`pwd`/Makefile.mac
+
+uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
+
+# For Mavericks (and Mountain Lion) I set up gcc using macports:
+# sudo port install gcc47
+# sudo port select --set gcc mp-gcc47
+# sudo port install boost liblbfgs
+
+# Using MacPorts means that we have to override the default include and library locations.
+LD_INCLUDE_PATH=/opt/local/include
+LD_LIBRARY_PATH=/opt/local/lib
+
+export LD_INCLUDE_PATH
+export LD_LIBRARY_PATH
+
+# The SParseval makefile uses a -lm dependency (a bad idea imho) which fails because there
+# is no libm.a to be used.  This trick works by mapping that to the system's libm.dylib.
+# .LIBPATTERNS+=lib%.dylib
+# export .LIBPATTERNS
+
+# On Mac OS X using -march=native doesn't seem to work (a compilation error will occur).
+# Turns out there is a problem with AVX instructions on OSX for gcc after 4.2.
+# http://stackoverflow.com/questions/12016281/g-no-such-instruction-with-avx
+# http://mac-os-forge.2317878.n4.nabble.com/gcc-as-AVX-binutils-and-MacOS-X-10-7-td144472.html
+# So here's what works for me (with or without the -mfpmath=sse - the default is 387):
+
+GCCFLAGS = -m64 -march=x86-64 -mfpmath=sse -msse -msse2 -msse3 -msse4 -msse4.1 -msse4.2 -mssse3 -I${LD_INCLUDE_PATH}
+
+# Must use export because otherwise second-stage/programs/wlle/Makefile doesn't get the message.
+export GCCFLAGS
+
+# CC = condor_compile gcc
+CC = gcc
+export CC
+
+# CXX = condor_compile g++
+CXX = g++
+export CXX
+
+# fast options
+# Compilation help: you may need to remove -march=native on older compilers.
+# GCCFLAGS=-march=native -mfpmath=sse -msse2 -mmmx
+FOPENMP=-fopenmp
+# CFLAGS=-MMD -O3 -ffast-math -fstrict-aliasing -Wall -finline-functions $(GCCFLAGS) $(FOPENMP)
+# LDFLAGS=$(FOPENMP) -L/opt/local/lib
+
+# debugging options
+# GCCFLAGS=
+# FOPENMP=
+# CFLAGS=-MMD -O0 -g $(GCCFLAGS) $(FOPENMP)
+# LDFLAGS=-g $(FOPENMP)
+# CXXFLAGS=${CFLAGS} -Wno-deprecated
+
+# CFLAGS is used for all C and C++ compilation
+#
+CFLAGS = -MMD -O3 -Wall -ffast-math -finline-functions -fomit-frame-pointer -fstrict-aliasing $(GCCFLAGS)
+export CFLAGS
+
+CXXFLAGS=${CFLAGS} -Wno-deprecated
+export CXXFLAGS
+
+LDFLAGS = -L${LD_LIBRARY_PATH} $(GCCLDFLAGS)
+export LDFLAGS
+
+# This is a handy place to put a local setting without changing Makefile.
+# PENNWSJTREEBANK = /usr/local/data/Penn3/parsed/mrg/wsj
diff --git a/parse.sh b/parse.sh
index 4c7a76c..0bf5668 100755
--- a/parse.sh
+++ b/parse.sh
@@ -14,5 +14,6 @@
 # RERANKDATA=ec50-connll-ic-s5
 # RERANKDATA=ec50-f050902-lics5
 MODELDIR=second-stage/models/ec50spfinal
-ESTIMATORNICKNAME=cvlm-l1c10P1
+# ESTIMATORNICKNAME=cvlm-l1c10P1
+ESTIMATORNICKNAME=lbfgs-l1c10F1n1p2
 first-stage/PARSE/parseIt -l399 -N50 first-stage/DATA/EN/ $* | second-stage/programs/features/best-parses -l $MODELDIR/features.gz $MODELDIR/$ESTIMATORNICKNAME-weights.gz
diff --git a/second-stage/models/ec50spfinal/cvlm-l1c10P1-weights.gz b/second-stage/models/ec50spfinal/cvlm-l1c10P1-weights.gz
index 97b917d..1eb900f 100644
Binary files a/second-stage/models/ec50spfinal/cvlm-l1c10P1-weights.gz and b/second-stage/models/ec50spfinal/cvlm-l1c10P1-weights.gz differ
diff --git a/second-stage/models/ec50spfinal/features.gz b/second-stage/models/ec50spfinal/features.gz
index d24dc76..7544432 100644
Binary files a/second-stage/models/ec50spfinal/features.gz and b/second-stage/models/ec50spfinal/features.gz differ
diff --git a/second-stage/programs/eval-beam/utility.h b/second-stage/programs/eval-beam/utility.h
index 58db9f4..8cfe57a 100644
--- a/second-stage/programs/eval-beam/utility.h
+++ b/second-stage/programs/eval-beam/utility.h
@@ -891,24 +891,42 @@ inline std::ostream& operator<< (std::ostream& os, const boost::shared_ptr<T>& s
 
 struct resource_usage { };
 
+#ifndef __i386
+#define NO_PROC_SELF_STAT
+#endif
+
+#ifdef __APPLE__
+#define NO_PROC_SELF_STAT
+#endif
+
+#ifdef NO_PROC_SELF_STAT
+inline std::ostream& operator<< (std::ostream& os, resource_usage r)
+{
+  return os;
+}
+#else // Assume we are on a 586 linux
 inline std::ostream& operator<< (std::ostream& os, resource_usage r)
 {
   FILE* fp = fopen("/proc/self/stat", "r");
-  assert(fp);
-  int utime;
-  int stime;
-  unsigned int vsize;
-  unsigned int rss;
-  int result = 
-    fscanf(fp, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %d %d %*d %*d %*d %*d"
-           "%*u %*u %*d %u %u", &utime, &stime, &vsize, &rss);
-  assert(result == 4);
-  fclose(fp);
-  // s << "utime = " << utime << ", stime = " << stime << ", vsize = " << vsize << ", rss = " << rss
-;
-  // return s << "utime = " << utime << ", vsize = " << vsize;
-  return os << "utime " << float(utime)/1.0e2 << "s, vsize " 
-	    << float(vsize)/1048576.0 << " Mb.";
+  // Don't fail if we can't read that (such as on a Mac), just return.
+  if (fp == NULL) {
+     return os;
+  } else {
+    int utime;
+    int stime;
+    unsigned int vsize;
+    unsigned int rss;
+    int result = 
+      fscanf(fp, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %d %d %*d %*d %*d %*d"
+             "%*u %*u %*d %u %u", &utime, &stime, &vsize, &rss);
+    assert(result == 4);
+    fclose(fp);
+    // s << "utime = " << utime << ", stime = " << stime << ", vsize = " << vsize << ", rss = " << rss;
+    // return s << "utime = " << utime << ", vsize = " << vsize;
+    return os << "utime " << float(utime)/1.0e2 << "s, vsize " 
+	      << float(vsize)/1048576.0 << " Mb.";
+  }
 }
+#endif
 
 #endif  // UTILITY_H
diff --git a/second-stage/programs/eval-weights/Makefile b/second-stage/programs/eval-weights/Makefile
index b6dbd65..a5612f6 100644
--- a/second-stage/programs/eval-weights/Makefile
+++ b/second-stage/programs/eval-weights/Makefile
@@ -14,7 +14,7 @@ SOURCES = best-indices.cc best-parse.cc best-parses.cc compare-models.cc data.c
 TARGETS = eval-weights # best-indices best-parse best-parses compare-models pretty-print
 OBJECTS = $(patsubst %.l,%.o,$(patsubst %.c,%.o,$(SOURCES:%.cc=%.o)))
 
-CC = gcc
+CC ?= gcc
 
 all: $(TARGETS)
 
diff --git a/second-stage/programs/eval-weights/utility.h b/second-stage/programs/eval-weights/utility.h
index 45ef28e..7c816cd 100644
--- a/second-stage/programs/eval-weights/utility.h
+++ b/second-stage/programs/eval-weights/utility.h
@@ -882,6 +882,14 @@ inline std::ostream& operator<< (std::ostream& os, const boost::shared_ptr<T>& s
 struct resource_usage { };
 
 #ifndef __i386
+#define NO_PROC_SELF_STAT
+#endif
+
+#ifdef __APPLE__
+#define NO_PROC_SELF_STAT
+#endif
+
+#ifdef NO_PROC_SELF_STAT
 inline std::ostream& operator<< (std::ostream& os, resource_usage r)
 {
   return os;
@@ -890,21 +898,24 @@ inline std::ostream& operator<< (std::ostream& os, resource_usage r)
 inline std::ostream& operator<< (std::ostream& os, resource_usage r)
 {
   FILE* fp = fopen("/proc/self/stat", "r");
-  assert(fp);
-  int utime;
-  int stime;
-  unsigned int vsize;
-  unsigned int rss;
-  int result = 
-    fscanf(fp, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %d %d %*d %*d %*d %*d"
-           "%*u %*u %*d %u %u", &utime, &stime, &vsize, &rss);
-  assert(result == 4);
-  fclose(fp);
-  // s << "utime = " << utime << ", stime = " << stime << ", vsize = " << vsize << ", rss = " << rss
-;
-  // return s << "utime = " << utime << ", vsize = " << vsize;
-  return os << "utime " << float(utime)/1.0e2 << "s, vsize " 
-	    << float(vsize)/1048576.0 << " Mb.";
+  // Don't fail if we can't read that (such as on a Mac), just return.
+  if (fp == NULL) {
+     return os;
+  } else {
+    int utime;
+    int stime;
+    unsigned int vsize;
+    unsigned int rss;
+    int result = 
+      fscanf(fp, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %d %d %*d %*d %*d %*d"
+             "%*u %*u %*d %u %u", &utime, &stime, &vsize, &rss);
+    assert(result == 4);
+    fclose(fp);
+    // s << "utime = " << utime << ", stime = " << stime << ", vsize = " << vsize << ", rss = " << rss;
+    // return s << "utime = " << utime << ", vsize = " << vsize;
+    return os << "utime " << float(utime)/1.0e2 << "s, vsize " 
+	      << float(vsize)/1048576.0 << " Mb.";
+  }
 }
 #endif
 
diff --git a/second-stage/programs/features/utility.h b/second-stage/programs/features/utility.h
index 58db9f4..8cfe57a 100644
--- a/second-stage/programs/features/utility.h
+++ b/second-stage/programs/features/utility.h
@@ -891,24 +891,42 @@ inline std::ostream& operator<< (std::ostream& os, const boost::shared_ptr<T>& s
 
 struct resource_usage { };
 
+#ifndef __i386
+#define NO_PROC_SELF_STAT
+#endif
+
+#ifdef __APPLE__
+#define NO_PROC_SELF_STAT
+#endif
+
+#ifdef NO_PROC_SELF_STAT
+inline std::ostream& operator<< (std::ostream& os, resource_usage r)
+{
+  return os;
+}
+#else // Assume we are on a 586 linux
 inline std::ostream& operator<< (std::ostream& os, resource_usage r)
 {
   FILE* fp = fopen("/proc/self/stat", "r");
-  assert(fp);
-  int utime;
-  int stime;
-  unsigned int vsize;
-  unsigned int rss;
-  int result = 
-    fscanf(fp, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %d %d %*d %*d %*d %*d"
-           "%*u %*u %*d %u %u", &utime, &stime, &vsize, &rss);
-  assert(result == 4);
-  fclose(fp);
-  // s << "utime = " << utime << ", stime = " << stime << ", vsize = " << vsize << ", rss = " << rss
-;
-  // return s << "utime = " << utime << ", vsize = " << vsize;
-  return os << "utime " << float(utime)/1.0e2 << "s, vsize " 
-	    << float(vsize)/1048576.0 << " Mb.";
+  // Don't fail if we can't read that (such as on a Mac), just return.
+  if (fp == NULL) {
+     return os;
+  } else {
+    int utime;
+    int stime;
+    unsigned int vsize;
+    unsigned int rss;
+    int result = 
+      fscanf(fp, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %d %d %*d %*d %*d %*d"
+             "%*u %*u %*d %u %u", &utime, &stime, &vsize, &rss);
+    assert(result == 4);
+    fclose(fp);
+    // s << "utime = " << utime << ", stime = " << stime << ", vsize = " << vsize << ", rss = " << rss;
+    // return s << "utime = " << utime << ", vsize = " << vsize;
+    return os << "utime " << float(utime)/1.0e2 << "s, vsize " 
+	      << float(vsize)/1048576.0 << " Mb.";
+  }
 }
+#endif
 
 #endif  // UTILITY_H
diff --git a/second-stage/programs/prepare-data/utility.h b/second-stage/programs/prepare-data/utility.h
index 45ef28e..7c816cd 100644
--- a/second-stage/programs/prepare-data/utility.h
+++ b/second-stage/programs/prepare-data/utility.h
@@ -882,6 +882,14 @@ inline std::ostream& operator<< (std::ostream& os, const boost::shared_ptr<T>& s
 struct resource_usage { };
 
 #ifndef __i386
+#define NO_PROC_SELF_STAT
+#endif
+
+#ifdef __APPLE__
+#define NO_PROC_SELF_STAT
+#endif
+
+#ifdef NO_PROC_SELF_STAT
 inline std::ostream& operator<< (std::ostream& os, resource_usage r)
 {
   return os;
@@ -890,21 +898,24 @@ inline std::ostream& operator<< (std::ostream& os, resource_usage r)
 inline std::ostream& operator<< (std::ostream& os, resource_usage r)
 {
   FILE* fp = fopen("/proc/self/stat", "r");
-  assert(fp);
-  int utime;
-  int stime;
-  unsigned int vsize;
-  unsigned int rss;
-  int result = 
-    fscanf(fp, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %d %d %*d %*d %*d %*d"
-           "%*u %*u %*d %u %u", &utime, &stime, &vsize, &rss);
-  assert(result == 4);
-  fclose(fp);
-  // s << "utime = " << utime << ", stime = " << stime << ", vsize = " << vsize << ", rss = " << rss
-;
-  // return s << "utime = " << utime << ", vsize = " << vsize;
-  return os << "utime " << float(utime)/1.0e2 << "s, vsize " 
-	    << float(vsize)/1048576.0 << " Mb.";
+  // Don't fail if we can't read that (such as on a Mac), just return.
+  if (fp == NULL) {
+     return os;
+  } else {
+    int utime;
+    int stime;
+    unsigned int vsize;
+    unsigned int rss;
+    int result = 
+      fscanf(fp, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %d %d %*d %*d %*d %*d"
+             "%*u %*u %*d %u %u", &utime, &stime, &vsize, &rss);
+    assert(result == 4);
+    fclose(fp);
+    // s << "utime = " << utime << ", stime = " << stime << ", vsize = " << vsize << ", rss = " << rss;
+    // return s << "utime = " << utime << ", vsize = " << vsize;
+    return os << "utime " << float(utime)/1.0e2 << "s, vsize " 
+	      << float(vsize)/1048576.0 << " Mb.";
+  }
 }
 #endif
 
diff --git a/second-stage/programs/wlle/Makefile b/second-stage/programs/wlle/Makefile
index 75a803c..37e1d9f 100644
--- a/second-stage/programs/wlle/Makefile
+++ b/second-stage/programs/wlle/Makefile
@@ -10,8 +10,8 @@
 # License for the specific language governing permissions and limitations
 # under the License.
 
-SOURCES = avper.cc cvlm-lbfgs.cc hlm.cc gavper.cc lm.cc lmdata.c oracle.cc wavper.cc wlle.cc # cvlm.cc OWLQN.cpp TerminationCriterion.cpp
-TARGETS = avper gavper oracle cvlm-lbfgs # cvlm lm oracle wavper cvlm-owlqn hlm
+SOURCES = avper.cc hlm.cc gavper.cc lm.cc lmdata.c oracle.cc wavper.cc wlle.cc cvlm-owlqn.cc OWLQN.cpp TerminationCriterion.cpp # cvlm-lbfgs.cc cvlm.cc 
+TARGETS = avper gavper oracle cvlm-owlqn # cvlm-lbfgs # cvlm lm oracle wavper cvlm-owlqn hlm
 OBJECTS = $(patsubst %.cpp,%.o,$(patsubst %.l,%.o,$(patsubst %.c,%.o,$(SOURCES:%.cc=%.o))))
 
 all: $(TARGETS)
@@ -46,15 +46,15 @@ libdata.a: data.o
 liblmdata.a: lmdata.o
 	ar rcv liblmdata.a lmdata.o; ranlib liblmdata.a
 
-CC=gcc
+# CC=gcc
 
 # fast options
 # Compilation help: you may need to remove -march=native on older compilers.
-GCCFLAGS=-march=native -mfpmath=sse -msse2 -mmmx
+# GCCFLAGS=-march=native -mfpmath=sse -msse2 -mmmx
 FOPENMP=-fopenmp
-CFLAGS=-MMD -O3 -ffast-math -fstrict-aliasing -Wall -finline-functions $(GCCFLAGS) $(FOPENMP)
-LDFLAGS=$(FOPENMP)
-CXXFLAGS=${CFLAGS} -Wno-deprecated
+# CFLAGS=-MMD -O3 -ffast-math -fstrict-aliasing -Wall -finline-functions $(GCCFLAGS) $(FOPENMP)
+LDFLAGS+=$(FOPENMP)
+# CXXFLAGS=${CFLAGS} -Wno-deprecated
 
 # debugging options
 # GCCFLAGS=
diff --git a/second-stage/programs/wlle/OWLQN.cpp b/second-stage/programs/wlle/OWLQN.cpp
new file mode 100644
index 0000000..df730a3
--- /dev/null
+++ b/second-stage/programs/wlle/OWLQN.cpp
@@ -0,0 +1,313 @@
+#include "OWLQN.h"
+
+#include "TerminationCriterion.h"
+
+#include <vector>
+#include <deque>
+#include <cmath>
+#include <iostream>
+#include <iomanip>
+
+#include <cassert>
+
+#define EPSILON 1e-20
+
+using namespace std;
+
+double OptimizerState::dotProduct(const DblVec& a, const DblVec& b) {
+	double result = 0;
+	for (size_t i=0; i<a.size(); i++) {
+		result += a[i] * b[i];
+	}
+	return result;
+	assert(result != result+1);
+}
+
+void OptimizerState::addMult(DblVec& a, const DblVec& b, double c) {
+        assert(c != c+1);
+	for (size_t i=0; i<a.size(); i++) {
+		a[i] += b[i] * c;
+		assert(a[i] != a[i]+1);
+	}
+}
+
+void OptimizerState::add(DblVec& a, const DblVec& b) {
+	for (size_t i=0; i<a.size(); i++) {
+		a[i] += b[i];
+		assert(a[i] != a[i]+1);
+	}
+}
+
+void OptimizerState::addMultInto(DblVec& a, const DblVec& b, const DblVec& c, double d) {
+	for (size_t i=0; i<a.size(); i++) {
+		a[i] = b[i] + c[i] * d;
+		assert(a[i] != a[i]+1);
+	}
+}
+
+void OptimizerState::scale(DblVec& a, double b) {
+        assert(b != b+1);
+	for (size_t i=0; i<a.size(); i++) {
+		a[i] *= b;
+		assert(a[i] != a[i]+1);
+	}
+}
+
+void OptimizerState::scaleInto(DblVec& a, const DblVec& b, double c) {
+        assert(c != c+1);
+	for (size_t i=0; i<a.size(); i++) {
+		a[i] = b[i] * c;
+		assert(a[i] != a[i]+1);
+	}
+}
+
+void OptimizerState::MapDirByInverseHessian() {
+        int count = (int)sList.size();
+
+        if (count != 0) {
+                int lastGoodRo = -1;
+
+                for (int i = count - 1; i >= 0; i--) {
+                        if (roList[i] > 0) {
+                                alphas[i] = -dotProduct(*sList[i], dir) / roList[i];
+                                addMult(dir, *yList[i], alphas[i]);
+                                if (lastGoodRo == -1) lastGoodRo = i;
+                        }
+                }
+
+                if (lastGoodRo == -1) return;
+
+                const DblVec& lastY = *yList[lastGoodRo];
+                double yDotY = dotProduct(lastY, lastY);
+		
+		if (yDotY == 0) return;
+
+                double scalar = roList[lastGoodRo] / yDotY;
+                scale(dir, scalar);
+
+                for (int i = 0; i < count; i++) {
+                        if (roList[i] > 0) {
+                                double beta = dotProduct(*yList[i], dir) / roList[i];
+                                addMult(dir, *sList[i], -alphas[i] - beta);
+                        }
+                }
+        }
+}
+
+void OptimizerState::MakeSteepestDescDir() {
+	if (l1weight == 0) {
+		scaleInto(dir, grad, -1);
+	} else {
+
+		for (size_t i=0; i<dim; i++) {
+			if (x[i] < 0) {
+				dir[i] = -grad[i] + l1weight;
+			} else if (x[i] > 0) {
+				dir[i] = -grad[i] - l1weight;
+			} else {
+				if (grad[i] < -l1weight) {
+					dir[i] = -grad[i] - l1weight;
+				} else if (grad[i] > l1weight) {
+					dir[i] = -grad[i] + l1weight;
+				} else {
+					dir[i] = 0;
+				}
+			}
+		}
+	}
+
+	steepestDescDir = dir;
+}
+
+void OptimizerState::FixDirSigns() {
+	if (l1weight > 0) {
+		for (size_t i = 0; i<dim; i++) {
+			if (dir[i] * steepestDescDir[i] <= 0) {
+				dir[i] = 0;
+			}
+		}
+	}
+}
+
+void OptimizerState::UpdateDir() {
+	MakeSteepestDescDir();
+	MapDirByInverseHessian();
+	FixDirSigns();
+
+#ifdef _DEBUG
+	TestDirDeriv();
+#endif
+}
+
+void OptimizerState::TestDirDeriv() {
+	double dirNorm = sqrt(dotProduct(dir, dir));
+	double eps = 1.05e-8 / dirNorm;
+	GetNextPoint(eps);
+	double val2 = EvalL1();
+	double numDeriv = (val2 - value) / eps;
+	double deriv = DirDeriv();
+	if (!quiet) cout << "  Grad check: " << numDeriv << " vs. " << deriv << "  ";
+}
+
+double OptimizerState::DirDeriv() const {
+	if (l1weight == 0) {
+		return dotProduct(dir, grad);
+	} else {
+		double val = 0.0;
+		for (size_t i = 0; i < dim; i++) {
+			if (dir[i] != 0) { 
+				if (x[i] < 0) {
+					val += dir[i] * (grad[i] - l1weight);
+				} else if (x[i] > 0) {
+					val += dir[i] * (grad[i] + l1weight);
+				} else if (dir[i] < 0) {
+					val += dir[i] * (grad[i] - l1weight);
+				} else if (dir[i] > 0) {
+					val += dir[i] * (grad[i] + l1weight);
+				}
+			}
+		}
+
+		return val;
+	}
+}
+
+void OptimizerState::GetNextPoint(double alpha) {
+	addMultInto(newX, x, dir, alpha);
+	if (l1weight > 0) {
+		for (size_t i=0; i<dim; i++) {
+			if (x[i] * newX[i] < 0.0) {
+				newX[i] = 0.0;
+			}
+		}
+	}
+}
+
+double OptimizerState::EvalL1() {
+	double val = func.Eval(newX, newGrad);
+	if (l1weight > 0) {
+		for (size_t i=0; i<dim; i++) {
+			val += fabs(newX[i]) * l1weight;
+		}
+	}
+
+	return val;
+}
+
+void OptimizerState::BackTrackingLineSearch() {
+	double origDirDeriv = DirDeriv();
+	// if a non-descent direction is chosen, the line search will break anyway, so throw here
+	// The most likely reason for this is a bug in your function's gradient computation
+	if (origDirDeriv >= 0) {
+		cerr << "L-BFGS chose a non-descent direction: check your gradient!" << endl;
+		exit(1);
+	}
+
+	double alpha = 1.0;
+	double backoff = 0.5;
+	if (iter == 1) {
+		//alpha = 0.1;
+		//backoff = 0.5;
+		double normDir = sqrt(dotProduct(dir, dir));
+		assert(normDir != 0);
+		assert(normDir != normDir+1);
+		alpha = (1 / normDir);
+		backoff = 0.1;
+	}
+
+	const double c1 = 1e-4;
+	double oldValue = value;
+
+	while (true) {
+		GetNextPoint(alpha);
+		value = EvalL1();
+
+		if (value <= oldValue + c1 * origDirDeriv * alpha) break;
+
+		if (!quiet) cout << "." << flush;
+
+		alpha *= backoff;
+	}
+
+	if (!quiet) cout << endl;
+}
+
+void OptimizerState::Shift() {
+	DblVec *nextS = NULL, *nextY = NULL;
+
+	int listSize = (int)sList.size();
+
+	if (listSize < m) {
+		try {
+			nextS = new vector<double>(dim);
+			nextY = new vector<double>(dim);
+		} catch (bad_alloc) {
+			m = listSize;
+			if (nextS != NULL) {
+				delete nextS;
+				nextS = NULL;
+			}
+		}
+	}
+
+	if (nextS == NULL) {
+		nextS = sList.front();
+		sList.pop_front();
+		nextY = yList.front();
+		yList.pop_front();
+		roList.pop_front();
+	}
+
+	addMultInto(*nextS, newX, x, -1);
+	addMultInto(*nextY, newGrad, grad, -1);
+	double ro = dotProduct(*nextS, *nextY);
+	if (ro == 0)
+	  ro = EPSILON;
+
+	sList.push_back(nextS);
+	yList.push_back(nextY);
+	roList.push_back(ro);
+
+	x.swap(newX);
+	grad.swap(newGrad);
+
+	iter++;
+}
+
+void OWLQN::Minimize(DifferentiableFunction& function, const DblVec& initial, DblVec& minimum, double l1weight, double tol, int m) const {
+	OptimizerState state(function, initial, m, l1weight, quiet);
+
+	if (!quiet) {
+		cout << setprecision(4) << scientific << right;
+		cout << endl << "Optimizing function of " << state.dim << " variables with OWL-QN parameters:" << endl;
+		cout << "   l1 regularization weight: " << l1weight << "." << endl;
+		cout << "   L-BFGS memory parameter (m): " << m << endl;
+		cout << "   Convergence tolerance: " << tol << endl;
+		cout << endl;
+		cout << "Iter    n:  new_value    (conv_crit)   line_search" << endl << flush;
+		cout << "Iter    0:  " << setw(10) << state.value << "  (***********) " << flush;
+	}
+
+	ostringstream str;
+	termCrit->GetValue(state, str);
+
+	while (true) {
+		state.UpdateDir();
+		state.BackTrackingLineSearch();
+
+		ostringstream str;
+		double termCritVal = termCrit->GetValue(state, str);
+		if (!quiet) {
+			cout << "Iter " << setw(4) << state.iter << ":  " << setw(10) << state.value;
+			cout << str.str() << flush;
+		}
+
+		if (termCritVal < tol) break;
+
+		state.Shift();
+	}
+
+	if (!quiet) cout << endl;
+
+	minimum = state.newX;
+}
diff --git a/second-stage/programs/wlle/OWLQN.h b/second-stage/programs/wlle/OWLQN.h
new file mode 100644
index 0000000..50294db
--- /dev/null
+++ b/second-stage/programs/wlle/OWLQN.h
@@ -0,0 +1,101 @@
+#pragma once
+
+#include <cstdlib>
+#include <vector>
+#include <deque>
+#include <iostream>
+
+typedef std::vector<double> DblVec;
+
+struct DifferentiableFunction {
+	virtual double Eval(const DblVec& input, DblVec& gradient) = 0;
+};
+
+#include "TerminationCriterion.h"
+
+class OWLQN {
+	bool quiet;
+	bool responsibleForTermCrit;
+
+public:
+	TerminationCriterion *termCrit;
+
+	OWLQN(bool quiet = false) : quiet(quiet) {
+		termCrit = new RelativeMeanImprovementCriterion(5);
+		responsibleForTermCrit = true;
+	}
+
+	OWLQN(TerminationCriterion *termCrit, bool quiet = false) : quiet(quiet), termCrit(termCrit) { 
+		responsibleForTermCrit = false;
+	}
+
+	~OWLQN() {
+		if (termCrit && responsibleForTermCrit) delete termCrit;
+	}
+
+	void Minimize(DifferentiableFunction& function, const DblVec& initial, DblVec& minimum, double l1weight = 1.0, double tol = 1e-4, int m = 10) const;
+	void SetQuiet(bool q) { quiet = q; }
+
+};
+
+class OptimizerState {
+	friend class OWLQN;
+
+	struct DblVecPtrDeque : public std::deque<DblVec*> {
+		~DblVecPtrDeque() {
+			for (size_t s = 0; s < size(); ++s) {
+				if ((*this)[s] != NULL) delete (*this)[s];
+			}
+		}
+	};
+
+	DblVec x, grad, newX, newGrad, dir;
+	DblVec& steepestDescDir; // references newGrad to save memory, since we don't ever use both at the same time
+	DblVecPtrDeque sList, yList;
+	std::deque<double> roList;
+	std::vector<double> alphas;
+	double value;
+	int iter, m;
+	const size_t dim;
+	DifferentiableFunction& func;
+	double l1weight;
+	bool quiet;
+
+	static double dotProduct(const DblVec& a, const DblVec& b);
+	static void add(DblVec& a, const DblVec& b);
+	static void addMult(DblVec& a, const DblVec& b, double c);
+	static void addMultInto(DblVec& a, const DblVec& b, const DblVec& c, double d);
+	static void scale(DblVec& a, double b);
+	static void scaleInto(DblVec& a, const DblVec& b, double c);
+
+	void MapDirByInverseHessian();
+	void UpdateDir();
+	double DirDeriv() const;
+	void GetNextPoint(double alpha);
+	void BackTrackingLineSearch();
+	void Shift();
+	void MakeSteepestDescDir();
+	double EvalL1();
+	void FixDirSigns();
+	void TestDirDeriv();
+
+	OptimizerState(DifferentiableFunction& f, const DblVec& init, int m, double l1weight, bool quiet) 
+		: x(init), grad(init.size()), newX(init), newGrad(init.size()), dir(init.size()), steepestDescDir(newGrad), alphas(m), iter(1), m(m), dim(init.size()), func(f), l1weight(l1weight), quiet(quiet) {
+			if (m <= 0) {
+				std::cerr << "m must be an integer greater than zero." << std::endl;
+				exit(1);
+			}
+			value = EvalL1();
+			grad = newGrad;
+	}
+
+public:
+	const DblVec& GetX() const { return newX; }
+	const DblVec& GetLastX() const { return x; }
+	const DblVec& GetGrad() const { return newGrad; }
+	const DblVec& GetLastGrad() const { return grad; }
+	const DblVec& GetLastDir() const { return dir; }
+	double GetValue() const { return value; }
+	int GetIter() const { return iter; }
+	size_t GetDim() const { return dim; }
+};
diff --git a/second-stage/programs/wlle/TerminationCriterion.cpp b/second-stage/programs/wlle/TerminationCriterion.cpp
new file mode 100644
index 0000000..a0e8488
--- /dev/null
+++ b/second-stage/programs/wlle/TerminationCriterion.cpp
@@ -0,0 +1,28 @@
+#include "TerminationCriterion.h"
+
+#include "OWLQN.h"
+
+#include <limits>
+#include <iomanip>
+#include <cmath>
+
+using namespace std;
+
+double RelativeMeanImprovementCriterion::GetValue(const OptimizerState& state, std::ostream& message) {
+	double retVal = numeric_limits<double>::infinity();
+
+	if (prevVals.size() > 5) {
+		double prevVal = prevVals.front();
+		if (prevVals.size() == 10) prevVals.pop_front();
+		double averageImprovement = (prevVal - state.GetValue()) / prevVals.size();
+		double relAvgImpr = averageImprovement /(1+fabs(state.GetValue()));  // !!!! modified MJ, 26th Aug 2008
+		message << setprecision(4) << scientific << right;
+		message << "  (" << setw(10) << relAvgImpr << ") " << flush;
+		retVal = relAvgImpr;
+	} else {
+		message << "  (wait for five iters) " << flush;
+	}
+
+	prevVals.push_back(state.GetValue());
+	return retVal;
+}
diff --git a/second-stage/programs/wlle/TerminationCriterion.h b/second-stage/programs/wlle/TerminationCriterion.h
new file mode 100644
index 0000000..3059972
--- /dev/null
+++ b/second-stage/programs/wlle/TerminationCriterion.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <deque>
+#include <sstream>
+#include <string>
+
+class OptimizerState;
+
+struct TerminationCriterion {
+	virtual double GetValue(const OptimizerState& state, std::ostream& message) = 0;
+};
+
+class RelativeMeanImprovementCriterion : public TerminationCriterion {
+	const int numItersToAvg;
+	std::deque<double> prevVals;
+
+public:
+	RelativeMeanImprovementCriterion(int numItersToAvg = 5) : numItersToAvg(numItersToAvg) {}
+
+	double GetValue(const OptimizerState& state, std::ostream& message);
+};
diff --git a/second-stage/programs/wlle/cvlm-owlqn.cc b/second-stage/programs/wlle/cvlm-owlqn.cc
new file mode 100644
index 0000000..d8c37d8
--- /dev/null
+++ b/second-stage/programs/wlle/cvlm-owlqn.cc
@@ -0,0 +1,758 @@
+// cvlm-owlqn.cc -- A linear model estimator for a variety of user-selectable loss functions.
+
+const char usage[] =
+"cvlm-owlqn -- A cross-validating linear model estimator for a variety of user-selectable loss functions.\n"
+"\n"
+" Mark Johnson, 21st July 2008\n"
+"\n"
+" It uses a modified version of L-BFGS developed by Galen Andrew at Microsoft Research\n"
+" to be especially efficient for L1-regularized loss functions.\n"
+"\n"
+" The regularizer weight(s) are set by cross-validation on development data.\n"
+"\n"
+"Usage: cvlm-owlqn [-h] [-d debug_level] [-c c0] [-C c00] [-p p] [-r r] [-s s] [-t tol]\n"
+"                  [-l ltype] [-F f] [-G] [-n ns] [-f feat-file]\n"
+"                  [-o weights-file]  [-e eval-file] [-x eval-file2]\n"
+"	           < train-file\n"
+"\n"
+"where:\n"
+"\n"
+" debug_level > 0 controls the amount of output produced\n"
+"\n"
+" -c c0 is the initial value for the regularizer constant.\n"
+"\n"
+" -C c00 multiplies the regularizer constant for the first feature class\n"
+" by c00 (this can be used to allow the first feature class to be regularized less).\n"
+"\n"
+" -l ltype identifies the type of loss function used:\n"
+"\n"
+"    -l 0 - log loss (c0 ~ 5)\n"
+"    -l 1 - EM-style log loss (c0 ~ 5)\n"
+"    -l 2 - pairwise log loss \n"
+"    -l 3 - exp loss (c0 ~ 25, s ~ 1e-5)\n"
+"    -l 4 - log exp loss (c0 ~ 1e-4)\n"
+"    -l 5 - maximize expected F-score (c ~ ?)\n"
+"\n"
+" -r r specifies that the weights are initialized to random values in\n"
+"   [-r ... +r],\n"
+"\n"
+" -t tol specifies the stopping tolerance for the IWLQN optimizer\n"
+"\n"
+" -F f indicates that a parse should be taken as correct\n"
+"   proportional to f raised to its f-score, and\n"
+"\n"
+" -G indicates that each sentence is weighted by the number of\n"
+"   edges in its gold parse.\n"
+"\n"
+" -n ns is the maximum number of ':' characters in a <featclass>, used to\n"
+" determine how features are binned into feature classes (ns = -1 bins\n"
+" all features into the same class)\n"
+"\n"
+" -f feat-file is a file of <featclass> <featuredetails> lines, used for\n"
+" cross-validating regularizer weights,\n"
+"\n"
+" train-file, eval-file and eval-file2 are files from which training and evaluation\n"
+" data are read (if eval-file ends in the suffix .bz2 then bzcat is used\n"
+" to read it; if no eval-file is specified, then the program tests on the\n"
+" training data),\n"
+"\n"
+" weights-file is a file to which the estimated weights are written,\n"
+"\n"
+"The function that the program minimizes is:\n"
+"\n"
+"   Q(w) = s * (- L(w) + c * sum_j pow(fabs(w[j]), p) ), where:\n"
+"\n"
+"   L(w) is the loss function to be optimized.\n"
+"\n"
+"With debug = 0, the program writes a single line to stdout:\n"
+"\n"
+"c p r s it nzeroweights/nweights neglogP/nsentence ncorrect/nsentences\n"
+"\n"
+"With debug >= 10, the program writes out a histogram of weights as well\n"
+"\n"
+"Data format:\n"
+"-----------\n"
+"\n"
+"<Data>     --> [S=<NS>] <Sentence>*\n"
+"<Sentence> --> [G=<G>] N=<N> <Parse>*\n"
+"<Parse>    --> [P=<P>] [W=<W>] <FC>*,\n"
+"<FC>       --> <F>[=<C>]\n"
+"\n"
+"NS is the number of sentences.\n"
+"\n"
+"Each <Sentence> consists of N <Parse>s.  <G> is the gold standard\n"
+"score.  To get parsing precision and recall results, set <G> to the\n"
+"number of edges in the gold standard parse.  To get accuracy results,\n"
+"set <G> to 1 (the default).\n"
+"\n"
+"A <Parse> consists of <FC> pairs.  <P> is the parse's possible highest\n"
+"score and <W> is the parse's actual score.  To get parsing precision and\n"
+"recall results, set <P> to the number of edges in the parse and <W> to\n"
+"the number of edges in common between the gold and parse trees.\n"
+"\n"
+"A <FC> consists of a feature (a non-negative integer) and an optional\n"
+"count (a real).\n"
+"\n"
+"The default for all numbers except <W> is 1.  The default for <W> is 0.\n";
+
+#include "custom_allocator.h"    // must come first
+#define _GLIBCPP_CONCEPT_CHECKS  // uncomment this for checking
+
+#include <cassert>
+#include <cctype>
+#include <cerrno>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <unistd.h>
+#include <vector>
+
+#include "lmdata.h"
+#include "powell.h"
+#include "utility.h"
+
+#include "OWLQN.h"
+#include "TerminationCriterion.h"
+
+typedef std::vector<double> doubles;
+typedef std::vector<size_t> size_ts;
+
+int debug_level = 0;
+
+enum loss_type { log_loss, em_log_loss, em_log_loss_noomp, pairwise_log_loss, exp_loss, log_exp_loss, 
+		 expected_fscore_loss };
+
+const char* loss_type_name[] = { "log_loss", "em_log_loss", "em_log_loss_noomp", "pairwise_log_loss", "exp_loss", 
+				 "log_exp_loss", "expected_fscore_loss" };
+
+void print_histogram(int nx, const double x[], int nbins=20) {
+  int nx_nonzero = 0;
+  for (int i = 0; i < nx; ++i)
+    if (x[i] != 0)
+      ++nx_nonzero;
+
+  std::cout << "#   There are " << nx_nonzero << " non-zero values and " 
+	    << nx-nx_nonzero << " zero values." << std::endl;
+
+  if (nx_nonzero > 0) {
+    std::vector<double> s;
+    s.reserve(nx_nonzero);
+    for (int i = 0; i < nx; ++i)
+      if (x[i] != 0)
+	s.push_back(x[i]);
+    std::sort(s.begin(), s.end());
+    for (int i = 0; i <= nbins; ++i) {
+      int j = i*(nx_nonzero-1);
+      j /= nbins;
+      std::cout << float(i)/float(nbins) << '\t' << s[j] << std::endl;
+    }
+  }
+}  // print_histogram()
+
+// f_df() evaluates the statistics of the corpus, and prints the f-score
+//  if required
+//
+double f_df(loss_type ltype, corpus_type* corpus, const double x[], double df_dx[]) {
+  Float sum_g = 0, sum_p = 0, sum_w = 0, L = 0;
+
+  switch (ltype) {
+  case log_loss:
+    L = corpus_stats(corpus, &x[0], &df_dx[0], &sum_g, &sum_p, &sum_w);
+    break;
+  case em_log_loss:
+    L = emll_corpus_stats(corpus, &x[0], &df_dx[0], &sum_g, &sum_p, &sum_w);
+    break;
+  case em_log_loss_noomp:
+    L = emll_corpus_stats_noomp(corpus, &x[0], &df_dx[0], &sum_g, &sum_p, &sum_w);
+    break;
+  case pairwise_log_loss:
+    L = pwlog_corpus_stats(corpus, &x[0], &df_dx[0], &sum_g, &sum_p, &sum_w);
+    break;
+  case exp_loss:
+    L = exp_corpus_stats(corpus, &x[0], &df_dx[0], &sum_g, &sum_p, &sum_w);
+    break;
+  case log_exp_loss:
+    L = log_exp_corpus_stats(corpus, &x[0], &df_dx[0], &sum_g, &sum_p, &sum_w);
+    break;
+  case expected_fscore_loss:
+    L = 1 - fscore_corpus_stats(corpus, &x[0], &df_dx[0], &sum_g, &sum_p, &sum_w);
+    for (size_type j = 0; j < corpus->nfeatures; ++j)
+      df_dx[j] = -df_dx[j];
+    break;
+  default:
+    L = 0;
+    std::cerr << "## Error: unrecognized loss_type loss = " << int(ltype) 
+	      << std::endl;
+  }  
+  
+  if (debug_level >= 1000)
+    std::cerr << "f score = " << 2*sum_w/(sum_g+sum_p) << ", " << std::flush;
+  
+  assert(finite(L));
+  return L;
+}
+
+// LossFn() is the loss function
+//
+class LossFn : public DifferentiableFunction {
+public:
+
+  const loss_type ltype;
+  corpus_type* corpus;
+  const size_ts& f_c;	//!< feature -> cross-validation class
+  const doubles& cs;	//!< cross-validation class -> regularizer factor
+  double p, s;
+  int it;
+  double L, R, Q;
+
+  LossFn(loss_type ltype, corpus_type* corpus, const size_ts& f_c, 
+	 const doubles& cs, double p, double s) 
+    : ltype(ltype), corpus(corpus), f_c(f_c), cs(cs), p(p), s(s), it(0) 
+  { 
+    assert(f_c.size() == corpus->nfeatures);
+    for (size_type f = 0; f < f_c.size(); ++f)
+      assert(f_c[f] < cs.size());
+  }
+
+  virtual double Eval(const DblVec& x, DblVec& df_dx) {
+    size_t nx = x.size();
+
+    assert(size_t(nx) == corpus->nfeatures);
+    assert(f_c.size() == corpus->nfeatures);
+
+    it++;
+    if (debug_level >= 1000)      
+      std::cerr << "it = " << it << ", " << std::flush;
+
+    L = f_df(ltype, corpus, &x[0], &df_dx[0]);
+
+    if (s != 1) {
+      L *= s;
+
+      for (size_t i = 0; i < nx; ++i) {
+	assert(finite(df_dx[i]));
+	df_dx[i] *= s;
+      }
+    }
+
+    R = 0;
+    if (p != 1) {
+      for (size_t i = 0; i < nx; ++i) 
+	R +=  cs[f_c[i]] * pow(fabs(x[i]), p);
+      R *= s;
+      double sp = s * p;
+      for (size_t i = 0; i < nx; ++i) 
+	df_dx[i] += sp * cs[f_c[i]] * pow(fabs(x[i]), p-1) 
+	  * (x[i] >= 0 ? (x[i] == 0 ? 0 : 1) : -1);
+    }
+
+    Q = L + R;
+    
+    if (debug_level >= 1000)
+      std::cerr << "Q = " << Q << " = L = " << L << " + R = " << R << std::endl;
+
+    assert(finite(Q));
+    
+    if (debug_level >= 10000) {
+      std::cerr << "Histogram of derivatives:" << std::endl;
+      print_histogram(nx, &df_dx[0]);
+      std::cerr << "--------------------------" << std::endl;
+    }
+
+    for (size_t i = 0; i < nx; ++i)
+      assert(finite(df_dx[i]));
+    return Q;
+  }   // LossFn::operator()
+
+};  // LossFn{}
+
+// The Estimator1 does one round of estimation
+//
+struct Estimator1 {
+  typedef std::vector<size_t> size_ts;
+  typedef std::vector<double> doubles;
+  
+  corpus_type* train;	//!< training data
+  size_type nx;		//!< number of features
+  corpus_type* eval;	//!< evaluation data
+  corpus_type* eval2;	//!< 2nd evaluation data
+  loss_type ltype;	//!< type of loss function
+  double c0;		//!< default regularizer factor
+  double c00;           //!< multiply default regularizer factor for first feature class
+  double p;		//!< regularizer power
+  double r;		//!< random initialization
+  double s;		//!< scale factor
+  double tol;           //!< stopping tolerance
+  bool opt_fscore;	//!< optimize f-score or - log likelihood
+
+  doubles x;		//!< feature -> weight
+  size_ts f_c;		//!< feature -> cross-validation class
+  doubles lcs;		//!< cross-validation class -> log factor
+  size_type nc;		//!< number of cross-validation classes
+
+  size_type nits;	//!< number of iterations of last round
+  size_type sum_nits;	//!< total number of iterations
+  size_type nrounds;	//!< number of cross-validation rounds so far
+  double best_score;    //!< best score seen so far
+  std::string weightsfile; //!< name of weights file
+
+  typedef std::map<std::string,size_t> S_C;
+  S_C identifier_regclass; //!< map from feature class identifiers to regularization class
+  typedef std::vector<std::string> Ss;
+  Ss regclass_identifiers; //!< vector of class identifiers
+
+  Estimator1(loss_type ltype, double c0, double c00, double p, double r, double s,
+	     double tol=1e-5, bool opt_fscore = true, std::string weightsfile = "") 
+    : train(NULL), nx(0), eval(NULL), eval2(NULL),
+      ltype(ltype), c0(c0), c00(c00), p(p), r(r), s(s), tol(tol), 
+      opt_fscore(opt_fscore), lcs(1, log(c0)), nc(1), 
+      nits(0), sum_nits(0), nrounds(0), best_score(0),
+      weightsfile(weightsfile)
+  { }  // Estimator1::Estimator1()
+
+  //! set_data() sets the training and evaluation data
+  //
+  void set_data(corpus_type* t, corpus_type* e, corpus_type* e2) {
+    train = t;
+    eval = e;
+    eval2 = e2;
+    nx = train->nfeatures;
+    x.resize(nx);
+    assert(f_c.size() <= nx);
+    assert(eval == NULL || eval->nfeatures <= train->nfeatures);
+    assert(eval2 == NULL || eval2->nfeatures <= train->nfeatures);
+  } // Estimator1::set_data()
+
+  // operator() actually runs one round of estimation
+  //
+  double operator() (const doubles& lccs) {
+    assert(lccs.size() == nc);
+    doubles ccs(nc);
+    double L, R, Q;
+
+    for (size_type i = 0; i < nc; ++i)
+      ccs[i] = exp(lccs[i]);
+
+    assert(x.size() == nx);
+    nits = 0;
+    nrounds++;
+
+    if (debug_level >= 10) {
+      if (nrounds == 1) 
+	std::cerr << "# round	nfeval	L	R	Q	neglogP	f-score	css" << std::endl;
+      std::cerr << nrounds << std::flush;
+    }
+ 
+    LossFn fn(ltype, train, f_c, ccs, p, s);
+
+    DblVec x0(nx);
+
+    if (r != 0) 
+      for (size_type i = 0; i < nx; ++i)
+	x0[i] = r*double(random()-RAND_MAX/2)/double(RAND_MAX/2);
+    
+    OWLQN owlqn(true);
+    if (p == 1) {
+      assert(ccs.size() == 1);
+      owlqn.Minimize(fn, x0, x, ccs[0], tol);
+    }
+    else
+      owlqn.Minimize(fn, x0, x, 0, tol);      
+      
+    nits = fn.it;
+    L = fn.L;
+    R = fn.R;
+    Q = fn.Q;
+
+    // Clean up, collect stats
+
+    sum_nits += nits;
+
+    if (debug_level >= 10)
+      std::cerr << '\t' << nits << '\t' << L << '\t' << R << '\t' << Q;
+
+    double score = evaluate(opt_fscore, true);
+
+    if (debug_level >= 10)
+      std::cerr << '\t' << ccs << std::endl;
+
+    return score;
+
+  }  // Estimator1::operator()
+
+  // evaluate() evaluates the current model on the eval data, prints
+  // out debugging information if appropriate, and returns either
+  // the - log likelihood or 1 - f-score.
+  //
+  double evaluate(bool opt_fscore = false, bool internal = false) {
+
+    std::vector<double> df_dx(nx);
+    Float sum_g = 0, sum_p = 0, sum_w = 0;
+    Float neglogP = corpus_stats(eval, &x[0], &df_dx[0], 
+				 &sum_g, &sum_p, &sum_w);
+    Float fscore = 2*sum_w/(sum_g+sum_p);
+    
+    if (internal) {  // internal evaluation, use a short print-out
+      if (debug_level >= 10) {
+	std::cerr << '\t' << neglogP << '\t' << fscore; 
+	if (eval2 != NULL) {
+	  Float sum_g2 = 0, sum_p2 = 0, sum_w2 = 0;
+	  Float neglogP2 = corpus_stats(eval2, &x[0], &df_dx[0], 
+					&sum_g2, &sum_p2, &sum_w2);
+	  Float fscore2 = 2*sum_w2/(sum_g2+sum_p2);
+	  std::cerr << '\t' << neglogP2 << '\t' << fscore2;
+	}
+      }
+    }
+    else { // final evaluation, print out more info
+    
+      int nzeros = 0;
+      for (size_type i = 0; i < nx; ++i) 
+	if (x[i] == 0)
+	  ++nzeros;
+  
+      std::cerr << "# Regularizer power p = " << p << std::endl;
+      std::cerr << "# " << nx-nzeros << " non-zero feature weights of " 
+		<< nx << " features." << std::endl;
+      std::cerr << "# Eval neglogP = " << neglogP 
+		<< ", neglogP/nsentences = " << neglogP/eval->nsentences
+		<< std::endl;
+      std::cerr << "# Eval precision = " << sum_w/sum_p 
+		<< ", recall = " << sum_w/sum_g
+		<< ", f-score = " << 2*sum_w/(sum_g+sum_p)
+		<< std::endl;
+      if (eval2 != NULL) {
+	Float sum_g = 0, sum_p = 0, sum_w = 0;
+	Float neglogP = corpus_stats(eval2, &x[0], &df_dx[0], 
+				     &sum_g, &sum_p, &sum_w);
+	std::cerr << "# Eval2 neglogP = " << neglogP 
+		  << ", neglogP/nsentences = " << neglogP/train->nsentences
+		  << std::endl;
+	std::cerr << "# Eval2 precision = " << sum_w/sum_p 
+		  << ", recall = " << sum_w/sum_g
+		  << ", f-score = " << 2*sum_w/(sum_g+sum_p)
+		  << std::endl;
+      }
+      {
+	Float sum_g = 0, sum_p = 0, sum_w = 0;
+	Float neglogP = corpus_stats(train, &x[0], &df_dx[0], 
+				     &sum_g, &sum_p, &sum_w);
+	std::cerr << "# Train neglogP = " << neglogP 
+		  << ", neglogP/nsentences = " << neglogP/train->nsentences
+		  << std::endl;
+	std::cerr << "# Train precision = " << sum_w/sum_p 
+		  << ", recall = " << sum_w/sum_g
+		  << ", f-score = " << 2*sum_w/(sum_g+sum_p)
+		  << std::endl;
+      }
+
+      std::cerr << "# regclass_identifiers = " << regclass_identifiers << std::endl;
+      std::cerr << "# lcs = " << lcs << std::endl;
+      {
+	doubles cs(nc);
+	for (size_type i = 0; i < nc; ++i)
+	  cs[i] = exp(lcs[i]);
+	std::cerr << "# cs = " << cs << std::endl;
+      }
+      if (debug_level >= 100) {
+	std::cerr << "# Cumulative distribution of feature weights:" << std::endl;
+	print_histogram(nx, &x[0]);
+      }
+    }
+
+    double score = (opt_fscore ? 1 - fscore : neglogP);
+
+    if (nrounds == 1 || score < best_score) {
+      best_score = score;
+
+      // Write out weights file
+    
+      if (!weightsfile.empty()) {
+	FILE* out = fopen(weightsfile.c_str(), "w");
+	// fprintf(out, "%d@", nx-nzeros);
+	for (size_type i = 0; i < x.size(); ++i) 
+	  if (x[i] != 0) {
+	    fprintf(out, "%d", i);
+	    if (x[i] != 1)
+	      fprintf(out, "=%g", x[i]);
+	    fprintf(out, "\n");
+	  }
+	fclose(out);
+      }      
+    }
+    return score;
+  } // Estimator1::evaluate()
+
+  //! fc_bin() maps a feature count to its corresponding bin
+  //
+  static int fc_bin(double feature_count_base, int feature_count) {
+    if (feature_count <= 4)
+      return feature_count;
+    else
+      return lrint(4.0 + pow(feature_count_base, 
+			     lrint(log(feature_count-4)/log(feature_count_base))));
+  }  // Estimator1::fc_bin()
+
+  // read_featureclasses() reads the feature classes from a feature file
+  //
+  void read_featureclasses(const char* filename, 
+			   int nseparators = 1,
+			   const char* separators = ":") {
+    
+    const char* filesuffix = strrchr(filename, '.');
+    bool popen_flag = false;
+    FILE *in;
+    if (strcasecmp(filesuffix, ".bz2") == 0) {
+      std::string command("bzcat ");
+      command += filename;
+      in = popen(command.c_str(), "r");
+      if (in == NULL) {
+	perror("## Error in cvlm-owlqn: ");
+	std::cerr << "## popen(\"" << command << "\", \"r\") failed, usage = " << resource_usage() << std::endl;
+      }
+      popen_flag = true;
+    }
+    else if (strcasecmp(filesuffix, ".gz") == 0) {
+      std::string command("zcat ");
+      command += filename;
+      errno = 0;
+      in = popen(command.c_str(), "r");
+      if (in == NULL) {
+	perror("## Error in cvlm-owlqn: ");
+	std::cerr << "## popen(\"" << command << "\", \"r\") failed, usage = " << resource_usage() << std::endl;
+      }
+      popen_flag = true;
+    }
+    else
+      in = fopen(filename, "r");
+    if (in == NULL) {
+      std::cerr << "## Couldn't open evalfile " << filename
+		<< ", errno = " << errno << "\n" 
+		<< usage << std::endl;
+      exit(EXIT_FAILURE);
+    }
+
+    size_type featno;
+
+    // read feature number 
+
+    while (fscanf(in, " %u ", &featno) == 1) {
+      int c = ':';
+      
+      // read the prefix of the feature class identifier
+      
+      std::string identifier;
+      int iseparators = 0;
+      if (nseparators >= 0)
+	while ((c = getc(in)) != EOF && !isspace(c)) {
+	  if (index(separators, c) != NULL)
+	    if (++iseparators > nseparators)
+	      break;
+	  identifier.push_back(c);
+	}
+      
+      // skip the rest of the line
+
+      while ((c = getc(in)) != EOF && c != '\n')
+	;
+
+      // insert the prefix into the prefix -> regularization class map
+      
+      S_C::iterator it 
+	= identifier_regclass.insert(S_C::value_type(identifier, 
+						     identifier_regclass.size())).first;
+      
+      size_type cl = it->second;    // regularization class
+
+      f_c.resize(featno+1);
+      f_c[featno] = cl;          // set feature's regularization class
+    }
+      
+    nc = identifier_regclass.size();   // set nc
+    lcs.resize(nc, log(c0));           // set each regularizer class' factor to c0
+    lcs[0] += log(c00);                // increment first regularizer class' factor by c00
+
+    // construct regclass_identifiers
+    
+    regclass_identifiers.resize(nc);
+    cforeach (S_C, it, identifier_regclass) {
+      assert(it->second < regclass_identifiers.size());
+      regclass_identifiers[it->second] = it->first;
+    }
+
+    if (debug_level >= 0) 
+      std::cerr << "# Regularization classes: " << regclass_identifiers << std::endl;
+
+    if (popen_flag)
+      pclose(in);
+    else
+      fclose(in);
+  }  // Estimator1::read_featureclasses()
+    
+  void estimate(const int maxruns=10)
+  {
+    powell::control cntrl(1e-4, 1e-2, maxruns, maxruns);
+    powell::minimize(lcs, *this, log(2), cntrl);
+
+    if (debug_level > 0) {
+      std::cerr << "# Regularizer class weights = (";
+      for (size_type i = 0; i < lcs.size(); ++i) {
+	if (i > 0)
+	  std::cerr << ' ';
+	std::cerr << exp(lcs[i]);
+      }
+      std::cerr << ')' << std::endl;
+    }
+
+  }  // Estimator1::estimate()
+
+};  // Estimator1{}
+
+//! exit_failure() causes the program to halt immediately
+//
+inline std::ostream& exit_failure(std::ostream& os) {
+  os << std::endl;
+  exit(EXIT_FAILURE);
+  return os;
+}  // util::exit_failure
+
+int main(int argc, char** argv) 
+{
+  std::ios::sync_with_stdio(false);
+
+  loss_type ltype = log_loss;
+  double c0 = 2.0;
+  double c00 = 1.0;
+  double p = 2.0;
+  double r = 0.0;
+  double s = 1.0;
+  double tol = 1e-5;
+  double Pyx_factor = 0.0;
+  bool Px_propto_g = false;
+  int maxruns = 10;
+  int nseparators = 1;
+  std::string  feat_file, weights_file, eval_file, eval2_file;
+  int opt;
+  while ((opt = getopt(argc, argv, "hd:c:C:p:r:s:t:l:F:Gm:n:f:o:e:x:")) != -1) 
+    switch (opt) {
+    case 'h':
+      std::cerr << usage << exit_failure;
+      break;
+    case 'd':
+      debug_level = atoi(optarg);
+      break;
+    case 'c':
+      c0 = atof(optarg);
+      break;
+    case 'C':
+      c00 = atof(optarg);
+      break;
+    case 'p':
+      p = atof(optarg);
+      break;
+    case 'r':
+      r = atof(optarg);
+      break;
+    case 's':
+      s = atof(optarg);
+      break;
+    case 't':
+      tol = atof(optarg);
+      break;
+    case 'l':
+      ltype = loss_type(atoi(optarg));
+      break;
+    case 'F':
+      Pyx_factor = atof(optarg);
+      break;
+    case 'G':
+      Px_propto_g = true;
+      break;
+    case 'm':
+      maxruns = atoi(optarg);
+      break;
+    case 'n':
+      nseparators = atoi(optarg);
+      break;
+    case 'f':
+      feat_file = optarg;
+      break;
+    case 'o':
+      weights_file = optarg;
+      break;
+    case 'e':
+      eval_file = optarg;
+      break;
+    case 'x':
+      eval2_file = optarg;
+      break;
+    }
+
+  if (debug_level >= 10)
+    std::cerr << "#  ltype -l = " << ltype
+	      << " (" << loss_type_name[ltype] << ")"
+	      << " regularization -c = " << c0
+	      << ", c00 -C = " << c00
+	      << ", power -p = " << p 
+	      << ", scale -s = " << s
+	      << ", tol -t = " << tol
+	      << ", random init -r = " << r 
+	      << ", Pyx_factor -F = " << Pyx_factor
+	      << ", Px_propto_g -G = " << Px_propto_g
+	      << ", maxruns -m = " << maxruns
+	      << ", nseparators -n = " << nseparators
+	      << ", feat_file -f = " << feat_file
+	      << ", weights_file -o = " << weights_file
+	      << ", eval_file -e = " << eval_file
+	      << ", eval2_file -x = " << eval2_file
+	      << std::endl;
+
+  // I discovered a couple of years after I wrote this program that popen
+  // uses fork, which doubles your virtual memory for a short instant!
+
+  Estimator1 e(ltype, c0, c00, p, r, s, tol, true, weights_file);
+
+  if (!feat_file.empty())
+    e.read_featureclasses(feat_file.c_str(), nseparators, ":");  
+
+  // Read in eval data first, as that way we may squeeze everything into 4GB
+    
+  corpusflags_type corpusflags = { Pyx_factor, Px_propto_g };
+
+  corpus_type* evaldata = NULL;
+  if (!eval_file.empty()) {
+    evaldata = read_corpus_file(&corpusflags, eval_file.c_str());
+    if (debug_level >= 10)
+      std::cerr << "# read eval_file = " << eval_file 
+		<< ", nsentences = " << evaldata->nsentences
+		<< std::endl;
+  }
+
+  corpus_type* evaldata2 = NULL;
+  if (!eval2_file.empty()) {
+    evaldata2 = read_corpus_file(&corpusflags, eval2_file.c_str());
+    if (debug_level >= 10)
+      std::cerr << "# read eval2_file = " << eval2_file
+		<< ", nsentences = " << evaldata2->nsentences
+		<< std::endl;
+  }
+
+  corpus_type* traindata = read_corpus(&corpusflags, stdin);
+  int nx = traindata->nfeatures;
+
+  if (errno != 0) {
+    perror("## cvlm-owlqn, after reading main corpus, nonzero errno  ");
+    errno = 0;
+  }
+
+  std::cerr << "# " << nx << " features in training data, " << resource_usage() << std::endl;
+
+  if (evaldata == NULL)
+    evaldata = traindata;
+
+  e.set_data(traindata, evaldata, evaldata2);
+  e.estimate(maxruns);
+
+}  // main()
diff --git a/second-stage/programs/wlle/powell.h b/second-stage/programs/wlle/powell.h
new file mode 100644
index 0000000..0f80e21
--- /dev/null
+++ b/second-stage/programs/wlle/powell.h
@@ -0,0 +1,575 @@
+//! powell.h
+//!
+//! Powell's algorithm for minimizing an n-dimensional function
+//! without derivatives, based on Numerical Recipes in C++ code.
+//!
+//! Mark Johnson, 29th September 2002, last modified 14th August 2008
+//!
+//! The main routine is powell::minimize().  Its calling sequence
+//! is:
+//!
+//! Float minimize(              // => value of f at minimum
+//!                p,            // <= starting x value, => minimizing x value
+//!                f,            // <= function object being minimized
+//!                initial_step, // <= initial step length 
+//!                cntrl)        // <= control{} structure
+//!
+//!
+//! There is also a specialized version for 1-d minimization
+//!
+//! Float minimize1d(              // => minimizing x value
+//!                  p,            // <= starting x value
+//!                  f,            // <= function object being minimized
+//!                  initial_step, // <= initial step length 
+//!                  cntrl)        // <= control{} structure
+
+#ifndef POWELL_H
+#define POWELL_H
+
+#include <cassert>
+#include <cmath> 
+#include <cstdlib>
+#include <iomanip>
+#include <iostream>
+#include <limits>
+#include <vector>
+
+namespace powell {
+
+  typedef double Float;
+  typedef std::vector<Float> Floats;
+  typedef std::vector<Floats> Array;
+
+  template <typename T>
+  inline void error_abort(const T& t) {
+    std::cerr << "Error in powell.h: " << t << std::endl;
+    abort();
+  }
+
+  //! The control class gives the user a way to monitor and
+  //! control the minimization process.  You can specialize its
+  //! behaviour by deriving your own monitor from this class
+  //! and changing its behaviour.
+  //
+  class control {
+
+  private:
+    Float tol;	       	     //!< Stopping function tolerance
+    Float linmin_tol;  	     //!< Line minimization stopping tolerance
+    int max_nfeval;	     //!< Max number of function evaluations
+    int linmin_max_nfeval_;  //!< Max number of function evaluations in brent()
+    int debug;               //!< controls tracing, etc
+
+  public:
+    Float linmin_rel_tol() const { return linmin_tol; }
+    Float linmin_abs_tol() const { return linmin_tol; }
+    int linmin_max_nfeval() const { return linmin_max_nfeval_; }
+    Float linmin_xinit() const { return 1.0; }
+    size_t cache_size() const { return 20; }        //!< function cache size
+
+    // control() sets the defaults for the program
+    //
+    control(Float tol = 1e-7,            //!< stopping tolerance
+	    Float linmin_tol = 1e-7,	 //!< line minimization tolerance
+	    int max_nfeval = 0,          //!< max number of fn evalns, 
+	                                 //!<  if neg, halt w/ error
+	    int linmin_max_nfeval_ = 0,  //!< max number of fn evalns 
+	                                 //!<  per line minimization
+	    size_t debug = 0) 
+      : tol(tol),
+	linmin_tol(linmin_tol),
+	max_nfeval(max_nfeval),
+	linmin_max_nfeval_(linmin_max_nfeval_),
+	debug(debug)
+    { }  // control()
+
+    //! operator() is called at each powell iteration
+    //! If operator() returns true, the powell routine halts.
+    //! Specialize its behaviour if you want custom tracing
+    //
+    bool operator() (const Floats& ps,
+		     const Float fx, const Float fx_last,
+		     const int iteration,
+		     const int nfeval) const 
+    {
+      const Float TINY = 1.0e-25;
+      if (debug >= 1000) {  // print tracing information
+	if (iteration == 1)
+	  std::cout << "#" << std::setw(7) << "iter" << std::setw(10) << "f(x)" 
+		    << std::setw(10) << "nfeval" << std::endl;
+	std::cout << std::setw(8) << iteration << ' ' << std::setw(9) << fx
+		  << ' ' << std::setw(9) << nfeval << std::endl;
+
+	if (debug >= 100000) {
+	  std::cout << "  x = (" << ps[0];
+	  for (size_t j = 1; j < ps.size(); ++j)
+	    std::cerr << ' ' << ps[j];
+	  std::cerr << ')' << std::endl;
+	}
+      }
+
+      if (fx_last < fx)
+	std::cerr << " *** Error in powell::powell() iteration " << iteration 
+		  << ", fx = " << fx << " > fx_last = " << fx_last << std::endl;
+      
+      // This is the termination criterion used by Numerical Recipies
+      //
+      if (2.0*fabs(fx-fx_last) <= tol*(fabs(fx_last)+fabs(fx))+TINY)
+	return true;
+
+      if (max_nfeval != 0 && nfeval >= abs(max_nfeval)) {
+	if (max_nfeval > 0) {
+	  if (debug >= 100)
+	    std::cerr << "*** powell::powell() returning at iteration " << iteration
+		      << " in with error " << fx_last-fx 
+		      << std::endl;
+	  return true;
+	}
+	else
+	  error_abort("Too many iterations in powell::powell");
+      }
+
+      return false;
+    }  // operator()
+  };  // control{}
+
+
+  template <typename T>
+  inline T SQR(T x) { return x*x; }
+
+  inline void shft3(Float& a, Float& b, Float& c, const Float& d)
+  {
+    a=b;
+    b=c;
+    c=d;
+  }
+
+  inline Float sign(Float a, Float b) {
+    return ((b) > 0.0 ? fabs(a) : -fabs(a));
+  }
+
+  inline void shift(Float& a, Float& b, Float& c, Float d) { 
+    a = b; b = c; c = d; 
+  }
+
+  //! mnbrak()
+  //!
+  //! Given a function f, and given distinct initial points ax and bx,
+  //! this routine searches in the downhill direction (defined by the
+  //! function as evaluated at the initial points) and returns new points
+  //! ax, bx, cx that bracket a minimum of the function. Also returned
+  //! are the function values at the three points, fa, fb, and fc.
+  //!
+  //! The arguments to f() are:
+  //!  x     <= point at which to evaluate f
+  //!  df    => derivative of f at x
+  //!  f()   => value of f at x
+  //!
+  //! Here GOLD is the default ratio by which successive intervals are
+  //! magnified; GLIMIT is the maximum magnification allowed for a
+  //! parabolic-fit step.
+  //
+  template <typename F>
+  void mnbrak(Float& ax, Float& bx, Float& cx, 
+	      Float& fa, Float& fb, Float& fc, 
+	      F& f) 
+  { 
+    const float GOLD = 1.618034;
+    const float GLIMIT = 100.0;
+    const double TINY = 1.0e-20;
+
+    Float ulim, u, r, q, fu;
+    fa = f(ax); 
+    fb = f(bx); 
+    if (fb > fa) { 
+      // Switch roles of a and b so that we can go downhill in the
+      // direction from a to b. 
+      std::swap(ax, bx);
+      std::swap(fa, fb);
+    } 
+    cx = bx+GOLD*(bx-ax);       // First guess for c.
+    fc = f(cx); 
+    while (fb > fc) {           // Keep returning here until we bracket.
+      r = (bx-ax)*(fb- fc); 
+      // Compute u by parabolic extrapolation from a; b; c. 
+      // TINY is used to prevent any possible division by zero. 
+      q = (bx-cx)*(fb-fa); 
+      u = bx-((bx-cx)*q-(bx-ax)*r)/(2.0*sign(std::max(fabs(q-r),TINY),q-r)); 
+      ulim = bx+GLIMIT*(cx-bx); 
+      // We won't go farther than this. Test various possibilities: 
+      if ((bx-u)*(u-cx) > 0.0) { 
+	// Parabolic u is between b and c: try it.
+	fu = f(u); 
+	if (fu < fc) { 
+	  // Got a minimum between b and c. 
+	  ax = bx; 
+	  bx = u; 
+	  fa = fb; 
+	  fb = fu; 
+	  return; 
+	} 
+	else if (fu > fb) { 
+	  // Got a minimum between between a and u. */
+	  cx = u; 
+	  fc = fu; 
+	  return; 
+	} 
+	u = cx+GOLD*(cx-bx); 
+	// Parabolic fit was no use. Use default magnification. 
+	fu = f(u); 
+      } 
+      else if ((cx-u)*(u-ulim) > 0.0) { 
+	// Parabolic fit is between c and its allowed limit. 
+	fu = f(u); 
+	if (fu < fc) { 
+	  bx = cx; cx = u; u = cx+GOLD*(cx-bx);
+	  fb = fc; fc = fu; fu = f(u);
+	} 
+      } 
+      else if ((u-ulim)*(ulim-cx) >= 0.0) { 
+	// Limit parabolic u to maximum allowed value. 
+	u = ulim; 
+	fu = f(u); 
+      } 
+      else { 
+	// Reject parabolic u, use default magnification. 
+	u = cx+GOLD*(cx-bx); 
+	fu = f(u); 
+      } 
+      // Eliminate oldest point and continue. 
+      ax = bx; bx = cx; cx = u;
+      fa = fb; fb = fc; fc = fu;
+    } 
+  } // mnbrak()
+
+  //! brent() was modified so that fxmin is the smallest f() seen so far,
+  //! and xmin is the value of x at which fxmin was encountered.
+  //
+  template <typename F, typename C>
+  Float brent(const Float ax, const Float bx, const Float cx, F& f,
+	      C& control, Float &xmin)
+  {
+    const Float CGOLD = 0.3819660;     // Golden ratio
+    Float a, b, d=0.0, etemp, fu, fv, fw, fx;
+    Float p, q, r, u, v, w, x, xm;
+    Float e=0.0;
+    Float fxmin;
+
+    a = (ax < cx ? ax : cx);
+    b = (ax > cx ? ax : cx);
+    fw = fv = fx = f(x = w = v = bx);
+    xmin = x;
+    fxmin = fx;
+    size_t max_nfeval = f.nfeval() + control.linmin_max_nfeval();
+    for (size_t iter = 0; iter < 200; iter++) {
+      xm = 0.5*(a+b);
+      Float tol1 = control.linmin_rel_tol() * fabs(x) + control.linmin_abs_tol();
+      Float tol2 = 2.0*tol1;
+      if (fabs(x-xm) <= (tol2-0.5*(b-a))) {
+	// xmin = x;
+	// return fx;
+	return fxmin;
+      }
+      if (control.linmin_max_nfeval() != 0 && f.nfeval() >= max_nfeval) {
+	// xmin = x;
+	// return fx;
+	return fxmin;
+      }
+      if (fabs(e) > tol1) {
+	r = (x-w)*(fx-fv);
+	q = (x-v)*(fx-fw);
+	p = (x-v)*q-(x-w)*r;
+	q = 2.0*(q-r);
+	if (q > 0.0) 
+	  p = -p;
+	q = fabs(q);
+	etemp=e;
+	e=d;
+	if (fabs(p) >= fabs(0.5*q*etemp) || p <= q*(a-x) || p >= q*(b-x))
+	  d=CGOLD*(e=(x >= xm ? a-x : b-x));
+	else {
+	  d=p/q;
+	  u=x+d;
+	  if (u-a < tol2 || b-u < tol2)
+	    d=sign(tol1,xm-x);
+	}
+      } else {
+	d=CGOLD*(e=(x >= xm ? a-x : b-x));
+      }
+      u=(fabs(d) >= tol1 ? x+d : x+sign(tol1,d));
+      fu=f(u);
+      if (fu < fxmin) {
+	fxmin = fu;
+	xmin = u;
+      }
+      if (fu <= fx) {
+	if (u >= x) a=x; else b=x;
+	shift(v, w, x, u);
+	shift(fv, fw, fx, fu);
+      } else {
+	if (u < x) a=u; else b=u;
+	if (fu <= fw || w == x) {
+	  v=w;
+	  w=u;
+	  fv=fw;
+	  fw=fu;
+	} else if (fu <= fv || v == x || v == w) {
+	  v=u;
+	  fv=fu;
+	}
+      }
+    }
+    // xmin = x;
+    // return fx;
+    return fxmin;
+  }  // brent()
+
+  template <typename F>
+  struct f1cache {
+    const size_t cache_size; //!< number of function evaluations to cache
+    F& f;                    //!< original function 
+    Floats xs;               //!< cached x values
+    Floats fs;               //!< cached f vector
+    size_t last;             //!< last position inserted
+    size_t nfeval_;          //!< number of function evaluations
+
+    f1cache(F& f, size_t cache_size=2) 
+      : cache_size(cache_size), f(f), last(cache_size-1), nfeval_(0) { }
+
+    Float operator() (Float x) {
+      for (size_t i = 0; i < cache_size; ++i) {
+	if (i >= xs.size()) {
+	  Float fx = f(x);
+	  xs.push_back(x);
+	  fs.push_back(fx);
+	  ++nfeval_;
+	  return fx;
+	}
+	if (x == xs[i]) 
+	  return fs[i];
+      }
+      Float fx = f(x);
+      if (++last >= xs.size())
+	last = 0;
+      xs[last] = x;
+      fs[last] = fx;
+      ++nfeval_;
+      return fx;
+    } // f1cache::operator()
+
+    size_t nfeval() const { return nfeval_; }
+
+  };  // f1cache{}
+    
+  //! minimize1d() is the 1d-minimization routine.  It returns the minimizing x value.
+  //
+  template <typename F>
+  Float minimize1d(Float p, F& f, Float initial_step = 1.0, Float tol=1e-7, int max_nfeval=100)
+  {
+    control c(tol, tol, max_nfeval, max_nfeval);
+    f1cache<F> func(f, c.cache_size());
+    Float a = p;
+    Float x = a + initial_step;
+    Float b, fa, fx, fb, xmin;
+
+    mnbrak(a, x, b, fa, fx, fb, func);
+    brent(a, x, b, func, c, xmin);  // brent() returns min_f, if you want it
+    return xmin;
+  }  // minimize1d()
+
+  //! linmin_f{} maps the n-dimensional function into a 1-d function 
+  //!  in direction p
+  //
+  template <typename F>
+  struct linmin_f {      /* structure to communicate with linmin */
+    F& f;
+    Floats& p;
+    Floats& xi;
+    Floats xt;
+    
+    linmin_f(F& f, Floats& p, Floats& xi) 
+      : f(f), p(p), xi(xi), xt(p.size()) { }
+  
+    Float operator() (Float x) {
+      const size_t n = p.size();
+      for (size_t j = 0; j < n; ++j)
+	xt[j] = p[j] + x*xi[j];
+      Float fx = f(xt);
+      return fx;
+    }
+
+    size_t nfeval() const { return f.nfeval; }
+
+  };  // linmin_f{}
+  
+  template <typename F, typename C>
+  Float linmin(Floats &p, Floats &xi, F& f, C& control)
+  {
+    assert(p.size() == xi.size());
+    linmin_f<F> f1dim(f, p, xi);
+
+    Float a = 0.0;
+    Float x = control.linmin_xinit();
+    Float b, fa, fx, fb, xmin;
+
+    mnbrak(a, x, b, fa, fx, fb, f1dim);
+    Float fret = brent(a, x, b, f1dim, control, xmin);
+    for (size_t j = 0; j < p.size(); j++) {
+      xi[j] *= xmin;
+      p[j] += xi[j];
+    }
+    return fret;
+  }  // linmin()
+
+  template <typename F>
+  struct fcache {
+    typedef std::vector<Floats> Fss;
+    const size_t cache_size; //!< number of function evaluations to cache
+    F& f;                    //!< original function 
+    Fss xs;                  //!< cached x vectors
+    Floats fs;               //!< cached f vector
+    size_t last;             //!< last position inserted
+    size_t nfeval;           //!< number of function evaluations
+
+    fcache(F& f, size_t cache_size=2) 
+      : cache_size(cache_size), f(f), last(cache_size-1), nfeval(0) { }
+
+    Float operator() (const Floats& x) {
+      for (size_t i = 0; i < cache_size; ++i) {
+	if (i >= xs.size()) {
+	  Float fx = f(x);
+	  xs.push_back(x);
+	  fs.push_back(fx);
+	  ++nfeval;
+	  return fx;
+	}
+	if (x == xs[i]) 
+	  return fs[i];
+      }
+      Float fx = f(x);
+      if (++last >= xs.size())
+	last = 0;
+      xs[last] = x;
+      fs[last] = fx;
+      ++nfeval;
+      return fx;
+    } // fcache::operator()
+
+  };  // fcache{}
+
+  // Minimization of a function func of n variables. Input consists of an initial 
+  // starting point p[1..n]; an initial matrix xi[1..n][1..n], whose columns contain 
+  // the initial set of directions (usually the n unit vectors); and ftol, the 
+  // fractional tolerance in the function value such that failure to decrease by 
+  // more than this amount on one iteration signals doneness. On output, p is set 
+  // to the best point found, xi is the then-current direction set, fret is the 
+  // returned function value at p, and iter is the number of iterations taken. 
+  // The routine linmin is used.
+  //
+  template <typename F, typename Control>
+  Float minimize_n(Floats& p, F& f, Float initial_step, Control& cntrl)
+  {
+    fcache<F> func(f, cntrl.cache_size());
+    size_t n = p.size();
+    
+    Array xi(n);                 	// array of conjugate directions
+    for (size_t i = 0; i < n; ++i) {  	// initialize to unit vectors
+      xi[i].resize(n);
+      xi[i][i] = initial_step;
+    }
+
+    Float del,fp,fptt,t;
+
+    Floats pt(p);			// Save the initial point
+    Floats ptt(n), xit(n);
+
+    Float fret = func(p);
+
+    for (size_t iter = 1; ; ++iter) {
+      fp = fret;
+      size_t ibig=0;
+      del = 0.0;				// Will be the biggest function decrease.
+      for (size_t i = 0; i < n; i++) {		// In each iteration, loop over all directions.
+	for (size_t j = 0; j < n; j++)		// Copy the direction,
+	  xit[j] = xi[j][i];
+	fptt = fret;
+	fret = linmin(p, xit, func, cntrl); 	//  minimize along it,
+	if (fptt-fret > del) {			//  and record it if it is the largest decrease
+	  del = fptt-fret;			//   so far.
+	  ibig = i+1;
+	}
+      }
+
+      if (cntrl(p, fret, fp, iter, func.nfeval))
+	return fret;				// Normal termination
+
+      for (size_t j = 0; j < n; j++) {	// Construct the extrapolated point and the
+	ptt[j] = 2.0*p[j]-pt[j];	//  average direction moved. Save the
+	xit[j] = p[j]-pt[j];		//  old starting point.
+	pt[j] = p[j];
+      }
+
+      fptt = func(ptt);			// Function value at extrapolated point.
+
+      if (fptt < fp) {
+	t=2.0*(fp-2.0*fret+fptt)*SQR(fp-fret-del)-del*SQR(fp-fptt);
+	if (t < 0.0) {
+	  fret = linmin(p, xit, func, cntrl);
+	  for (size_t j = 0; j < n; j++) {
+	    xi[j][ibig-1] = xi[j][n-1];
+	    xi[j][n-1] = xit[j];
+	  }
+	}
+      }
+    }
+  }  // minimize_n()
+
+  //! minimize() calls minimize_n() if p.size() > 1, otherwise it does a one-dimensional
+  //! minimization.
+  //
+  template <typename F, typename Control>
+  Float minimize(Floats& p, F& f, Float initial_step, Control& control) {
+    assert(p.size() > 0);
+    if (p.size() > 1)
+      return minimize_n(p, f, initial_step, control);
+    else {    // 1-d minimization
+      fcache<F> func(f, control.cache_size());
+      Floats xi(1, 1);
+      return linmin(p, xi, func, control);
+    }
+  } // minimize()
+    
+  //! minimize() is a wrapper that calls the main minimization routine with a default control.
+  //
+  template <typename F>
+  Float minimize(Floats& p, F& f, Float initial_step = 1.0)
+  {
+    control c;
+    return minimize(p, f, initial_step, c);
+  }  // minimize()
+
+} //  namespace powell
+
+#endif // POWELL_H
+
+/*
+
+// Code for testing the 1-d minimization routines
+
+#include <cmath>
+#include "powell.h"
+
+struct f_type {
+  double operator() (double x) const {
+    double fx = sin(x);
+    std::cout << "x = " << x << ", fx = " << fx << std::endl;
+    return fx;
+  }
+};
+
+int main(int argc, char** argv) {
+  f_type f;
+  double xmin = powell::minimize1d(0, f, 0.1, 1e-5, 10);
+  std::cout << "xmin = " << xmin << std::endl;
+}
+*/
diff --git a/second-stage/programs/wlle/utility.h b/second-stage/programs/wlle/utility.h
index a6aa90c..19f48bc 100644
--- a/second-stage/programs/wlle/utility.h
+++ b/second-stage/programs/wlle/utility.h
@@ -894,6 +894,14 @@ inline std::ostream& operator<< (std::ostream& os, const boost::shared_ptr<T>& s
 struct resource_usage { };
 
 #ifndef __i386
+#define NO_PROC_SELF_STAT
+#endif
+
+#ifdef __APPLE__
+#define NO_PROC_SELF_STAT
+#endif
+
+#ifdef NO_PROC_SELF_STAT
 inline std::ostream& operator<< (std::ostream& os, resource_usage r)
 {
   return os;
@@ -902,21 +910,24 @@ inline std::ostream& operator<< (std::ostream& os, resource_usage r)
 inline std::ostream& operator<< (std::ostream& os, resource_usage r)
 {
   FILE* fp = fopen("/proc/self/stat", "r");
-  assert(fp);
-  int utime;
-  int stime;
-  unsigned int vsize;
-  unsigned int rss;
-  int result = 
-    fscanf(fp, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %d %d %*d %*d %*d %*d"
-           "%*u %*u %*d %u %u", &utime, &stime, &vsize, &rss);
-  assert(result == 4);
-  fclose(fp);
-  // s << "utime = " << utime << ", stime = " << stime << ", vsize = " << vsize << ", rss = " << rss
-;
-  // return s << "utime = " << utime << ", vsize = " << vsize;
-  return os << "utime " << float(utime)/1.0e2 << "s, vsize " 
-	    << float(vsize)/1048576.0 << " Mb.";
+  // Don't fail if we can't read that (such as on a Mac), just return.
+  if (fp == NULL) {
+     return os;
+  } else {
+    int utime;
+    int stime;
+    unsigned int vsize;
+    unsigned int rss;
+    int result = 
+      fscanf(fp, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %d %d %*d %*d %*d %*d"
+             "%*u %*u %*d %u %u", &utime, &stime, &vsize, &rss);
+    assert(result == 4);
+    fclose(fp);
+    // s << "utime = " << utime << ", stime = " << stime << ", vsize = " << vsize << ", rss = " << rss;
+    // return s << "utime = " << utime << ", vsize = " << vsize;
+    return os << "utime " << float(utime)/1.0e2 << "s, vsize " 
+	      << float(vsize)/1048576.0 << " Mb.";
+  }
 }
 #endif