diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d9be4e5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,67 @@ +*_wrapper.cxx +*.a +*.class +*.d +*.dep +*.o +*.orig +*.py[co] +*.so +*.swp + +/SParseval +/tmp + +evalb/evalb + +build* +dist* + +MANIFEST +regression-test-* +tags +TAGS + +python/bllipparser/CharniakParser.py +python/bllipparser/JohnsonReranker.py + +first-stage/PARSE/evalTree +first-stage/PARSE/parseAndEval +first-stage/PARSE/parseIt +first-stage/PARSE/parser_wrapper.C +first-stage/PARSE/swig/*/build/* +first-stage/PARSE/swig/*/lib/* +first-stage/TRAIN/iScale +first-stage/TRAIN/kn3Counts +first-stage/TRAIN/pSfgT +first-stage/TRAIN/pSgT +first-stage/TRAIN/pTgNt +first-stage/TRAIN/pUgT +first-stage/TRAIN/rCounts +first-stage/TRAIN/selFeats +first-stage/TRAIN/trainRs + +/second-stage/nbest + +second-stage/programs/*/read-tree.cc +second-stage/programs/eval-beam/main +second-stage/programs/eval-weights/eval-weights +second-stage/programs/features/best-*parses +second-stage/programs/features/count-*features +second-stage/programs/features/extract-*features +second-stage/programs/features/oracle-score +second-stage/programs/features/parallel-extract-nfeatures +second-stage/programs/features/parallel-extract-spfeatures +second-stage/programs/features/reranker_wrapper.C +second-stage/programs/features/swig/*/build/* +second-stage/programs/features/swig/*/lib/* +second-stage/programs/prepare-data/copy-trees-ss +second-stage/programs/prepare-data/prepare-ec-data +second-stage/programs/prepare-data/prepare-ec-data100 +second-stage/programs/prepare-data/prepare-new-data +second-stage/programs/prepare-data/ptb +second-stage/programs/wlle/avper +second-stage/programs/wlle/cvlm +second-stage/programs/wlle/cvlm-lbfgs +second-stage/programs/wlle/gavper +second-stage/programs/wlle/oracle diff --git a/Makefile b/Makefile index 4c0c390..0e14311 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ # # The following high-level goals may also be useful: # -# make nbestrain-clean # removes temporary files used in nbesttrain +# make nbesttrain-clean # removes temporary files used in nbesttrain # make nbest-oracle # oracle evaluation of n-best results # make features # extracts features from 20-fold parses # make train-reranker # trains reranker model @@ -68,12 +68,12 @@ # Version 4.1 and later gcc permit -march=native, but older # versions will need -march=pentium4 or -march=opteron # -# GCCFLAGS = -march=native -mfpmath=sse -msse2 -mmmx -m32 +# GCCFLAGS ?= -march=native -mfpmath=sse -msse2 -mmmx -m32 # CFLAGS is used for all C and C++ compilation # CFLAGS = -MMD -O3 -Wall -ffast-math -finline-functions -fomit-frame-pointer -fstrict-aliasing $(GCCFLAGS) -LDFLAGS = $(GCCLDFLAGS) + EXEC = time # for SWIG wrappers, use these flags instead @@ -88,11 +88,16 @@ EXEC = time # LDFLAGS = -g -Wall $(GCCLDFLAGS) # EXEC = valgrind -CXXFLAGS = $(CFLAGS) -Wno-deprecated +CXXFLAGS ?= $(CFLAGS) -Wno-deprecated export CFLAGS export CXXFLAGS export LDFLAGS +CC ?= gcc +CXX ?= g++ +export CC +export CXX + # Building the 20-fold training data with nbesttrain # -------------------------------------------------- @@ -101,7 +106,7 @@ export LDFLAGS # # PENNWSJTREEBANK must be set to the base directory of the Penn WSJ Treebank # -PENNWSJTREEBANK=/usr/local/data/Penn3/parsed/mrg/wsj/ +PENNWSJTREEBANK ?= /usr/local/data/Penn3/parsed/mrg/wsj/ # NPARSES is the number of alternative parses to consider for each sentence # @@ -190,6 +195,9 @@ FEATURESNICKNAME=sp # of these variable values can be found in the train-eval-reranker.sh # script. # + +ifndef USE_OLD_NONLBFGS_ESTIMATOR +# Newer estimator that depends on liblbfgs: ESTIMATOR=second-stage/programs/wlle/cvlm-lbfgs # ESTIMATORFLAGS are flags given to the estimator @@ -200,6 +208,22 @@ ESTIMATORFLAGS=-l 1 -c 10 -F 1 -n -1 -p 2 # ESTIMATORNICKNAME=lbfgs-l1c10F1n1p2 +else +# Older estimator that doesn't depend on liblbfgs: +ESTIMATOR=second-stage/programs/wlle/cvlm-owlqn + +# ESTIMATORFLAGS are flags given to the estimator +# +# These flags are for cvlm-owlqn: +ESTIMATORFLAGS=-l 1 -c 10 -F 1 -d 10 -n -1 +# The equivalent ESTIMATORFLAGS for cvlm: +# ESTIMATORFLAGS=-l 1 -c0 10 -Pyx_factor 1 -debug 10 -ns -1 + +# ESTIMATORNICKNAME is used to name the feature weights file +# +ESTIMATORNICKNAME=cvlm-l1c10P1 +endif + # ESTIMATORSTACKSIZE is the size (in KB) of the per-thread stacks # used during estimation # @@ -517,11 +541,14 @@ train-reranker: $(WEIGHTSFILEGZ) # This goal estimates the reranker feature weights (i.e., trains the # reranker). # +# Don't use auto-renaming as in "gzip foo" because it fails if there is +# more than one hardlink on the file (I'm looking at you Time Machine!). +# # $(WEIGHTSFILEGZ): $(ESTIMATOR) $(WEIGHTSFILEGZ): $(ESTIMATOR) $(MODELDIR)/features.gz $(FEATDIR)/train.gz $(FEATDIR)/dev.gz $(FEATDIR)/test1.gz $(ESTIMATORENV) $(ZCAT) $(FEATDIR)/train.gz | $(EXEC) $(ESTIMATOR) $(ESTIMATORFLAGS) -e $(FEATDIR)/dev.gz -f $(MODELDIR)/features.gz -o $(WEIGHTSFILE) -x $(FEATDIR)/test1.gz - rm -f $(WEIGHTSFILEGZ) - gzip $(WEIGHTSFILE) + gzip -c $(WEIGHTSFILE) >$(WEIGHTSFILEGZ) + rm -f $(WEIGHTSFILE) ######################################################################## # # diff --git a/Makefile.here b/Makefile.here new file mode 100644 index 0000000..c2485d5 --- /dev/null +++ b/Makefile.here @@ -0,0 +1,6 @@ +# export MAKEFILES=/projects/SSTP/MYBLLIP/bllip-parser/Makefile.here + +PENNWSJTREEBANK=/corpora/LDC/LDC99T42/RAW/parsed/mrg/wsj + +USE_OLD_NONLBFGS_ESTIMATOR=1 + diff --git a/Makefile.mac b/Makefile.mac new file mode 100644 index 0000000..5c455bd --- /dev/null +++ b/Makefile.mac @@ -0,0 +1,68 @@ +# To use these defaults set the MAKEFILES environment variable when calling make. +# export MAKEFILES=`pwd`/Makefile.mac + +uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') + +# For Mavericks (and Mountain Lion) I set up gcc using macports: +# sudo port install gcc47 +# sudo port select --set gcc mp-gcc47 +# sudo port install boost liblbfgs + +# Using MacPorts means that we have to override the default include and library locations. +LD_INCLUDE_PATH=/opt/local/include +LD_LIBRARY_PATH=/opt/local/lib + +export LD_INCLUDE_PATH +export LD_LIBRARY_PATH + +# The SParseval makefile uses a -lm dependency (a bad idea imho) which fails because there +# is no libm.a to be used. This trick works by mapping that to the system's libm.dylib. +# .LIBPATTERNS+=lib%.dylib +# export .LIBPATTERNS + +# On Mac OS X using -march=native doesn't seem to work (a compilation error will occur). +# Turns out there is a problem with AVX instructions on OSX for gcc after 4.2. +# http://stackoverflow.com/questions/12016281/g-no-such-instruction-with-avx +# http://mac-os-forge.2317878.n4.nabble.com/gcc-as-AVX-binutils-and-MacOS-X-10-7-td144472.html +# So here's what works for me (with or without the -mfpmath=sse - the default is 387): + +GCCFLAGS = -m64 -march=x86-64 -mfpmath=sse -msse -msse2 -msse3 -msse4 -msse4.1 -msse4.2 -mssse3 -I${LD_INCLUDE_PATH} + +# Must use export because otherwise second-stage/programs/wlle/Makefile doesn't get the message. +export GCCFLAGS + +# CC = condor_compile gcc +CC = gcc +export CC + +# CXX = condor_compile g++ +CXX = g++ +export CXX + +# fast options +# Compilation help: you may need to remove -march=native on older compilers. +# GCCFLAGS=-march=native -mfpmath=sse -msse2 -mmmx +FOPENMP=-fopenmp +# CFLAGS=-MMD -O3 -ffast-math -fstrict-aliasing -Wall -finline-functions $(GCCFLAGS) $(FOPENMP) +# LDFLAGS=$(FOPENMP) -L/opt/local/lib + +# debugging options +# GCCFLAGS= +# FOPENMP= +# CFLAGS=-MMD -O0 -g $(GCCFLAGS) $(FOPENMP) +# LDFLAGS=-g $(FOPENMP) +# CXXFLAGS=${CFLAGS} -Wno-deprecated + +# CFLAGS is used for all C and C++ compilation +# +CFLAGS = -MMD -O3 -Wall -ffast-math -finline-functions -fomit-frame-pointer -fstrict-aliasing $(GCCFLAGS) +export CFLAGS + +CXXFLAGS=${CFLAGS} -Wno-deprecated +export CXXFLAGS + +LDFLAGS = -L${LD_LIBRARY_PATH} $(GCCLDFLAGS) +export LDFLAGS + +# This is a handy place to put a local setting without changing Makefile. +# PENNWSJTREEBANK = /usr/local/data/Penn3/parsed/mrg/wsj diff --git a/parse.sh b/parse.sh index 4c7a76c..0bf5668 100755 --- a/parse.sh +++ b/parse.sh @@ -14,5 +14,6 @@ # RERANKDATA=ec50-connll-ic-s5 # RERANKDATA=ec50-f050902-lics5 MODELDIR=second-stage/models/ec50spfinal -ESTIMATORNICKNAME=cvlm-l1c10P1 +# ESTIMATORNICKNAME=cvlm-l1c10P1 +ESTIMATORNICKNAME=lbfgs-l1c10F1n1p2 first-stage/PARSE/parseIt -l399 -N50 first-stage/DATA/EN/ $* | second-stage/programs/features/best-parses -l $MODELDIR/features.gz $MODELDIR/$ESTIMATORNICKNAME-weights.gz diff --git a/second-stage/models/ec50spfinal/cvlm-l1c10P1-weights.gz b/second-stage/models/ec50spfinal/cvlm-l1c10P1-weights.gz index 97b917d..1eb900f 100644 Binary files a/second-stage/models/ec50spfinal/cvlm-l1c10P1-weights.gz and b/second-stage/models/ec50spfinal/cvlm-l1c10P1-weights.gz differ diff --git a/second-stage/models/ec50spfinal/features.gz b/second-stage/models/ec50spfinal/features.gz index d24dc76..7544432 100644 Binary files a/second-stage/models/ec50spfinal/features.gz and b/second-stage/models/ec50spfinal/features.gz differ diff --git a/second-stage/programs/eval-beam/utility.h b/second-stage/programs/eval-beam/utility.h index 58db9f4..8cfe57a 100644 --- a/second-stage/programs/eval-beam/utility.h +++ b/second-stage/programs/eval-beam/utility.h @@ -891,24 +891,42 @@ inline std::ostream& operator<< (std::ostream& os, const boost::shared_ptr& s struct resource_usage { }; +#ifndef __i386 +#define NO_PROC_SELF_STAT +#endif + +#ifdef __APPLE__ +#define NO_PROC_SELF_STAT +#endif + +#ifdef NO_PROC_SELF_STAT +inline std::ostream& operator<< (std::ostream& os, resource_usage r) +{ + return os; +} +#else // Assume we are on a 586 linux inline std::ostream& operator<< (std::ostream& os, resource_usage r) { FILE* fp = fopen("/proc/self/stat", "r"); - assert(fp); - int utime; - int stime; - unsigned int vsize; - unsigned int rss; - int result = - fscanf(fp, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %d %d %*d %*d %*d %*d" - "%*u %*u %*d %u %u", &utime, &stime, &vsize, &rss); - assert(result == 4); - fclose(fp); - // s << "utime = " << utime << ", stime = " << stime << ", vsize = " << vsize << ", rss = " << rss -; - // return s << "utime = " << utime << ", vsize = " << vsize; - return os << "utime " << float(utime)/1.0e2 << "s, vsize " - << float(vsize)/1048576.0 << " Mb."; + // Don't fail if we can't read that (such as on a Mac), just return. + if (fp == NULL) { + return os; + } else { + int utime; + int stime; + unsigned int vsize; + unsigned int rss; + int result = + fscanf(fp, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %d %d %*d %*d %*d %*d" + "%*u %*u %*d %u %u", &utime, &stime, &vsize, &rss); + assert(result == 4); + fclose(fp); + // s << "utime = " << utime << ", stime = " << stime << ", vsize = " << vsize << ", rss = " << rss; + // return s << "utime = " << utime << ", vsize = " << vsize; + return os << "utime " << float(utime)/1.0e2 << "s, vsize " + << float(vsize)/1048576.0 << " Mb."; + } } +#endif #endif // UTILITY_H diff --git a/second-stage/programs/eval-weights/Makefile b/second-stage/programs/eval-weights/Makefile index b6dbd65..a5612f6 100644 --- a/second-stage/programs/eval-weights/Makefile +++ b/second-stage/programs/eval-weights/Makefile @@ -14,7 +14,7 @@ SOURCES = best-indices.cc best-parse.cc best-parses.cc compare-models.cc data.c TARGETS = eval-weights # best-indices best-parse best-parses compare-models pretty-print OBJECTS = $(patsubst %.l,%.o,$(patsubst %.c,%.o,$(SOURCES:%.cc=%.o))) -CC = gcc +CC ?= gcc all: $(TARGETS) diff --git a/second-stage/programs/eval-weights/utility.h b/second-stage/programs/eval-weights/utility.h index 45ef28e..7c816cd 100644 --- a/second-stage/programs/eval-weights/utility.h +++ b/second-stage/programs/eval-weights/utility.h @@ -882,6 +882,14 @@ inline std::ostream& operator<< (std::ostream& os, const boost::shared_ptr& s struct resource_usage { }; #ifndef __i386 +#define NO_PROC_SELF_STAT +#endif + +#ifdef __APPLE__ +#define NO_PROC_SELF_STAT +#endif + +#ifdef NO_PROC_SELF_STAT inline std::ostream& operator<< (std::ostream& os, resource_usage r) { return os; @@ -890,21 +898,24 @@ inline std::ostream& operator<< (std::ostream& os, resource_usage r) inline std::ostream& operator<< (std::ostream& os, resource_usage r) { FILE* fp = fopen("/proc/self/stat", "r"); - assert(fp); - int utime; - int stime; - unsigned int vsize; - unsigned int rss; - int result = - fscanf(fp, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %d %d %*d %*d %*d %*d" - "%*u %*u %*d %u %u", &utime, &stime, &vsize, &rss); - assert(result == 4); - fclose(fp); - // s << "utime = " << utime << ", stime = " << stime << ", vsize = " << vsize << ", rss = " << rss -; - // return s << "utime = " << utime << ", vsize = " << vsize; - return os << "utime " << float(utime)/1.0e2 << "s, vsize " - << float(vsize)/1048576.0 << " Mb."; + // Don't fail if we can't read that (such as on a Mac), just return. + if (fp == NULL) { + return os; + } else { + int utime; + int stime; + unsigned int vsize; + unsigned int rss; + int result = + fscanf(fp, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %d %d %*d %*d %*d %*d" + "%*u %*u %*d %u %u", &utime, &stime, &vsize, &rss); + assert(result == 4); + fclose(fp); + // s << "utime = " << utime << ", stime = " << stime << ", vsize = " << vsize << ", rss = " << rss; + // return s << "utime = " << utime << ", vsize = " << vsize; + return os << "utime " << float(utime)/1.0e2 << "s, vsize " + << float(vsize)/1048576.0 << " Mb."; + } } #endif diff --git a/second-stage/programs/features/utility.h b/second-stage/programs/features/utility.h index 58db9f4..8cfe57a 100644 --- a/second-stage/programs/features/utility.h +++ b/second-stage/programs/features/utility.h @@ -891,24 +891,42 @@ inline std::ostream& operator<< (std::ostream& os, const boost::shared_ptr& s struct resource_usage { }; +#ifndef __i386 +#define NO_PROC_SELF_STAT +#endif + +#ifdef __APPLE__ +#define NO_PROC_SELF_STAT +#endif + +#ifdef NO_PROC_SELF_STAT +inline std::ostream& operator<< (std::ostream& os, resource_usage r) +{ + return os; +} +#else // Assume we are on a 586 linux inline std::ostream& operator<< (std::ostream& os, resource_usage r) { FILE* fp = fopen("/proc/self/stat", "r"); - assert(fp); - int utime; - int stime; - unsigned int vsize; - unsigned int rss; - int result = - fscanf(fp, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %d %d %*d %*d %*d %*d" - "%*u %*u %*d %u %u", &utime, &stime, &vsize, &rss); - assert(result == 4); - fclose(fp); - // s << "utime = " << utime << ", stime = " << stime << ", vsize = " << vsize << ", rss = " << rss -; - // return s << "utime = " << utime << ", vsize = " << vsize; - return os << "utime " << float(utime)/1.0e2 << "s, vsize " - << float(vsize)/1048576.0 << " Mb."; + // Don't fail if we can't read that (such as on a Mac), just return. + if (fp == NULL) { + return os; + } else { + int utime; + int stime; + unsigned int vsize; + unsigned int rss; + int result = + fscanf(fp, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %d %d %*d %*d %*d %*d" + "%*u %*u %*d %u %u", &utime, &stime, &vsize, &rss); + assert(result == 4); + fclose(fp); + // s << "utime = " << utime << ", stime = " << stime << ", vsize = " << vsize << ", rss = " << rss; + // return s << "utime = " << utime << ", vsize = " << vsize; + return os << "utime " << float(utime)/1.0e2 << "s, vsize " + << float(vsize)/1048576.0 << " Mb."; + } } +#endif #endif // UTILITY_H diff --git a/second-stage/programs/prepare-data/utility.h b/second-stage/programs/prepare-data/utility.h index 45ef28e..7c816cd 100644 --- a/second-stage/programs/prepare-data/utility.h +++ b/second-stage/programs/prepare-data/utility.h @@ -882,6 +882,14 @@ inline std::ostream& operator<< (std::ostream& os, const boost::shared_ptr& s struct resource_usage { }; #ifndef __i386 +#define NO_PROC_SELF_STAT +#endif + +#ifdef __APPLE__ +#define NO_PROC_SELF_STAT +#endif + +#ifdef NO_PROC_SELF_STAT inline std::ostream& operator<< (std::ostream& os, resource_usage r) { return os; @@ -890,21 +898,24 @@ inline std::ostream& operator<< (std::ostream& os, resource_usage r) inline std::ostream& operator<< (std::ostream& os, resource_usage r) { FILE* fp = fopen("/proc/self/stat", "r"); - assert(fp); - int utime; - int stime; - unsigned int vsize; - unsigned int rss; - int result = - fscanf(fp, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %d %d %*d %*d %*d %*d" - "%*u %*u %*d %u %u", &utime, &stime, &vsize, &rss); - assert(result == 4); - fclose(fp); - // s << "utime = " << utime << ", stime = " << stime << ", vsize = " << vsize << ", rss = " << rss -; - // return s << "utime = " << utime << ", vsize = " << vsize; - return os << "utime " << float(utime)/1.0e2 << "s, vsize " - << float(vsize)/1048576.0 << " Mb."; + // Don't fail if we can't read that (such as on a Mac), just return. + if (fp == NULL) { + return os; + } else { + int utime; + int stime; + unsigned int vsize; + unsigned int rss; + int result = + fscanf(fp, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %d %d %*d %*d %*d %*d" + "%*u %*u %*d %u %u", &utime, &stime, &vsize, &rss); + assert(result == 4); + fclose(fp); + // s << "utime = " << utime << ", stime = " << stime << ", vsize = " << vsize << ", rss = " << rss; + // return s << "utime = " << utime << ", vsize = " << vsize; + return os << "utime " << float(utime)/1.0e2 << "s, vsize " + << float(vsize)/1048576.0 << " Mb."; + } } #endif diff --git a/second-stage/programs/wlle/Makefile b/second-stage/programs/wlle/Makefile index 75a803c..37e1d9f 100644 --- a/second-stage/programs/wlle/Makefile +++ b/second-stage/programs/wlle/Makefile @@ -10,8 +10,8 @@ # License for the specific language governing permissions and limitations # under the License. -SOURCES = avper.cc cvlm-lbfgs.cc hlm.cc gavper.cc lm.cc lmdata.c oracle.cc wavper.cc wlle.cc # cvlm.cc OWLQN.cpp TerminationCriterion.cpp -TARGETS = avper gavper oracle cvlm-lbfgs # cvlm lm oracle wavper cvlm-owlqn hlm +SOURCES = avper.cc hlm.cc gavper.cc lm.cc lmdata.c oracle.cc wavper.cc wlle.cc cvlm-owlqn.cc OWLQN.cpp TerminationCriterion.cpp # cvlm-lbfgs.cc cvlm.cc +TARGETS = avper gavper oracle cvlm-owlqn # cvlm-lbfgs # cvlm lm oracle wavper cvlm-owlqn hlm OBJECTS = $(patsubst %.cpp,%.o,$(patsubst %.l,%.o,$(patsubst %.c,%.o,$(SOURCES:%.cc=%.o)))) all: $(TARGETS) @@ -46,15 +46,15 @@ libdata.a: data.o liblmdata.a: lmdata.o ar rcv liblmdata.a lmdata.o; ranlib liblmdata.a -CC=gcc +# CC=gcc # fast options # Compilation help: you may need to remove -march=native on older compilers. -GCCFLAGS=-march=native -mfpmath=sse -msse2 -mmmx +# GCCFLAGS=-march=native -mfpmath=sse -msse2 -mmmx FOPENMP=-fopenmp -CFLAGS=-MMD -O3 -ffast-math -fstrict-aliasing -Wall -finline-functions $(GCCFLAGS) $(FOPENMP) -LDFLAGS=$(FOPENMP) -CXXFLAGS=${CFLAGS} -Wno-deprecated +# CFLAGS=-MMD -O3 -ffast-math -fstrict-aliasing -Wall -finline-functions $(GCCFLAGS) $(FOPENMP) +LDFLAGS+=$(FOPENMP) +# CXXFLAGS=${CFLAGS} -Wno-deprecated # debugging options # GCCFLAGS= diff --git a/second-stage/programs/wlle/OWLQN.cpp b/second-stage/programs/wlle/OWLQN.cpp new file mode 100644 index 0000000..df730a3 --- /dev/null +++ b/second-stage/programs/wlle/OWLQN.cpp @@ -0,0 +1,313 @@ +#include "OWLQN.h" + +#include "TerminationCriterion.h" + +#include +#include +#include +#include +#include + +#include + +#define EPSILON 1e-20 + +using namespace std; + +double OptimizerState::dotProduct(const DblVec& a, const DblVec& b) { + double result = 0; + for (size_t i=0; i= 0; i--) { + if (roList[i] > 0) { + alphas[i] = -dotProduct(*sList[i], dir) / roList[i]; + addMult(dir, *yList[i], alphas[i]); + if (lastGoodRo == -1) lastGoodRo = i; + } + } + + if (lastGoodRo == -1) return; + + const DblVec& lastY = *yList[lastGoodRo]; + double yDotY = dotProduct(lastY, lastY); + + if (yDotY == 0) return; + + double scalar = roList[lastGoodRo] / yDotY; + scale(dir, scalar); + + for (int i = 0; i < count; i++) { + if (roList[i] > 0) { + double beta = dotProduct(*yList[i], dir) / roList[i]; + addMult(dir, *sList[i], -alphas[i] - beta); + } + } + } +} + +void OptimizerState::MakeSteepestDescDir() { + if (l1weight == 0) { + scaleInto(dir, grad, -1); + } else { + + for (size_t i=0; i 0) { + dir[i] = -grad[i] - l1weight; + } else { + if (grad[i] < -l1weight) { + dir[i] = -grad[i] - l1weight; + } else if (grad[i] > l1weight) { + dir[i] = -grad[i] + l1weight; + } else { + dir[i] = 0; + } + } + } + } + + steepestDescDir = dir; +} + +void OptimizerState::FixDirSigns() { + if (l1weight > 0) { + for (size_t i = 0; i 0) { + val += dir[i] * (grad[i] + l1weight); + } else if (dir[i] < 0) { + val += dir[i] * (grad[i] - l1weight); + } else if (dir[i] > 0) { + val += dir[i] * (grad[i] + l1weight); + } + } + } + + return val; + } +} + +void OptimizerState::GetNextPoint(double alpha) { + addMultInto(newX, x, dir, alpha); + if (l1weight > 0) { + for (size_t i=0; i 0) { + for (size_t i=0; i= 0) { + cerr << "L-BFGS chose a non-descent direction: check your gradient!" << endl; + exit(1); + } + + double alpha = 1.0; + double backoff = 0.5; + if (iter == 1) { + //alpha = 0.1; + //backoff = 0.5; + double normDir = sqrt(dotProduct(dir, dir)); + assert(normDir != 0); + assert(normDir != normDir+1); + alpha = (1 / normDir); + backoff = 0.1; + } + + const double c1 = 1e-4; + double oldValue = value; + + while (true) { + GetNextPoint(alpha); + value = EvalL1(); + + if (value <= oldValue + c1 * origDirDeriv * alpha) break; + + if (!quiet) cout << "." << flush; + + alpha *= backoff; + } + + if (!quiet) cout << endl; +} + +void OptimizerState::Shift() { + DblVec *nextS = NULL, *nextY = NULL; + + int listSize = (int)sList.size(); + + if (listSize < m) { + try { + nextS = new vector(dim); + nextY = new vector(dim); + } catch (bad_alloc) { + m = listSize; + if (nextS != NULL) { + delete nextS; + nextS = NULL; + } + } + } + + if (nextS == NULL) { + nextS = sList.front(); + sList.pop_front(); + nextY = yList.front(); + yList.pop_front(); + roList.pop_front(); + } + + addMultInto(*nextS, newX, x, -1); + addMultInto(*nextY, newGrad, grad, -1); + double ro = dotProduct(*nextS, *nextY); + if (ro == 0) + ro = EPSILON; + + sList.push_back(nextS); + yList.push_back(nextY); + roList.push_back(ro); + + x.swap(newX); + grad.swap(newGrad); + + iter++; +} + +void OWLQN::Minimize(DifferentiableFunction& function, const DblVec& initial, DblVec& minimum, double l1weight, double tol, int m) const { + OptimizerState state(function, initial, m, l1weight, quiet); + + if (!quiet) { + cout << setprecision(4) << scientific << right; + cout << endl << "Optimizing function of " << state.dim << " variables with OWL-QN parameters:" << endl; + cout << " l1 regularization weight: " << l1weight << "." << endl; + cout << " L-BFGS memory parameter (m): " << m << endl; + cout << " Convergence tolerance: " << tol << endl; + cout << endl; + cout << "Iter n: new_value (conv_crit) line_search" << endl << flush; + cout << "Iter 0: " << setw(10) << state.value << " (***********) " << flush; + } + + ostringstream str; + termCrit->GetValue(state, str); + + while (true) { + state.UpdateDir(); + state.BackTrackingLineSearch(); + + ostringstream str; + double termCritVal = termCrit->GetValue(state, str); + if (!quiet) { + cout << "Iter " << setw(4) << state.iter << ": " << setw(10) << state.value; + cout << str.str() << flush; + } + + if (termCritVal < tol) break; + + state.Shift(); + } + + if (!quiet) cout << endl; + + minimum = state.newX; +} diff --git a/second-stage/programs/wlle/OWLQN.h b/second-stage/programs/wlle/OWLQN.h new file mode 100644 index 0000000..50294db --- /dev/null +++ b/second-stage/programs/wlle/OWLQN.h @@ -0,0 +1,101 @@ +#pragma once + +#include +#include +#include +#include + +typedef std::vector DblVec; + +struct DifferentiableFunction { + virtual double Eval(const DblVec& input, DblVec& gradient) = 0; +}; + +#include "TerminationCriterion.h" + +class OWLQN { + bool quiet; + bool responsibleForTermCrit; + +public: + TerminationCriterion *termCrit; + + OWLQN(bool quiet = false) : quiet(quiet) { + termCrit = new RelativeMeanImprovementCriterion(5); + responsibleForTermCrit = true; + } + + OWLQN(TerminationCriterion *termCrit, bool quiet = false) : quiet(quiet), termCrit(termCrit) { + responsibleForTermCrit = false; + } + + ~OWLQN() { + if (termCrit && responsibleForTermCrit) delete termCrit; + } + + void Minimize(DifferentiableFunction& function, const DblVec& initial, DblVec& minimum, double l1weight = 1.0, double tol = 1e-4, int m = 10) const; + void SetQuiet(bool q) { quiet = q; } + +}; + +class OptimizerState { + friend class OWLQN; + + struct DblVecPtrDeque : public std::deque { + ~DblVecPtrDeque() { + for (size_t s = 0; s < size(); ++s) { + if ((*this)[s] != NULL) delete (*this)[s]; + } + } + }; + + DblVec x, grad, newX, newGrad, dir; + DblVec& steepestDescDir; // references newGrad to save memory, since we don't ever use both at the same time + DblVecPtrDeque sList, yList; + std::deque roList; + std::vector alphas; + double value; + int iter, m; + const size_t dim; + DifferentiableFunction& func; + double l1weight; + bool quiet; + + static double dotProduct(const DblVec& a, const DblVec& b); + static void add(DblVec& a, const DblVec& b); + static void addMult(DblVec& a, const DblVec& b, double c); + static void addMultInto(DblVec& a, const DblVec& b, const DblVec& c, double d); + static void scale(DblVec& a, double b); + static void scaleInto(DblVec& a, const DblVec& b, double c); + + void MapDirByInverseHessian(); + void UpdateDir(); + double DirDeriv() const; + void GetNextPoint(double alpha); + void BackTrackingLineSearch(); + void Shift(); + void MakeSteepestDescDir(); + double EvalL1(); + void FixDirSigns(); + void TestDirDeriv(); + + OptimizerState(DifferentiableFunction& f, const DblVec& init, int m, double l1weight, bool quiet) + : x(init), grad(init.size()), newX(init), newGrad(init.size()), dir(init.size()), steepestDescDir(newGrad), alphas(m), iter(1), m(m), dim(init.size()), func(f), l1weight(l1weight), quiet(quiet) { + if (m <= 0) { + std::cerr << "m must be an integer greater than zero." << std::endl; + exit(1); + } + value = EvalL1(); + grad = newGrad; + } + +public: + const DblVec& GetX() const { return newX; } + const DblVec& GetLastX() const { return x; } + const DblVec& GetGrad() const { return newGrad; } + const DblVec& GetLastGrad() const { return grad; } + const DblVec& GetLastDir() const { return dir; } + double GetValue() const { return value; } + int GetIter() const { return iter; } + size_t GetDim() const { return dim; } +}; diff --git a/second-stage/programs/wlle/TerminationCriterion.cpp b/second-stage/programs/wlle/TerminationCriterion.cpp new file mode 100644 index 0000000..a0e8488 --- /dev/null +++ b/second-stage/programs/wlle/TerminationCriterion.cpp @@ -0,0 +1,28 @@ +#include "TerminationCriterion.h" + +#include "OWLQN.h" + +#include +#include +#include + +using namespace std; + +double RelativeMeanImprovementCriterion::GetValue(const OptimizerState& state, std::ostream& message) { + double retVal = numeric_limits::infinity(); + + if (prevVals.size() > 5) { + double prevVal = prevVals.front(); + if (prevVals.size() == 10) prevVals.pop_front(); + double averageImprovement = (prevVal - state.GetValue()) / prevVals.size(); + double relAvgImpr = averageImprovement /(1+fabs(state.GetValue())); // !!!! modified MJ, 26th Aug 2008 + message << setprecision(4) << scientific << right; + message << " (" << setw(10) << relAvgImpr << ") " << flush; + retVal = relAvgImpr; + } else { + message << " (wait for five iters) " << flush; + } + + prevVals.push_back(state.GetValue()); + return retVal; +} diff --git a/second-stage/programs/wlle/TerminationCriterion.h b/second-stage/programs/wlle/TerminationCriterion.h new file mode 100644 index 0000000..3059972 --- /dev/null +++ b/second-stage/programs/wlle/TerminationCriterion.h @@ -0,0 +1,21 @@ +#pragma once + +#include +#include +#include + +class OptimizerState; + +struct TerminationCriterion { + virtual double GetValue(const OptimizerState& state, std::ostream& message) = 0; +}; + +class RelativeMeanImprovementCriterion : public TerminationCriterion { + const int numItersToAvg; + std::deque prevVals; + +public: + RelativeMeanImprovementCriterion(int numItersToAvg = 5) : numItersToAvg(numItersToAvg) {} + + double GetValue(const OptimizerState& state, std::ostream& message); +}; diff --git a/second-stage/programs/wlle/cvlm-owlqn.cc b/second-stage/programs/wlle/cvlm-owlqn.cc new file mode 100644 index 0000000..d8c37d8 --- /dev/null +++ b/second-stage/programs/wlle/cvlm-owlqn.cc @@ -0,0 +1,758 @@ +// cvlm-owlqn.cc -- A linear model estimator for a variety of user-selectable loss functions. + +const char usage[] = +"cvlm-owlqn -- A cross-validating linear model estimator for a variety of user-selectable loss functions.\n" +"\n" +" Mark Johnson, 21st July 2008\n" +"\n" +" It uses a modified version of L-BFGS developed by Galen Andrew at Microsoft Research\n" +" to be especially efficient for L1-regularized loss functions.\n" +"\n" +" The regularizer weight(s) are set by cross-validation on development data.\n" +"\n" +"Usage: cvlm-owlqn [-h] [-d debug_level] [-c c0] [-C c00] [-p p] [-r r] [-s s] [-t tol]\n" +" [-l ltype] [-F f] [-G] [-n ns] [-f feat-file]\n" +" [-o weights-file] [-e eval-file] [-x eval-file2]\n" +" < train-file\n" +"\n" +"where:\n" +"\n" +" debug_level > 0 controls the amount of output produced\n" +"\n" +" -c c0 is the initial value for the regularizer constant.\n" +"\n" +" -C c00 multiplies the regularizer constant for the first feature class\n" +" by c00 (this can be used to allow the first feature class to be regularized less).\n" +"\n" +" -l ltype identifies the type of loss function used:\n" +"\n" +" -l 0 - log loss (c0 ~ 5)\n" +" -l 1 - EM-style log loss (c0 ~ 5)\n" +" -l 2 - pairwise log loss \n" +" -l 3 - exp loss (c0 ~ 25, s ~ 1e-5)\n" +" -l 4 - log exp loss (c0 ~ 1e-4)\n" +" -l 5 - maximize expected F-score (c ~ ?)\n" +"\n" +" -r r specifies that the weights are initialized to random values in\n" +" [-r ... +r],\n" +"\n" +" -t tol specifies the stopping tolerance for the IWLQN optimizer\n" +"\n" +" -F f indicates that a parse should be taken as correct\n" +" proportional to f raised to its f-score, and\n" +"\n" +" -G indicates that each sentence is weighted by the number of\n" +" edges in its gold parse.\n" +"\n" +" -n ns is the maximum number of ':' characters in a , used to\n" +" determine how features are binned into feature classes (ns = -1 bins\n" +" all features into the same class)\n" +"\n" +" -f feat-file is a file of lines, used for\n" +" cross-validating regularizer weights,\n" +"\n" +" train-file, eval-file and eval-file2 are files from which training and evaluation\n" +" data are read (if eval-file ends in the suffix .bz2 then bzcat is used\n" +" to read it; if no eval-file is specified, then the program tests on the\n" +" training data),\n" +"\n" +" weights-file is a file to which the estimated weights are written,\n" +"\n" +"The function that the program minimizes is:\n" +"\n" +" Q(w) = s * (- L(w) + c * sum_j pow(fabs(w[j]), p) ), where:\n" +"\n" +" L(w) is the loss function to be optimized.\n" +"\n" +"With debug = 0, the program writes a single line to stdout:\n" +"\n" +"c p r s it nzeroweights/nweights neglogP/nsentence ncorrect/nsentences\n" +"\n" +"With debug >= 10, the program writes out a histogram of weights as well\n" +"\n" +"Data format:\n" +"-----------\n" +"\n" +" --> [S=] *\n" +" --> [G=] N= *\n" +" --> [P=

] [W=] *,\n" +" --> [=]\n" +"\n" +"NS is the number of sentences.\n" +"\n" +"Each consists of N s. is the gold standard\n" +"score. To get parsing precision and recall results, set to the\n" +"number of edges in the gold standard parse. To get accuracy results,\n" +"set to 1 (the default).\n" +"\n" +"A consists of pairs.

is the parse's possible highest\n" +"score and is the parse's actual score. To get parsing precision and\n" +"recall results, set

to the number of edges in the parse and to\n" +"the number of edges in common between the gold and parse trees.\n" +"\n" +"A consists of a feature (a non-negative integer) and an optional\n" +"count (a real).\n" +"\n" +"The default for all numbers except is 1. The default for is 0.\n"; + +#include "custom_allocator.h" // must come first +#define _GLIBCPP_CONCEPT_CHECKS // uncomment this for checking + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lmdata.h" +#include "powell.h" +#include "utility.h" + +#include "OWLQN.h" +#include "TerminationCriterion.h" + +typedef std::vector doubles; +typedef std::vector size_ts; + +int debug_level = 0; + +enum loss_type { log_loss, em_log_loss, em_log_loss_noomp, pairwise_log_loss, exp_loss, log_exp_loss, + expected_fscore_loss }; + +const char* loss_type_name[] = { "log_loss", "em_log_loss", "em_log_loss_noomp", "pairwise_log_loss", "exp_loss", + "log_exp_loss", "expected_fscore_loss" }; + +void print_histogram(int nx, const double x[], int nbins=20) { + int nx_nonzero = 0; + for (int i = 0; i < nx; ++i) + if (x[i] != 0) + ++nx_nonzero; + + std::cout << "# There are " << nx_nonzero << " non-zero values and " + << nx-nx_nonzero << " zero values." << std::endl; + + if (nx_nonzero > 0) { + std::vector s; + s.reserve(nx_nonzero); + for (int i = 0; i < nx; ++i) + if (x[i] != 0) + s.push_back(x[i]); + std::sort(s.begin(), s.end()); + for (int i = 0; i <= nbins; ++i) { + int j = i*(nx_nonzero-1); + j /= nbins; + std::cout << float(i)/float(nbins) << '\t' << s[j] << std::endl; + } + } +} // print_histogram() + +// f_df() evaluates the statistics of the corpus, and prints the f-score +// if required +// +double f_df(loss_type ltype, corpus_type* corpus, const double x[], double df_dx[]) { + Float sum_g = 0, sum_p = 0, sum_w = 0, L = 0; + + switch (ltype) { + case log_loss: + L = corpus_stats(corpus, &x[0], &df_dx[0], &sum_g, &sum_p, &sum_w); + break; + case em_log_loss: + L = emll_corpus_stats(corpus, &x[0], &df_dx[0], &sum_g, &sum_p, &sum_w); + break; + case em_log_loss_noomp: + L = emll_corpus_stats_noomp(corpus, &x[0], &df_dx[0], &sum_g, &sum_p, &sum_w); + break; + case pairwise_log_loss: + L = pwlog_corpus_stats(corpus, &x[0], &df_dx[0], &sum_g, &sum_p, &sum_w); + break; + case exp_loss: + L = exp_corpus_stats(corpus, &x[0], &df_dx[0], &sum_g, &sum_p, &sum_w); + break; + case log_exp_loss: + L = log_exp_corpus_stats(corpus, &x[0], &df_dx[0], &sum_g, &sum_p, &sum_w); + break; + case expected_fscore_loss: + L = 1 - fscore_corpus_stats(corpus, &x[0], &df_dx[0], &sum_g, &sum_p, &sum_w); + for (size_type j = 0; j < corpus->nfeatures; ++j) + df_dx[j] = -df_dx[j]; + break; + default: + L = 0; + std::cerr << "## Error: unrecognized loss_type loss = " << int(ltype) + << std::endl; + } + + if (debug_level >= 1000) + std::cerr << "f score = " << 2*sum_w/(sum_g+sum_p) << ", " << std::flush; + + assert(finite(L)); + return L; +} + +// LossFn() is the loss function +// +class LossFn : public DifferentiableFunction { +public: + + const loss_type ltype; + corpus_type* corpus; + const size_ts& f_c; //!< feature -> cross-validation class + const doubles& cs; //!< cross-validation class -> regularizer factor + double p, s; + int it; + double L, R, Q; + + LossFn(loss_type ltype, corpus_type* corpus, const size_ts& f_c, + const doubles& cs, double p, double s) + : ltype(ltype), corpus(corpus), f_c(f_c), cs(cs), p(p), s(s), it(0) + { + assert(f_c.size() == corpus->nfeatures); + for (size_type f = 0; f < f_c.size(); ++f) + assert(f_c[f] < cs.size()); + } + + virtual double Eval(const DblVec& x, DblVec& df_dx) { + size_t nx = x.size(); + + assert(size_t(nx) == corpus->nfeatures); + assert(f_c.size() == corpus->nfeatures); + + it++; + if (debug_level >= 1000) + std::cerr << "it = " << it << ", " << std::flush; + + L = f_df(ltype, corpus, &x[0], &df_dx[0]); + + if (s != 1) { + L *= s; + + for (size_t i = 0; i < nx; ++i) { + assert(finite(df_dx[i])); + df_dx[i] *= s; + } + } + + R = 0; + if (p != 1) { + for (size_t i = 0; i < nx; ++i) + R += cs[f_c[i]] * pow(fabs(x[i]), p); + R *= s; + double sp = s * p; + for (size_t i = 0; i < nx; ++i) + df_dx[i] += sp * cs[f_c[i]] * pow(fabs(x[i]), p-1) + * (x[i] >= 0 ? (x[i] == 0 ? 0 : 1) : -1); + } + + Q = L + R; + + if (debug_level >= 1000) + std::cerr << "Q = " << Q << " = L = " << L << " + R = " << R << std::endl; + + assert(finite(Q)); + + if (debug_level >= 10000) { + std::cerr << "Histogram of derivatives:" << std::endl; + print_histogram(nx, &df_dx[0]); + std::cerr << "--------------------------" << std::endl; + } + + for (size_t i = 0; i < nx; ++i) + assert(finite(df_dx[i])); + return Q; + } // LossFn::operator() + +}; // LossFn{} + +// The Estimator1 does one round of estimation +// +struct Estimator1 { + typedef std::vector size_ts; + typedef std::vector doubles; + + corpus_type* train; //!< training data + size_type nx; //!< number of features + corpus_type* eval; //!< evaluation data + corpus_type* eval2; //!< 2nd evaluation data + loss_type ltype; //!< type of loss function + double c0; //!< default regularizer factor + double c00; //!< multiply default regularizer factor for first feature class + double p; //!< regularizer power + double r; //!< random initialization + double s; //!< scale factor + double tol; //!< stopping tolerance + bool opt_fscore; //!< optimize f-score or - log likelihood + + doubles x; //!< feature -> weight + size_ts f_c; //!< feature -> cross-validation class + doubles lcs; //!< cross-validation class -> log factor + size_type nc; //!< number of cross-validation classes + + size_type nits; //!< number of iterations of last round + size_type sum_nits; //!< total number of iterations + size_type nrounds; //!< number of cross-validation rounds so far + double best_score; //!< best score seen so far + std::string weightsfile; //!< name of weights file + + typedef std::map S_C; + S_C identifier_regclass; //!< map from feature class identifiers to regularization class + typedef std::vector Ss; + Ss regclass_identifiers; //!< vector of class identifiers + + Estimator1(loss_type ltype, double c0, double c00, double p, double r, double s, + double tol=1e-5, bool opt_fscore = true, std::string weightsfile = "") + : train(NULL), nx(0), eval(NULL), eval2(NULL), + ltype(ltype), c0(c0), c00(c00), p(p), r(r), s(s), tol(tol), + opt_fscore(opt_fscore), lcs(1, log(c0)), nc(1), + nits(0), sum_nits(0), nrounds(0), best_score(0), + weightsfile(weightsfile) + { } // Estimator1::Estimator1() + + //! set_data() sets the training and evaluation data + // + void set_data(corpus_type* t, corpus_type* e, corpus_type* e2) { + train = t; + eval = e; + eval2 = e2; + nx = train->nfeatures; + x.resize(nx); + assert(f_c.size() <= nx); + assert(eval == NULL || eval->nfeatures <= train->nfeatures); + assert(eval2 == NULL || eval2->nfeatures <= train->nfeatures); + } // Estimator1::set_data() + + // operator() actually runs one round of estimation + // + double operator() (const doubles& lccs) { + assert(lccs.size() == nc); + doubles ccs(nc); + double L, R, Q; + + for (size_type i = 0; i < nc; ++i) + ccs[i] = exp(lccs[i]); + + assert(x.size() == nx); + nits = 0; + nrounds++; + + if (debug_level >= 10) { + if (nrounds == 1) + std::cerr << "# round nfeval L R Q neglogP f-score css" << std::endl; + std::cerr << nrounds << std::flush; + } + + LossFn fn(ltype, train, f_c, ccs, p, s); + + DblVec x0(nx); + + if (r != 0) + for (size_type i = 0; i < nx; ++i) + x0[i] = r*double(random()-RAND_MAX/2)/double(RAND_MAX/2); + + OWLQN owlqn(true); + if (p == 1) { + assert(ccs.size() == 1); + owlqn.Minimize(fn, x0, x, ccs[0], tol); + } + else + owlqn.Minimize(fn, x0, x, 0, tol); + + nits = fn.it; + L = fn.L; + R = fn.R; + Q = fn.Q; + + // Clean up, collect stats + + sum_nits += nits; + + if (debug_level >= 10) + std::cerr << '\t' << nits << '\t' << L << '\t' << R << '\t' << Q; + + double score = evaluate(opt_fscore, true); + + if (debug_level >= 10) + std::cerr << '\t' << ccs << std::endl; + + return score; + + } // Estimator1::operator() + + // evaluate() evaluates the current model on the eval data, prints + // out debugging information if appropriate, and returns either + // the - log likelihood or 1 - f-score. + // + double evaluate(bool opt_fscore = false, bool internal = false) { + + std::vector df_dx(nx); + Float sum_g = 0, sum_p = 0, sum_w = 0; + Float neglogP = corpus_stats(eval, &x[0], &df_dx[0], + &sum_g, &sum_p, &sum_w); + Float fscore = 2*sum_w/(sum_g+sum_p); + + if (internal) { // internal evaluation, use a short print-out + if (debug_level >= 10) { + std::cerr << '\t' << neglogP << '\t' << fscore; + if (eval2 != NULL) { + Float sum_g2 = 0, sum_p2 = 0, sum_w2 = 0; + Float neglogP2 = corpus_stats(eval2, &x[0], &df_dx[0], + &sum_g2, &sum_p2, &sum_w2); + Float fscore2 = 2*sum_w2/(sum_g2+sum_p2); + std::cerr << '\t' << neglogP2 << '\t' << fscore2; + } + } + } + else { // final evaluation, print out more info + + int nzeros = 0; + for (size_type i = 0; i < nx; ++i) + if (x[i] == 0) + ++nzeros; + + std::cerr << "# Regularizer power p = " << p << std::endl; + std::cerr << "# " << nx-nzeros << " non-zero feature weights of " + << nx << " features." << std::endl; + std::cerr << "# Eval neglogP = " << neglogP + << ", neglogP/nsentences = " << neglogP/eval->nsentences + << std::endl; + std::cerr << "# Eval precision = " << sum_w/sum_p + << ", recall = " << sum_w/sum_g + << ", f-score = " << 2*sum_w/(sum_g+sum_p) + << std::endl; + if (eval2 != NULL) { + Float sum_g = 0, sum_p = 0, sum_w = 0; + Float neglogP = corpus_stats(eval2, &x[0], &df_dx[0], + &sum_g, &sum_p, &sum_w); + std::cerr << "# Eval2 neglogP = " << neglogP + << ", neglogP/nsentences = " << neglogP/train->nsentences + << std::endl; + std::cerr << "# Eval2 precision = " << sum_w/sum_p + << ", recall = " << sum_w/sum_g + << ", f-score = " << 2*sum_w/(sum_g+sum_p) + << std::endl; + } + { + Float sum_g = 0, sum_p = 0, sum_w = 0; + Float neglogP = corpus_stats(train, &x[0], &df_dx[0], + &sum_g, &sum_p, &sum_w); + std::cerr << "# Train neglogP = " << neglogP + << ", neglogP/nsentences = " << neglogP/train->nsentences + << std::endl; + std::cerr << "# Train precision = " << sum_w/sum_p + << ", recall = " << sum_w/sum_g + << ", f-score = " << 2*sum_w/(sum_g+sum_p) + << std::endl; + } + + std::cerr << "# regclass_identifiers = " << regclass_identifiers << std::endl; + std::cerr << "# lcs = " << lcs << std::endl; + { + doubles cs(nc); + for (size_type i = 0; i < nc; ++i) + cs[i] = exp(lcs[i]); + std::cerr << "# cs = " << cs << std::endl; + } + if (debug_level >= 100) { + std::cerr << "# Cumulative distribution of feature weights:" << std::endl; + print_histogram(nx, &x[0]); + } + } + + double score = (opt_fscore ? 1 - fscore : neglogP); + + if (nrounds == 1 || score < best_score) { + best_score = score; + + // Write out weights file + + if (!weightsfile.empty()) { + FILE* out = fopen(weightsfile.c_str(), "w"); + // fprintf(out, "%d@", nx-nzeros); + for (size_type i = 0; i < x.size(); ++i) + if (x[i] != 0) { + fprintf(out, "%d", i); + if (x[i] != 1) + fprintf(out, "=%g", x[i]); + fprintf(out, "\n"); + } + fclose(out); + } + } + return score; + } // Estimator1::evaluate() + + //! fc_bin() maps a feature count to its corresponding bin + // + static int fc_bin(double feature_count_base, int feature_count) { + if (feature_count <= 4) + return feature_count; + else + return lrint(4.0 + pow(feature_count_base, + lrint(log(feature_count-4)/log(feature_count_base)))); + } // Estimator1::fc_bin() + + // read_featureclasses() reads the feature classes from a feature file + // + void read_featureclasses(const char* filename, + int nseparators = 1, + const char* separators = ":") { + + const char* filesuffix = strrchr(filename, '.'); + bool popen_flag = false; + FILE *in; + if (strcasecmp(filesuffix, ".bz2") == 0) { + std::string command("bzcat "); + command += filename; + in = popen(command.c_str(), "r"); + if (in == NULL) { + perror("## Error in cvlm-owlqn: "); + std::cerr << "## popen(\"" << command << "\", \"r\") failed, usage = " << resource_usage() << std::endl; + } + popen_flag = true; + } + else if (strcasecmp(filesuffix, ".gz") == 0) { + std::string command("zcat "); + command += filename; + errno = 0; + in = popen(command.c_str(), "r"); + if (in == NULL) { + perror("## Error in cvlm-owlqn: "); + std::cerr << "## popen(\"" << command << "\", \"r\") failed, usage = " << resource_usage() << std::endl; + } + popen_flag = true; + } + else + in = fopen(filename, "r"); + if (in == NULL) { + std::cerr << "## Couldn't open evalfile " << filename + << ", errno = " << errno << "\n" + << usage << std::endl; + exit(EXIT_FAILURE); + } + + size_type featno; + + // read feature number + + while (fscanf(in, " %u ", &featno) == 1) { + int c = ':'; + + // read the prefix of the feature class identifier + + std::string identifier; + int iseparators = 0; + if (nseparators >= 0) + while ((c = getc(in)) != EOF && !isspace(c)) { + if (index(separators, c) != NULL) + if (++iseparators > nseparators) + break; + identifier.push_back(c); + } + + // skip the rest of the line + + while ((c = getc(in)) != EOF && c != '\n') + ; + + // insert the prefix into the prefix -> regularization class map + + S_C::iterator it + = identifier_regclass.insert(S_C::value_type(identifier, + identifier_regclass.size())).first; + + size_type cl = it->second; // regularization class + + f_c.resize(featno+1); + f_c[featno] = cl; // set feature's regularization class + } + + nc = identifier_regclass.size(); // set nc + lcs.resize(nc, log(c0)); // set each regularizer class' factor to c0 + lcs[0] += log(c00); // increment first regularizer class' factor by c00 + + // construct regclass_identifiers + + regclass_identifiers.resize(nc); + cforeach (S_C, it, identifier_regclass) { + assert(it->second < regclass_identifiers.size()); + regclass_identifiers[it->second] = it->first; + } + + if (debug_level >= 0) + std::cerr << "# Regularization classes: " << regclass_identifiers << std::endl; + + if (popen_flag) + pclose(in); + else + fclose(in); + } // Estimator1::read_featureclasses() + + void estimate(const int maxruns=10) + { + powell::control cntrl(1e-4, 1e-2, maxruns, maxruns); + powell::minimize(lcs, *this, log(2), cntrl); + + if (debug_level > 0) { + std::cerr << "# Regularizer class weights = ("; + for (size_type i = 0; i < lcs.size(); ++i) { + if (i > 0) + std::cerr << ' '; + std::cerr << exp(lcs[i]); + } + std::cerr << ')' << std::endl; + } + + } // Estimator1::estimate() + +}; // Estimator1{} + +//! exit_failure() causes the program to halt immediately +// +inline std::ostream& exit_failure(std::ostream& os) { + os << std::endl; + exit(EXIT_FAILURE); + return os; +} // util::exit_failure + +int main(int argc, char** argv) +{ + std::ios::sync_with_stdio(false); + + loss_type ltype = log_loss; + double c0 = 2.0; + double c00 = 1.0; + double p = 2.0; + double r = 0.0; + double s = 1.0; + double tol = 1e-5; + double Pyx_factor = 0.0; + bool Px_propto_g = false; + int maxruns = 10; + int nseparators = 1; + std::string feat_file, weights_file, eval_file, eval2_file; + int opt; + while ((opt = getopt(argc, argv, "hd:c:C:p:r:s:t:l:F:Gm:n:f:o:e:x:")) != -1) + switch (opt) { + case 'h': + std::cerr << usage << exit_failure; + break; + case 'd': + debug_level = atoi(optarg); + break; + case 'c': + c0 = atof(optarg); + break; + case 'C': + c00 = atof(optarg); + break; + case 'p': + p = atof(optarg); + break; + case 'r': + r = atof(optarg); + break; + case 's': + s = atof(optarg); + break; + case 't': + tol = atof(optarg); + break; + case 'l': + ltype = loss_type(atoi(optarg)); + break; + case 'F': + Pyx_factor = atof(optarg); + break; + case 'G': + Px_propto_g = true; + break; + case 'm': + maxruns = atoi(optarg); + break; + case 'n': + nseparators = atoi(optarg); + break; + case 'f': + feat_file = optarg; + break; + case 'o': + weights_file = optarg; + break; + case 'e': + eval_file = optarg; + break; + case 'x': + eval2_file = optarg; + break; + } + + if (debug_level >= 10) + std::cerr << "# ltype -l = " << ltype + << " (" << loss_type_name[ltype] << ")" + << " regularization -c = " << c0 + << ", c00 -C = " << c00 + << ", power -p = " << p + << ", scale -s = " << s + << ", tol -t = " << tol + << ", random init -r = " << r + << ", Pyx_factor -F = " << Pyx_factor + << ", Px_propto_g -G = " << Px_propto_g + << ", maxruns -m = " << maxruns + << ", nseparators -n = " << nseparators + << ", feat_file -f = " << feat_file + << ", weights_file -o = " << weights_file + << ", eval_file -e = " << eval_file + << ", eval2_file -x = " << eval2_file + << std::endl; + + // I discovered a couple of years after I wrote this program that popen + // uses fork, which doubles your virtual memory for a short instant! + + Estimator1 e(ltype, c0, c00, p, r, s, tol, true, weights_file); + + if (!feat_file.empty()) + e.read_featureclasses(feat_file.c_str(), nseparators, ":"); + + // Read in eval data first, as that way we may squeeze everything into 4GB + + corpusflags_type corpusflags = { Pyx_factor, Px_propto_g }; + + corpus_type* evaldata = NULL; + if (!eval_file.empty()) { + evaldata = read_corpus_file(&corpusflags, eval_file.c_str()); + if (debug_level >= 10) + std::cerr << "# read eval_file = " << eval_file + << ", nsentences = " << evaldata->nsentences + << std::endl; + } + + corpus_type* evaldata2 = NULL; + if (!eval2_file.empty()) { + evaldata2 = read_corpus_file(&corpusflags, eval2_file.c_str()); + if (debug_level >= 10) + std::cerr << "# read eval2_file = " << eval2_file + << ", nsentences = " << evaldata2->nsentences + << std::endl; + } + + corpus_type* traindata = read_corpus(&corpusflags, stdin); + int nx = traindata->nfeatures; + + if (errno != 0) { + perror("## cvlm-owlqn, after reading main corpus, nonzero errno "); + errno = 0; + } + + std::cerr << "# " << nx << " features in training data, " << resource_usage() << std::endl; + + if (evaldata == NULL) + evaldata = traindata; + + e.set_data(traindata, evaldata, evaldata2); + e.estimate(maxruns); + +} // main() diff --git a/second-stage/programs/wlle/powell.h b/second-stage/programs/wlle/powell.h new file mode 100644 index 0000000..0f80e21 --- /dev/null +++ b/second-stage/programs/wlle/powell.h @@ -0,0 +1,575 @@ +//! powell.h +//! +//! Powell's algorithm for minimizing an n-dimensional function +//! without derivatives, based on Numerical Recipes in C++ code. +//! +//! Mark Johnson, 29th September 2002, last modified 14th August 2008 +//! +//! The main routine is powell::minimize(). Its calling sequence +//! is: +//! +//! Float minimize( // => value of f at minimum +//! p, // <= starting x value, => minimizing x value +//! f, // <= function object being minimized +//! initial_step, // <= initial step length +//! cntrl) // <= control{} structure +//! +//! +//! There is also a specialized version for 1-d minimization +//! +//! Float minimize1d( // => minimizing x value +//! p, // <= starting x value +//! f, // <= function object being minimized +//! initial_step, // <= initial step length +//! cntrl) // <= control{} structure + +#ifndef POWELL_H +#define POWELL_H + +#include +#include +#include +#include +#include +#include +#include + +namespace powell { + + typedef double Float; + typedef std::vector Floats; + typedef std::vector Array; + + template + inline void error_abort(const T& t) { + std::cerr << "Error in powell.h: " << t << std::endl; + abort(); + } + + //! The control class gives the user a way to monitor and + //! control the minimization process. You can specialize its + //! behaviour by deriving your own monitor from this class + //! and changing its behaviour. + // + class control { + + private: + Float tol; //!< Stopping function tolerance + Float linmin_tol; //!< Line minimization stopping tolerance + int max_nfeval; //!< Max number of function evaluations + int linmin_max_nfeval_; //!< Max number of function evaluations in brent() + int debug; //!< controls tracing, etc + + public: + Float linmin_rel_tol() const { return linmin_tol; } + Float linmin_abs_tol() const { return linmin_tol; } + int linmin_max_nfeval() const { return linmin_max_nfeval_; } + Float linmin_xinit() const { return 1.0; } + size_t cache_size() const { return 20; } //!< function cache size + + // control() sets the defaults for the program + // + control(Float tol = 1e-7, //!< stopping tolerance + Float linmin_tol = 1e-7, //!< line minimization tolerance + int max_nfeval = 0, //!< max number of fn evalns, + //!< if neg, halt w/ error + int linmin_max_nfeval_ = 0, //!< max number of fn evalns + //!< per line minimization + size_t debug = 0) + : tol(tol), + linmin_tol(linmin_tol), + max_nfeval(max_nfeval), + linmin_max_nfeval_(linmin_max_nfeval_), + debug(debug) + { } // control() + + //! operator() is called at each powell iteration + //! If operator() returns true, the powell routine halts. + //! Specialize its behaviour if you want custom tracing + // + bool operator() (const Floats& ps, + const Float fx, const Float fx_last, + const int iteration, + const int nfeval) const + { + const Float TINY = 1.0e-25; + if (debug >= 1000) { // print tracing information + if (iteration == 1) + std::cout << "#" << std::setw(7) << "iter" << std::setw(10) << "f(x)" + << std::setw(10) << "nfeval" << std::endl; + std::cout << std::setw(8) << iteration << ' ' << std::setw(9) << fx + << ' ' << std::setw(9) << nfeval << std::endl; + + if (debug >= 100000) { + std::cout << " x = (" << ps[0]; + for (size_t j = 1; j < ps.size(); ++j) + std::cerr << ' ' << ps[j]; + std::cerr << ')' << std::endl; + } + } + + if (fx_last < fx) + std::cerr << " *** Error in powell::powell() iteration " << iteration + << ", fx = " << fx << " > fx_last = " << fx_last << std::endl; + + // This is the termination criterion used by Numerical Recipies + // + if (2.0*fabs(fx-fx_last) <= tol*(fabs(fx_last)+fabs(fx))+TINY) + return true; + + if (max_nfeval != 0 && nfeval >= abs(max_nfeval)) { + if (max_nfeval > 0) { + if (debug >= 100) + std::cerr << "*** powell::powell() returning at iteration " << iteration + << " in with error " << fx_last-fx + << std::endl; + return true; + } + else + error_abort("Too many iterations in powell::powell"); + } + + return false; + } // operator() + }; // control{} + + + template + inline T SQR(T x) { return x*x; } + + inline void shft3(Float& a, Float& b, Float& c, const Float& d) + { + a=b; + b=c; + c=d; + } + + inline Float sign(Float a, Float b) { + return ((b) > 0.0 ? fabs(a) : -fabs(a)); + } + + inline void shift(Float& a, Float& b, Float& c, Float d) { + a = b; b = c; c = d; + } + + //! mnbrak() + //! + //! Given a function f, and given distinct initial points ax and bx, + //! this routine searches in the downhill direction (defined by the + //! function as evaluated at the initial points) and returns new points + //! ax, bx, cx that bracket a minimum of the function. Also returned + //! are the function values at the three points, fa, fb, and fc. + //! + //! The arguments to f() are: + //! x <= point at which to evaluate f + //! df => derivative of f at x + //! f() => value of f at x + //! + //! Here GOLD is the default ratio by which successive intervals are + //! magnified; GLIMIT is the maximum magnification allowed for a + //! parabolic-fit step. + // + template + void mnbrak(Float& ax, Float& bx, Float& cx, + Float& fa, Float& fb, Float& fc, + F& f) + { + const float GOLD = 1.618034; + const float GLIMIT = 100.0; + const double TINY = 1.0e-20; + + Float ulim, u, r, q, fu; + fa = f(ax); + fb = f(bx); + if (fb > fa) { + // Switch roles of a and b so that we can go downhill in the + // direction from a to b. + std::swap(ax, bx); + std::swap(fa, fb); + } + cx = bx+GOLD*(bx-ax); // First guess for c. + fc = f(cx); + while (fb > fc) { // Keep returning here until we bracket. + r = (bx-ax)*(fb- fc); + // Compute u by parabolic extrapolation from a; b; c. + // TINY is used to prevent any possible division by zero. + q = (bx-cx)*(fb-fa); + u = bx-((bx-cx)*q-(bx-ax)*r)/(2.0*sign(std::max(fabs(q-r),TINY),q-r)); + ulim = bx+GLIMIT*(cx-bx); + // We won't go farther than this. Test various possibilities: + if ((bx-u)*(u-cx) > 0.0) { + // Parabolic u is between b and c: try it. + fu = f(u); + if (fu < fc) { + // Got a minimum between b and c. + ax = bx; + bx = u; + fa = fb; + fb = fu; + return; + } + else if (fu > fb) { + // Got a minimum between between a and u. */ + cx = u; + fc = fu; + return; + } + u = cx+GOLD*(cx-bx); + // Parabolic fit was no use. Use default magnification. + fu = f(u); + } + else if ((cx-u)*(u-ulim) > 0.0) { + // Parabolic fit is between c and its allowed limit. + fu = f(u); + if (fu < fc) { + bx = cx; cx = u; u = cx+GOLD*(cx-bx); + fb = fc; fc = fu; fu = f(u); + } + } + else if ((u-ulim)*(ulim-cx) >= 0.0) { + // Limit parabolic u to maximum allowed value. + u = ulim; + fu = f(u); + } + else { + // Reject parabolic u, use default magnification. + u = cx+GOLD*(cx-bx); + fu = f(u); + } + // Eliminate oldest point and continue. + ax = bx; bx = cx; cx = u; + fa = fb; fb = fc; fc = fu; + } + } // mnbrak() + + //! brent() was modified so that fxmin is the smallest f() seen so far, + //! and xmin is the value of x at which fxmin was encountered. + // + template + Float brent(const Float ax, const Float bx, const Float cx, F& f, + C& control, Float &xmin) + { + const Float CGOLD = 0.3819660; // Golden ratio + Float a, b, d=0.0, etemp, fu, fv, fw, fx; + Float p, q, r, u, v, w, x, xm; + Float e=0.0; + Float fxmin; + + a = (ax < cx ? ax : cx); + b = (ax > cx ? ax : cx); + fw = fv = fx = f(x = w = v = bx); + xmin = x; + fxmin = fx; + size_t max_nfeval = f.nfeval() + control.linmin_max_nfeval(); + for (size_t iter = 0; iter < 200; iter++) { + xm = 0.5*(a+b); + Float tol1 = control.linmin_rel_tol() * fabs(x) + control.linmin_abs_tol(); + Float tol2 = 2.0*tol1; + if (fabs(x-xm) <= (tol2-0.5*(b-a))) { + // xmin = x; + // return fx; + return fxmin; + } + if (control.linmin_max_nfeval() != 0 && f.nfeval() >= max_nfeval) { + // xmin = x; + // return fx; + return fxmin; + } + if (fabs(e) > tol1) { + r = (x-w)*(fx-fv); + q = (x-v)*(fx-fw); + p = (x-v)*q-(x-w)*r; + q = 2.0*(q-r); + if (q > 0.0) + p = -p; + q = fabs(q); + etemp=e; + e=d; + if (fabs(p) >= fabs(0.5*q*etemp) || p <= q*(a-x) || p >= q*(b-x)) + d=CGOLD*(e=(x >= xm ? a-x : b-x)); + else { + d=p/q; + u=x+d; + if (u-a < tol2 || b-u < tol2) + d=sign(tol1,xm-x); + } + } else { + d=CGOLD*(e=(x >= xm ? a-x : b-x)); + } + u=(fabs(d) >= tol1 ? x+d : x+sign(tol1,d)); + fu=f(u); + if (fu < fxmin) { + fxmin = fu; + xmin = u; + } + if (fu <= fx) { + if (u >= x) a=x; else b=x; + shift(v, w, x, u); + shift(fv, fw, fx, fu); + } else { + if (u < x) a=u; else b=u; + if (fu <= fw || w == x) { + v=w; + w=u; + fv=fw; + fw=fu; + } else if (fu <= fv || v == x || v == w) { + v=u; + fv=fu; + } + } + } + // xmin = x; + // return fx; + return fxmin; + } // brent() + + template + struct f1cache { + const size_t cache_size; //!< number of function evaluations to cache + F& f; //!< original function + Floats xs; //!< cached x values + Floats fs; //!< cached f vector + size_t last; //!< last position inserted + size_t nfeval_; //!< number of function evaluations + + f1cache(F& f, size_t cache_size=2) + : cache_size(cache_size), f(f), last(cache_size-1), nfeval_(0) { } + + Float operator() (Float x) { + for (size_t i = 0; i < cache_size; ++i) { + if (i >= xs.size()) { + Float fx = f(x); + xs.push_back(x); + fs.push_back(fx); + ++nfeval_; + return fx; + } + if (x == xs[i]) + return fs[i]; + } + Float fx = f(x); + if (++last >= xs.size()) + last = 0; + xs[last] = x; + fs[last] = fx; + ++nfeval_; + return fx; + } // f1cache::operator() + + size_t nfeval() const { return nfeval_; } + + }; // f1cache{} + + //! minimize1d() is the 1d-minimization routine. It returns the minimizing x value. + // + template + Float minimize1d(Float p, F& f, Float initial_step = 1.0, Float tol=1e-7, int max_nfeval=100) + { + control c(tol, tol, max_nfeval, max_nfeval); + f1cache func(f, c.cache_size()); + Float a = p; + Float x = a + initial_step; + Float b, fa, fx, fb, xmin; + + mnbrak(a, x, b, fa, fx, fb, func); + brent(a, x, b, func, c, xmin); // brent() returns min_f, if you want it + return xmin; + } // minimize1d() + + //! linmin_f{} maps the n-dimensional function into a 1-d function + //! in direction p + // + template + struct linmin_f { /* structure to communicate with linmin */ + F& f; + Floats& p; + Floats& xi; + Floats xt; + + linmin_f(F& f, Floats& p, Floats& xi) + : f(f), p(p), xi(xi), xt(p.size()) { } + + Float operator() (Float x) { + const size_t n = p.size(); + for (size_t j = 0; j < n; ++j) + xt[j] = p[j] + x*xi[j]; + Float fx = f(xt); + return fx; + } + + size_t nfeval() const { return f.nfeval; } + + }; // linmin_f{} + + template + Float linmin(Floats &p, Floats &xi, F& f, C& control) + { + assert(p.size() == xi.size()); + linmin_f f1dim(f, p, xi); + + Float a = 0.0; + Float x = control.linmin_xinit(); + Float b, fa, fx, fb, xmin; + + mnbrak(a, x, b, fa, fx, fb, f1dim); + Float fret = brent(a, x, b, f1dim, control, xmin); + for (size_t j = 0; j < p.size(); j++) { + xi[j] *= xmin; + p[j] += xi[j]; + } + return fret; + } // linmin() + + template + struct fcache { + typedef std::vector Fss; + const size_t cache_size; //!< number of function evaluations to cache + F& f; //!< original function + Fss xs; //!< cached x vectors + Floats fs; //!< cached f vector + size_t last; //!< last position inserted + size_t nfeval; //!< number of function evaluations + + fcache(F& f, size_t cache_size=2) + : cache_size(cache_size), f(f), last(cache_size-1), nfeval(0) { } + + Float operator() (const Floats& x) { + for (size_t i = 0; i < cache_size; ++i) { + if (i >= xs.size()) { + Float fx = f(x); + xs.push_back(x); + fs.push_back(fx); + ++nfeval; + return fx; + } + if (x == xs[i]) + return fs[i]; + } + Float fx = f(x); + if (++last >= xs.size()) + last = 0; + xs[last] = x; + fs[last] = fx; + ++nfeval; + return fx; + } // fcache::operator() + + }; // fcache{} + + // Minimization of a function func of n variables. Input consists of an initial + // starting point p[1..n]; an initial matrix xi[1..n][1..n], whose columns contain + // the initial set of directions (usually the n unit vectors); and ftol, the + // fractional tolerance in the function value such that failure to decrease by + // more than this amount on one iteration signals doneness. On output, p is set + // to the best point found, xi is the then-current direction set, fret is the + // returned function value at p, and iter is the number of iterations taken. + // The routine linmin is used. + // + template + Float minimize_n(Floats& p, F& f, Float initial_step, Control& cntrl) + { + fcache func(f, cntrl.cache_size()); + size_t n = p.size(); + + Array xi(n); // array of conjugate directions + for (size_t i = 0; i < n; ++i) { // initialize to unit vectors + xi[i].resize(n); + xi[i][i] = initial_step; + } + + Float del,fp,fptt,t; + + Floats pt(p); // Save the initial point + Floats ptt(n), xit(n); + + Float fret = func(p); + + for (size_t iter = 1; ; ++iter) { + fp = fret; + size_t ibig=0; + del = 0.0; // Will be the biggest function decrease. + for (size_t i = 0; i < n; i++) { // In each iteration, loop over all directions. + for (size_t j = 0; j < n; j++) // Copy the direction, + xit[j] = xi[j][i]; + fptt = fret; + fret = linmin(p, xit, func, cntrl); // minimize along it, + if (fptt-fret > del) { // and record it if it is the largest decrease + del = fptt-fret; // so far. + ibig = i+1; + } + } + + if (cntrl(p, fret, fp, iter, func.nfeval)) + return fret; // Normal termination + + for (size_t j = 0; j < n; j++) { // Construct the extrapolated point and the + ptt[j] = 2.0*p[j]-pt[j]; // average direction moved. Save the + xit[j] = p[j]-pt[j]; // old starting point. + pt[j] = p[j]; + } + + fptt = func(ptt); // Function value at extrapolated point. + + if (fptt < fp) { + t=2.0*(fp-2.0*fret+fptt)*SQR(fp-fret-del)-del*SQR(fp-fptt); + if (t < 0.0) { + fret = linmin(p, xit, func, cntrl); + for (size_t j = 0; j < n; j++) { + xi[j][ibig-1] = xi[j][n-1]; + xi[j][n-1] = xit[j]; + } + } + } + } + } // minimize_n() + + //! minimize() calls minimize_n() if p.size() > 1, otherwise it does a one-dimensional + //! minimization. + // + template + Float minimize(Floats& p, F& f, Float initial_step, Control& control) { + assert(p.size() > 0); + if (p.size() > 1) + return minimize_n(p, f, initial_step, control); + else { // 1-d minimization + fcache func(f, control.cache_size()); + Floats xi(1, 1); + return linmin(p, xi, func, control); + } + } // minimize() + + //! minimize() is a wrapper that calls the main minimization routine with a default control. + // + template + Float minimize(Floats& p, F& f, Float initial_step = 1.0) + { + control c; + return minimize(p, f, initial_step, c); + } // minimize() + +} // namespace powell + +#endif // POWELL_H + +/* + +// Code for testing the 1-d minimization routines + +#include +#include "powell.h" + +struct f_type { + double operator() (double x) const { + double fx = sin(x); + std::cout << "x = " << x << ", fx = " << fx << std::endl; + return fx; + } +}; + +int main(int argc, char** argv) { + f_type f; + double xmin = powell::minimize1d(0, f, 0.1, 1e-5, 10); + std::cout << "xmin = " << xmin << std::endl; +} +*/ diff --git a/second-stage/programs/wlle/utility.h b/second-stage/programs/wlle/utility.h index a6aa90c..19f48bc 100644 --- a/second-stage/programs/wlle/utility.h +++ b/second-stage/programs/wlle/utility.h @@ -894,6 +894,14 @@ inline std::ostream& operator<< (std::ostream& os, const boost::shared_ptr& s struct resource_usage { }; #ifndef __i386 +#define NO_PROC_SELF_STAT +#endif + +#ifdef __APPLE__ +#define NO_PROC_SELF_STAT +#endif + +#ifdef NO_PROC_SELF_STAT inline std::ostream& operator<< (std::ostream& os, resource_usage r) { return os; @@ -902,21 +910,24 @@ inline std::ostream& operator<< (std::ostream& os, resource_usage r) inline std::ostream& operator<< (std::ostream& os, resource_usage r) { FILE* fp = fopen("/proc/self/stat", "r"); - assert(fp); - int utime; - int stime; - unsigned int vsize; - unsigned int rss; - int result = - fscanf(fp, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %d %d %*d %*d %*d %*d" - "%*u %*u %*d %u %u", &utime, &stime, &vsize, &rss); - assert(result == 4); - fclose(fp); - // s << "utime = " << utime << ", stime = " << stime << ", vsize = " << vsize << ", rss = " << rss -; - // return s << "utime = " << utime << ", vsize = " << vsize; - return os << "utime " << float(utime)/1.0e2 << "s, vsize " - << float(vsize)/1048576.0 << " Mb."; + // Don't fail if we can't read that (such as on a Mac), just return. + if (fp == NULL) { + return os; + } else { + int utime; + int stime; + unsigned int vsize; + unsigned int rss; + int result = + fscanf(fp, "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %d %d %*d %*d %*d %*d" + "%*u %*u %*d %u %u", &utime, &stime, &vsize, &rss); + assert(result == 4); + fclose(fp); + // s << "utime = " << utime << ", stime = " << stime << ", vsize = " << vsize << ", rss = " << rss; + // return s << "utime = " << utime << ", vsize = " << vsize; + return os << "utime " << float(utime)/1.0e2 << "s, vsize " + << float(vsize)/1048576.0 << " Mb."; + } } #endif