This repository was archived by the owner on Jul 22, 2024. It is now read-only.

Commit 27718e3

Author: Predrag Djurdjevic

tensorrt samples merge

1 parent 891f454 commit 27718e3

29 files changed: +4512 −0 lines changed

vision/tensorrt-samples/README.md (+38)

@@ -0,0 +1,38 @@
# vision-tensorrt-samples

### Common Assumptions

* The repository contains samples based on Nvidia's TensorRT C/C++ samples, modified for ease of use. Target platforms are Power with GPUs and Nvidia Jetson, either on native Linux or within Docker containers.

- Inputs can be arbitrary image files in terms of extension and resolution, passed in as command line arguments
- Outputs are a list of classes and bounding boxes per image, plus debug images with the bounding boxes marked
- The model can be arbitrary as long as it matches a supported model type (SSD and FRCNN for now; Yolo and GoogleNet soon to follow) and <model_name>_trt.prototxt and <model_name>.caffemodel are present, with the names adjusted in the source code
- Batch size can be arbitrary as long as it fits in device memory; adjustable in the source code
- Floating point precision can be arbitrary, which affects accuracy, speed and memory footprint; adjustable in the source code
- The number and names of classes can be arbitrary, depending on the model; also adjustable in the source code. The names could be read from the label file, but due to variations in syntax this is left out for now
- The number of classes is always one more than in the label file, since there is one background class
- The confidence threshold can be adjusted, which determines the number of objects recognized
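The class-count rule above can be illustrated with a small sketch: the class count the network expects is the number of lines in the label file plus one for the background class. The label file name and contents here are hypothetical:

```shell
# Hypothetical label file, one class name per line
printf 'person\ncar\nbicycle\n' > labels.txt

# Classes the network outputs = labels in the file + 1 background class
NUM_LABELS=$(wc -l < labels.txt)
NUM_CLASSES=$((NUM_LABELS + 1))
echo "NUM_CLASSES=$NUM_CLASSES"
```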
### Common Use

* The samples first have to be compiled from source and run from the bin directory with command line parameters

- It is assumed that CUDA, cuDNN, TensorRT, gcc and OpenCV are preinstalled and the environment variables are set (see below)
- Copy the samples and makefiles over the respective TensorRT sample directories
- Modify the code to match the desired model, batch size, floating point precision, image folder and class names
- Compile the source code via make from the respective sample's directory
- Run the binary (release or debug) from the bin folder, passing in the file names as "name.ext" "name.ext" without the folder path
- On the initial run, if the TensorRT engine for the model has not been built before, it will take a little while to parse the model and serialize the engine to file
- On subsequent runs, if no changes were made to the model or engine parameters, the engine will be deserialized from the earlier saved one
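The last two points describe a build-once, reuse-thereafter engine cache. The control flow can be sketched in shell, where `build_engine` and the engine file name are illustrative stand-ins for TensorRT's slow parse-and-serialize step, not part of the samples:

```shell
ENGINE=model_trt.engine

# Stand-in for TensorRT's parse + serialize step (slow on the first run)
build_engine() {
  echo "serialized-engine-bytes" > "$ENGINE"
}

if [ -f "$ENGINE" ]; then
  echo "deserializing engine from $ENGINE"
else
  echo "first run: building and serializing engine"
  build_engine
fi
```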
### Common Prerequisites

* Following are the prerequisite steps to set up the correct native or Docker environment, for both Power and Jetson, at build and runtime

- If building on Power within Docker, it is best to start with the nvidia/cuda-ppc64le:10.1-cudnn7-devel-ubuntu18.04 image and add the latest TensorRT SDK (currently 5.1.3.2, with CUDA 10.1 and cuDNN 7.5, for Power)
- Install or build OpenCV version 3.3.1 or above
- If building natively on Jetson TX2, follow the steps described in the Nvidia JetPack installation. This requires an Ubuntu host machine to initially flash the board via the JetPack manager (currently 4.2.2). All the prerequisites, if checked during installation, are preinstalled and ready for use
  - Note that Host Machine needs to be unchecked, TensorFlow can be unchecked, and Jetson TX2 checked

- Please follow the rest of the prerequisite instructions from the Nvidia samples README.md
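The Power-in-Docker setup above might look like the following. This is a sketch of the environment setup only; the TensorRT archive name is an assumption and must match the package actually downloaded from Nvidia for ppc64le:

```shell
# Start from the CUDA 10.1 / cuDNN 7 devel image for ppc64le
docker pull nvidia/cuda-ppc64le:10.1-cudnn7-devel-ubuntu18.04
docker run --rm -it nvidia/cuda-ppc64le:10.1-cudnn7-devel-ubuntu18.04 bash

# Inside the container, add the TensorRT 5.1.3.2 SDK
# (archive name below is hypothetical):
#   tar xzvf TensorRT-5.1.3.2.<os-arch-cuda-cudnn>.tar.gz -C /opt
#   export LD_LIBRARY_PATH=/opt/TensorRT-5.1.3.2/lib:$LD_LIBRARY_PATH
```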
+64
@@ -0,0 +1,64 @@
SHELL=/bin/bash -o pipefail
TARGET?=$(shell uname -m)
LIBDIR?=lib
VERBOSE?=0
ifeq ($(VERBOSE), 1)
AT=
else
AT=@
endif
CUDA_TRIPLE=x86_64-linux
CUBLAS_TRIPLE=x86_64-linux-gnu
DLSW_TRIPLE=x86_64-linux-gnu
ifeq ($(TARGET), aarch64)
CUDA_TRIPLE=aarch64-linux
CUBLAS_TRIPLE=aarch64-linux-gnu
DLSW_TRIPLE=aarch64-linux-gnu
endif
ifeq ($(TARGET), qnx)
CUDA_TRIPLE=aarch64-qnx
CUBLAS_TRIPLE=aarch64-qnx-gnu
DLSW_TRIPLE=aarch64-unknown-nto-qnx
endif
ifeq ($(TARGET), ppc64le)
CUDA_TRIPLE=ppc64le-linux
CUBLAS_TRIPLE=ppc64le-linux
DLSW_TRIPLE=ppc64le-linux
endif
ifeq ($(TARGET), android64)
DLSW_TRIPLE=aarch64-linux-androideabi
CUDA_TRIPLE=$(DLSW_TRIPLE)
CUBLAS_TRIPLE=$(DLSW_TRIPLE)
endif
export TARGET
export VERBOSE
export LIBDIR
export CUDA_TRIPLE
export CUBLAS_TRIPLE
export DLSW_TRIPLE
samples=sampleCharRNN sampleFasterRCNN sampleGoogleNet sampleINT8 sampleINT8API sampleMLP sampleMNIST sampleMNISTAPI sampleMovieLens sampleOnnxMNIST samplePlugin sampleSSD sampleUffMNIST sampleUffSSD trtexec

# sampleMovieLensMPS should only be compiled for Linux targets.
# The sample uses Linux-specific shared memory and IPC libraries.
ifeq ($(TARGET), x86_64)
samples += sampleMovieLensMPS
endif

.PHONY: all clean help
all:
	$(AT)$(foreach sample,$(samples), $(MAKE) -C $(sample) &&) :

clean:
	$(AT)$(foreach sample,$(samples), $(MAKE) clean -C $(sample) &&) :

help:
	$(AT)echo "Sample building help menu."
	$(AT)echo "Samples:"
	$(AT)$(foreach sample,$(samples), echo "\t$(sample)" &&) :
	$(AT)echo "\nCommands:"
	$(AT)echo "\tall - build all samples."
	$(AT)echo "\tclean - clean all samples."
	$(AT)echo "\nVariables:"
	$(AT)echo "\tTARGET - Specify the target to build for."
	$(AT)echo "\tVERBOSE - Specify verbose output."
	$(AT)echo "\tCUDA_INSTALL_DIR - Directory where CUDA installs to."
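A few illustrative invocations of the top-level makefile above. Note the `&&) :` idiom in the recipes: the `$(foreach …)` expands to a chain of per-sample sub-makes joined by `&&`, terminated by the no-op `:`, so the build stops at the first failing sample:

```shell
# Build all samples for the host architecture (TARGET defaults to uname -m)
make

# Cross-build for 64-bit Arm with full command echoing
make TARGET=aarch64 VERBOSE=1

# Remove build artifacts from every sample
make clean
```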
@@ -0,0 +1,213 @@
.SUFFIXES:
CUDA_TRIPLE?=x86_64-linux
CUBLAS_TRIPLE?=x86_64-linux-gnu
DLSW_TRIPLE?=x86_64-linux-gnu
TARGET?=$(shell uname -m)
ifeq ($(CUDA_INSTALL_DIR),)
$(warning CUDA_INSTALL_DIR variable is not specified, using /usr/local/cuda by default, use CUDA_INSTALL_DIR=<cuda_directory> to change.)
endif
ifeq ($(CUDNN_INSTALL_DIR),)
$(warning CUDNN_INSTALL_DIR variable is not specified, using $$CUDA_INSTALL_DIR by default, use CUDNN_INSTALL_DIR=<cudnn_directory> to change.)
endif
CUDA_INSTALL_DIR?=/usr/local/cuda
CUDNN_INSTALL_DIR?=$(CUDA_INSTALL_DIR)
CUDA_LIBDIR=lib
CUDNN_LIBDIR=lib64
ifeq ($(TARGET), aarch64)
ifeq ($(shell uname -m), aarch64)
CUDA_LIBDIR=lib64
CC = g++
else
CC = aarch64-linux-gnu-g++
endif
CUCC = $(CUDA_INSTALL_DIR)/bin/nvcc -m64 -ccbin $(CC)
else ifeq ($(TARGET), x86_64)
CUDA_LIBDIR=lib64
CC = g++
CUCC = $(CUDA_INSTALL_DIR)/bin/nvcc -m64
else ifeq ($(TARGET), ppc64le)
CUDA_LIBDIR=lib64
CC = g++
CUCC = $(CUDA_INSTALL_DIR)/bin/nvcc -m64
else ifeq ($(TARGET), qnx)
CC = ${QNX_HOST}/usr/bin/aarch64-unknown-nto-qnx7.0.0-g++
CUCC = $(CUDA_INSTALL_DIR)/bin/nvcc -m64 -ccbin $(CC)
else ifeq ($(TARGET), android64)
ifeq ($(ANDROID_CC),)
$(error ANDROID_CC must be set to the clang compiler to build for android 64bit, for example /path/to/my-toolchain/bin/aarch64-linux-android-clang++)
endif
CUDA_LIBDIR=lib
ANDROID_FLAGS=-DANDROID -D_GLIBCXX_USE_C99=1 -Wno-sign-compare -D__aarch64__ -Wno-strict-aliasing -Werror -pie -fPIE -Wno-unused-command-line-argument
COMMON_FLAGS+=$(ANDROID_FLAGS)
COMMON_LD_FLAGS+=$(ANDROID_FLAGS)
CC=$(ANDROID_CC)
CUCC = $(CUDA_INSTALL_DIR)/bin/nvcc -m64 -ccbin $(CC) --compiler-options="-DANDROID -D_GLIBCXX_USE_C99=1 -Wno-sign-compare"
ANDROID=1
else ########
$(error Auto-detection of platform failed. Please specify one of the following arguments to make: TARGET=[aarch64|x86_64|ppc64le|qnx|android64])
endif

ifdef VERBOSE
AT=
else
AT=@
endif

AR = ar cr
ECHO = @echo

SHELL=/bin/sh

ROOT_PATH=../..
OUT_PATH=$(ROOT_PATH)/bin
OUTDIR=$(OUT_PATH)

define concat
$1$2$3$4$5$6$7$8
endef

#$(call make-depend,source-file,object-file,depend-file)
define make-depend
$(AT)$(CC) -MM -MF $3 -MP -MT $2 $(COMMON_FLAGS) $1
endef

#$(call make-cuda-depend,source-file,object-file,depend-file,flags)
define make-cuda-depend
$(AT)$(CUCC) -M -MT $2 $4 $1 > $3
endef

#########################
INCPATHS=
LIBPATHS=
# add cross compile directories
ifneq ($(shell uname -m), $(TARGET))
INCPATHS += -I"/usr/include/$(DLSW_TRIPLE)" -I"/usr/include/$(CUBLAS_TRIPLE)"
LIBPATHS += -L"../lib/stubs" -L"../../lib/stubs" -L"/usr/lib/$(DLSW_TRIPLE)/stubs" -L"/usr/lib/$(DLSW_TRIPLE)" -L"/usr/lib/$(CUBLAS_TRIPLE)/stubs" -L"/usr/lib/$(CUBLAS_TRIPLE)" -L"$(CUDA_INSTALL_DIR)/targets/$(CUDA_TRIPLE)/$(CUDA_LIBDIR)/stubs" -L"$(CUDA_INSTALL_DIR)/targets/$(CUDA_TRIPLE)/$(CUDA_LIBDIR)"
endif
INCPATHS += -I"../common" -I"$(CUDA_INSTALL_DIR)/include" -I"$(CUDNN_INSTALL_DIR)/include" -I"../include" -I"../../include"
LIBPATHS += -L"$(CUDA_INSTALL_DIR)/$(CUDA_LIBDIR)" -L"$(CUDNN_INSTALL_DIR)/$(CUDNN_LIBDIR)" -L"../lib" -L"../../lib"

.SUFFIXES:
vpath %.h $(EXTRA_DIRECTORIES)
vpath %.cpp $(EXTRA_DIRECTORIES)

COMMON_FLAGS += -Wall -std=c++11 $(INCPATHS)
ifneq ($(ANDROID),1)
COMMON_FLAGS += -D_REENTRANT
endif
ifeq ($(TARGET), qnx)
COMMON_FLAGS += -D_POSIX_C_SOURCE=200112L -D_QNX_SOURCE -D_FILE_OFFSET_BITS=64 -fpermissive
endif

COMMON_LD_FLAGS += $(LIBPATHS) -L$(OUTDIR)

OBJDIR =$(call concat,$(OUTDIR),/chobj)
DOBJDIR =$(call concat,$(OUTDIR),/dchobj)

COMMON_LIBS = -lcudnn -lcublas -lcudart -lopencv_dnn -lopencv_ml -lopencv_objdetect -lopencv_shape -lopencv_stitching -lopencv_superres -lopencv_videostab -lopencv_calib3d -lopencv_features2d -lopencv_highgui -lopencv_videoio -lopencv_imgcodecs -lopencv_video -lopencv_photo -lopencv_imgproc -lopencv_flann -lopencv_core -ldl -lm -lpthread -lrt -ltbb

ifneq ($(TARGET), qnx)
ifneq ($(ANDROID),1)
COMMON_LIBS += -lrt -ldl -lpthread
endif
endif
ifeq ($(ANDROID),1)
COMMON_LIBS += -lculibos -llog
endif

LIBS =-lnvinfer -lnvparsers -lnvinfer_plugin -lnvonnxparser $(COMMON_LIBS)
DLIBS =-lnvinfer -lnvparsers -lnvinfer_plugin -lnvonnxparser $(COMMON_LIBS)
OBJS =$(patsubst %.cpp, $(OBJDIR)/%.o, $(wildcard *.cpp $(addsuffix /*.cpp, $(EXTRA_DIRECTORIES))))
DOBJS =$(patsubst %.cpp, $(DOBJDIR)/%.o, $(wildcard *.cpp $(addsuffix /*.cpp, $(EXTRA_DIRECTORIES))))
CUOBJS =$(patsubst %.cu, $(OBJDIR)/%.o, $(wildcard *.cu $(addsuffix /*.cu, $(EXTRA_DIRECTORIES))))
CUDOBJS =$(patsubst %.cu, $(DOBJDIR)/%.o, $(wildcard *.cu $(addsuffix /*.cu, $(EXTRA_DIRECTORIES))))

CFLAGS=$(COMMON_FLAGS)
CFLAGSD=$(COMMON_FLAGS) -g
LFLAGS=$(COMMON_LD_FLAGS)
LFLAGSD=$(COMMON_LD_FLAGS)

all: debug release
release : $(OUTDIR)/$(OUTNAME_RELEASE)

debug : $(OUTDIR)/$(OUTNAME_DEBUG)

test: test_debug test_release

test_debug:
	$(AT)cd $(OUTDIR) && ./$(OUTNAME_DEBUG)

test_release:
	$(AT)cd $(OUTDIR) && ./$(OUTNAME_RELEASE)

ifdef MAC
$(OUTDIR)/$(OUTNAME_RELEASE) : $(OBJS) $(CUOBJS)
	$(ECHO) Linking: $@
	$(AT)$(CC) -o $@ $^ $(LFLAGS) $(LIBS)
	# Copy every EXTRA_FILE of this sample to the bin dir
	$(foreach EXTRA_FILE,$(EXTRA_FILES), cp -f $(EXTRA_FILE) $(OUTDIR)/$(EXTRA_FILE); )

$(OUTDIR)/$(OUTNAME_DEBUG) : $(DOBJS) $(CUDOBJS)
	$(ECHO) Linking: $@
	$(AT)$(CC) -o $@ $^ $(LFLAGSD) $(DLIBS)
else
$(OUTDIR)/$(OUTNAME_RELEASE) : $(OBJS) $(CUOBJS)
	$(ECHO) Linking: $@
	$(AT)$(CC) -o $@ $^ $(LFLAGS) -Wl,--start-group $(LIBS) -Wl,--end-group
	# Copy every EXTRA_FILE of this sample to the bin dir
	$(foreach EXTRA_FILE,$(EXTRA_FILES), cp -f $(EXTRA_FILE) $(OUTDIR)/$(EXTRA_FILE); )

$(OUTDIR)/$(OUTNAME_DEBUG) : $(DOBJS) $(CUDOBJS)
	$(ECHO) Linking: $@
	$(AT)$(CC) -o $@ $^ $(LFLAGSD) -Wl,--start-group $(DLIBS) -Wl,--end-group
endif

$(OBJDIR)/%.o: %.cpp
	$(AT)if [ ! -d $(OBJDIR) ]; then mkdir -p $(OBJDIR); fi
	$(foreach XDIR,$(EXTRA_DIRECTORIES), if [ ! -d $(OBJDIR)/$(XDIR) ]; then mkdir -p $(OBJDIR)/$(XDIR); fi;) :
	$(call make-depend,$<,$@,$(subst .o,.d,$@))
	$(ECHO) Compiling: $<
	$(AT)$(CC) $(CFLAGS) -c -o $@ $<

$(DOBJDIR)/%.o: %.cpp
	$(AT)if [ ! -d $(DOBJDIR) ]; then mkdir -p $(DOBJDIR); fi
	$(foreach XDIR,$(EXTRA_DIRECTORIES), if [ ! -d $(DOBJDIR)/$(XDIR) ]; then mkdir -p $(DOBJDIR)/$(XDIR); fi;) :
	$(call make-depend,$<,$@,$(subst .o,.d,$@))
	$(ECHO) Compiling: $<
	$(AT)$(CC) $(CFLAGSD) -c -o $@ $<

######################################################################### CU
$(OBJDIR)/%.o: %.cu
	$(AT)if [ ! -d $(OBJDIR) ]; then mkdir -p $(OBJDIR); fi
	$(foreach XDIR,$(EXTRA_DIRECTORIES), if [ ! -d $(OBJDIR)/$(XDIR) ]; then mkdir -p $(OBJDIR)/$(XDIR); fi;) :
	$(call make-cuda-depend,$<,$@,$(subst .o,.d,$@))
	$(ECHO) Compiling CUDA release: $<
	$(AT)$(CUCC) $(CUFLAGS) -c -o $@ $<

$(DOBJDIR)/%.o: %.cu
	$(AT)if [ ! -d $(DOBJDIR) ]; then mkdir -p $(DOBJDIR); fi
	$(foreach XDIR,$(EXTRA_DIRECTORIES), if [ ! -d $(DOBJDIR)/$(XDIR) ]; then mkdir -p $(DOBJDIR)/$(XDIR); fi;) :
	$(call make-cuda-depend,$<,$@,$(subst .o,.d,$@))
	$(ECHO) Compiling CUDA debug: $<
	$(AT)$(CUCC) $(CUFLAGSD) -c -o $@ $<

clean:
	$(ECHO) Cleaning...
	$(AT)-rm -rf $(OBJDIR) $(DOBJDIR) $(OUTDIR)/$(OUTNAME_RELEASE) $(OUTDIR)/$(OUTNAME_DEBUG)

ifneq "$(MAKECMDGOALS)" "clean"
-include $(OBJDIR)/*.d $(DOBJDIR)/*.d

ifeq ($(DO_CUDNN_CHECK), 1)
# To display newlines in the message.
define _cudnn_missing_newline_5020fd0


endef
SHELL=/bin/bash
CUDNN_CHECK = $(shell echo -e '\#include <cudnn.h>\nint main(){ cudnnCreate(nullptr); return 0; }' | $(CC) -xc++ -o /dev/null $(CFLAGS) $(LFLAGS) - $(COMMON_LIBS) 2> /dev/null && echo 'passed_cudnn_exists_check')
ifneq ($(CUDNN_CHECK), passed_cudnn_exists_check)
$(error $(_cudnn_missing_newline_5020fd0)$(_cudnn_missing_newline_5020fd0)This sample requires CUDNN, but it could not be found.$(_cudnn_missing_newline_5020fd0)Please install CUDNN from https://developer.nvidia.com/cudnn or specify CUDNN_INSTALL_DIR when compiling.$(_cudnn_missing_newline_5020fd0)For example, `make CUDNN_INSTALL_DIR=/path/to/CUDNN/` where /path/to/CUDNN/ contains include/ and lib/ subdirectories.$(_cudnn_missing_newline_5020fd0)$(_cudnn_missing_newline_5020fd0))
endif # ifneq ($(CUDNN_CHECK), passed_cudnn_exists_check)
endif # ifeq ($(DO_CUDNN_CHECK), 1)
endif # ifneq "$(MAKECMDGOALS)" "clean"
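The DO_CUDNN_CHECK logic above compiles a one-line probe program to verify that cuDNN headers and libraries are usable. Run by hand, it is roughly equivalent to the following sketch, assuming g++ and the default install locations this makefile uses (the makefile itself additionally passes its full CFLAGS and LFLAGS):

```shell
# Probe: can we compile and link a minimal cuDNN program?
CUDNN_INSTALL_DIR=${CUDNN_INSTALL_DIR:-/usr/local/cuda}
printf '#include <cudnn.h>\nint main(){ cudnnCreate(nullptr); return 0; }\n' \
  | g++ -xc++ - -o /dev/null \
        -I"$CUDNN_INSTALL_DIR/include" -L"$CUDNN_INSTALL_DIR/lib64" -lcudnn \
        2>/dev/null \
  && echo 'passed_cudnn_exists_check' \
  || echo 'cudnn not found'
```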
+36
@@ -0,0 +1,36 @@
### Common Assumptions

* The repository contains samples based on Nvidia's TensorRT C/C++ samples, modified for ease of use. Target platforms are Power with GPUs and Nvidia Jetson, either on native Linux or within Docker containers.

- Inputs can be arbitrary image files in terms of extension and resolution, passed in as command line arguments
- Outputs are a list of classes and bounding boxes per image, plus debug images with the bounding boxes marked
- The model can be arbitrary as long as it matches a supported model type (SSD and FRCNN for now; Yolo and GoogleNet soon to follow) and <model_name>_trt.prototxt and <model_name>.caffemodel are present, with the names adjusted in the source code
- Batch size can be arbitrary as long as it fits in device memory; adjustable in the source code
- Floating point precision can be arbitrary, which affects accuracy, speed and memory footprint; adjustable in the source code
- The number and names of classes can be arbitrary; also adjustable in the source code. The names could be read from the label file, but due to variations in syntax this is left for a later revision
- The number of classes is always one more than in the label file, since there is one background class
- The confidence threshold can be adjusted, which determines the number of objects recognized

### Common Use

* The samples first have to be compiled and run from the bin directory with command line parameters

- It is assumed that CUDA, cuDNN, TensorRT, gcc and OpenCV are preinstalled and the environment variables are set (see below)
- Copy the samples over the respective TensorRT sample directories
- Modify the code to match the desired model, batch size, floating point precision, image folder and class names
- Compile the source code via make from the respective sample's directory
- Run the binary (release or debug) from the bin folder, passing in the file names as "name.ext" "name.ext", etc.
- On the initial run, if the TensorRT engine for the model has not been built before, it will take a little time to parse the model and serialize the engine to file
- On subsequent runs, if no changes were made to the model, the engine will be deserialized from the earlier saved one

### Common Prerequisites

* Following are the prerequisite steps to set up the correct native or Docker environment, for both Power and Jetson, at build and runtime

- If building on Power within Docker, it is best to start with the nvidia/cuda-ppc64le:10.1-cudnn7-devel-ubuntu18.04 image and add the latest TensorRT SDK (currently 5.1.3.2, with CUDA 10.1 and cuDNN 7.5, for Power)
- Install or build OpenCV version 3.3.1 or above
- If building natively on Jetson TX2, follow the steps described in the Nvidia JetPack installation. This requires an Ubuntu host machine to initially flash the board via the JetPack manager (currently 4.2.2). All the prerequisites, if checked during installation, are preinstalled and ready for use
  - Note that Host Machine needs to be unchecked, TensorFlow can be unchecked, and Jetson TX2 checked

- Please follow the rest of the instructions from the Nvidia samples README.md