From e1502138ae73d504165569cb2b67300635e3bb5c Mon Sep 17 00:00:00 2001
From: Anis Ahmad
Date: Thu, 25 Jan 2018 22:18:41 +0600
Subject: [PATCH] Releasing the first working version

---
 .gitignore |   3 +
 README.md  |   4 +-
 main.go    | 173 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 util.go    |  56 +++++++++++++++++
 4 files changed, 235 insertions(+), 1 deletion(-)
 create mode 100644 main.go
 create mode 100644 util.go

diff --git a/.gitignore b/.gitignore
index a1338d6..7a056d7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,6 @@
 
 # Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736
 .glide/
+
+# Project specific
+merge2pdf
\ No newline at end of file
diff --git a/README.md b/README.md
index 6f16cef..b3bc39d 100644
--- a/README.md
+++ b/README.md
@@ -41,7 +41,9 @@
 
 If you fix a bug or want to add/improve a feature, and it's alligned to the focus (merging with ease) of this tool, I will be glad to accept your PR.
 
-Thanks
+### Thanks
+
+This tool was made using the beautiful [Unidoc](https://unidoc.io/) library. Thanks to **Unidoc**.
 
 ---
 > "This is the Book about which there is no doubt, a guidance for those conscious of Allah" - [Al-Quran](http://quran.com)
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..c8de806
--- /dev/null
+++ b/main.go
@@ -0,0 +1,173 @@
+package main
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"strconv"
+	"strings"
+
+	unicommon "github.com/unidoc/unidoc/common"
+	pdf "github.com/unidoc/unidoc/pdf/model"
+)
+
+func init() {
+	// Debug log level.
+	unicommon.SetLogger(unicommon.NewConsoleLogger(unicommon.LogLevelDebug))
+}
+
+// main parses the command line: the first argument is the output path and
+// every further argument is an input path, optionally followed by "~" and a
+// comma separated list of page numbers. It then merges the inputs.
+func main() {
+	if len(os.Args) < 3 {
+		fmt.Printf("Requires at least 2 arguments: output_path and 1 or more input paths(and optional page numbers) \n")
+		fmt.Printf("Usage: merge2pdf output.pdf input1.pdf input2.pdf~1,2,3 ...\n")
+		os.Exit(0)
+	}
+
+	outputPath := os.Args[1]
+	inputPaths := []string{}
+	inputPages := [][]int{}
+
+	// Sanity check the input arguments.
+	for _, arg := range os.Args[2:] {
+		fileInputParts := strings.Split(arg, "~")
+		inputPaths = append(inputPaths, fileInputParts[0])
+		pages := []int{}
+
+		if len(fileInputParts) > 1 {
+			for _, e := range strings.Split(fileInputParts[1], ",") {
+				pageNo, err := strconv.Atoi(strings.Trim(e, " \n"))
+				if err != nil {
+					// fmt.Errorf only builds an error value, so the
+					// message is printed explicitly before exiting.
+					fmt.Println("Invalid format! Example of a file input with page numbers: path/to/abc.pdf~1,2,3,5,6")
+					os.Exit(1)
+				}
+				pages = append(pages, pageNo)
+			}
+		}
+
+		// An empty list makes addPdfPages copy every page.
+		inputPages = append(inputPages, pages)
+	}
+
+	err := mergePdf(inputPaths, inputPages, outputPath)
+	if err != nil {
+		fmt.Printf("Error: %v\n", err)
+		os.Exit(1)
+	}
+
+	fmt.Printf("Complete, see output file: %s\n", outputPath)
+}
+
+// mergePdf appends the selected pages of every input file to one PDF and
+// writes it to outputPath. inputPages[i] holds the page numbers to take from
+// inputPaths[i]; an empty list selects every page. Directories and images are
+// rejected with an error, inputs of any other non-PDF type are skipped.
+func mergePdf(inputPaths []string, inputPages [][]int, outputPath string) error {
+	pdfWriter := pdf.NewPdfWriter()
+
+	for i, inputPath := range inputPaths {
+		f, err := os.Open(inputPath)
+		if err != nil {
+			return err
+		}
+		// Closed at function exit rather than per iteration.
+		// NOTE(review): presumably the writer still needs the inputs open
+		// when Write runs below - confirm before closing earlier.
+		defer f.Close()
+
+		fileType, typeError := getFileType(f)
+		if typeError != nil {
+			// Returning nil here would report success without any output.
+			return typeError
+		}
+
+		if fileType == "directory" {
+			// @TODO : Read all files in directory
+			return errors.New(inputPath + " is a drectory.")
+		} else if fileType == "application/pdf" {
+			err := addPdfPages(f, inputPages[i], &pdfWriter)
+			if err != nil {
+				return err
+			}
+		} else if ok, _ := in_array(fileType, []string{"image/jpg", "image/jpeg", "image/png"}); ok {
+			return errors.New(inputPath + " Images is not supproted yet.")
+		}
+	}
+
+	fWrite, err := os.Create(outputPath)
+	if err != nil {
+		return err
+	}
+
+	if err := pdfWriter.Write(fWrite); err != nil {
+		fWrite.Close()
+		return err
+	}
+
+	// Close explicitly so that a failure to flush the output is reported.
+	return fWrite.Close()
+}
+
+// getReader creates a PDF reader for rs. An encrypted document is decrypted
+// with the empty password; if that does not authenticate, an error is returned.
+func getReader(rs io.ReadSeeker) (*pdf.PdfReader, error) {
+	pdfReader, err := pdf.NewPdfReader(rs)
+	if err != nil {
+		return nil, err
+	}
+
+	isEncrypted, err := pdfReader.IsEncrypted()
+	if err != nil {
+		return nil, err
+	}
+
+	if isEncrypted {
+		auth, err := pdfReader.Decrypt([]byte(""))
+		if err != nil {
+			return nil, err
+		}
+		if !auth {
+			return nil, errors.New("Cannot merge encrypted, password protected document")
+		}
+	}
+
+	return pdfReader, nil
+}
+
+// addPdfPages copies pages from the PDF in file to writer. pages lists the
+// page numbers to copy, in order; when it is empty every page is copied.
+func addPdfPages(file io.ReadSeeker, pages []int, writer *pdf.PdfWriter) error {
+	pdfReader, err := getReader(file)
+	if err != nil {
+		return err
+	}
+
+	if len(pages) == 0 {
+		numPages, err := pdfReader.GetNumPages()
+		if err != nil {
+			return err
+		}
+		// Page numbers handed to GetPage start at 1.
+		pages = make([]int, numPages)
+		for i := range pages {
+			pages[i] = i + 1
+		}
+	}
+
+	for _, pageNo := range pages {
+		page, err := pdfReader.GetPage(pageNo)
+		if err != nil {
+			return err
+		}
+		if err := writer.AddPage(page); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
diff --git a/util.go b/util.go
new file mode 100644
index 0000000..bc87257
--- /dev/null
+++ b/util.go
@@ -0,0 +1,56 @@
+package main
+
+import (
+	"io"
+	"net/http"
+	"os"
+	"reflect"
+)
+
+// in_array reports whether val occurs in array, which is expected to be a
+// slice, together with the index of the first match. When val is missing or
+// array is not a slice it returns false and -1.
+func in_array(val interface{}, array interface{}) (exists bool, index int) {
+	// reflect.ValueOf reports the Invalid kind for a nil array, whereas
+	// reflect.TypeOf(array).Kind() would panic.
+	s := reflect.ValueOf(array)
+	if s.Kind() != reflect.Slice {
+		return false, -1
+	}
+
+	for i := 0; i < s.Len(); i++ {
+		if reflect.DeepEqual(val, s.Index(i).Interface()) {
+			return true, i
+		}
+	}
+
+	return false, -1
+}
+
+// getFileType returns "directory" for a directory and otherwise the content
+// type sniffed from the beginning of file. The read offset is reset to the
+// start afterwards so the caller can process the file from its first byte.
+func getFileType(file *os.File) (string, error) {
+	info, statErr := file.Stat()
+	if statErr != nil {
+		return "error", statErr
+	}
+	if info.IsDir() {
+		return "directory", nil
+	}
+
+	// Only the first 512 bytes are used to sniff the content type.
+	buffer := make([]byte, 512)
+	n, readError := file.Read(buffer)
+	if readError != nil {
+		return "error", readError
+	}
+	if _, seekErr := file.Seek(0, io.SeekStart); seekErr != nil {
+		return "error", seekErr
+	}
+
+	// Pass only the bytes actually read so the zero padding of a short file
+	// does not take part in the detection. DetectContentType always returns a
+	// valid content type, "application/octet-stream" if nothing else matched.
+	return http.DetectContentType(buffer[:n]), nil
+}