Add PDF-to-image export and accept java.io.File inputs in merge/split.

NOTE(review): reconstructed from a copy whose line breaks had been collapsed.
Indentation of unchanged context lines is a best guess and the `index` blob
ids are the original ones - regenerate against the real tree before applying.

diff --git a/README.md b/README.md
index 1e2918c..e6bd0e0 100644
--- a/README.md
+++ b/README.md
@@ -58,6 +58,25 @@ Splits into two PDFs, the first having 5 pages and second has rest
 (pdf/split-pdf-at :input "test/pdfs/multi-page.pdf" :split 5)
 ```
 
+### Export a PDF to an image
+```clojure
+ (require '[pdfboxing.image :as image])
+```
+Export a thumbnail of a PDF
+```clojure
+ (image/export-to-image :input "test/pdfs/multi-page.pdf")
+```
+
+Export a thumbnail with custom DPI (default is 300)
+```clojure
+ (image/export-to-image :input "test/pdfs/multi-page.pdf" :dpi 72)
+```
+
+Export a thumbnail of a specific page (zero-based index, defaults to the first page)
+```clojure
+ (image/export-to-image :input "test/pdfs/multi-page.pdf" :page-idx 1)
+```
+
 ### List form fields of a PDF
 
 To list fields and values:
diff --git a/src/pdfboxing/common.clj b/src/pdfboxing/common.clj
index a7d36e4..6eff771 100644
--- a/src/pdfboxing/common.clj
+++ b/src/pdfboxing/common.clj
@@ -5,6 +5,11 @@
            (org.apache.pdfbox.io RandomAccessFile)
            (org.apache.pdfbox.pdfparser PDFParser)))
 
+(defn throw-exception
+  "Throw an IllegalArgumentException with the given message."
+  [message]
+  (throw (IllegalArgumentException. message)))
+
 (defn try-get-as-pdf
   "Try and get the pdf-file-or-path as a PDF.
   Returns nil if pdf-file-or-path could not be loaded as a PDF."
diff --git a/src/pdfboxing/image.clj b/src/pdfboxing/image.clj
new file mode 100644
index 0000000..6284b2e
--- /dev/null
+++ b/src/pdfboxing/image.clj
@@ -0,0 +1,49 @@
+(ns pdfboxing.image
+  (:require [clojure.spec.alpha :as s]
+            [pdfboxing.common :as common]
+            [pdfboxing.info :as info])
+  (:import [org.apache.pdfbox.rendering PDFRenderer ImageType]
+           [org.apache.pdfbox.pdmodel PDDocument]
+           [java.awt.image BufferedImage]))
+
+;; Specs for the options accepted by, and the value returned from, `export-to-image`.
+(s/def ::input #(or (instance? PDDocument %)
+                    (common/is-pdf? %)))
+(s/def ::dpi int?)
+(s/def ::page-idx int?)
+(s/def ::export-to-image-config
+  (s/keys :req-un [::input]
+          :opt-un [::dpi ::page-idx]))
+
+(s/def ::export-to-image-ret #(instance? BufferedImage %))
+
+(defn- page-idx-in-bounds
+  "Return true when page-idx is a valid zero-based page index for input;
+  otherwise throw an IllegalArgumentException."
+  [page-idx input]
+  (if (<= 0 page-idx (dec (info/page-number input)))
+    true
+    (common/throw-exception "Page index out of bounds")))
+
+(defn export-to-image
+  "Export one page of a PDF or PDDocument as a BufferedImage.
+
+  Options:
+    :input    - a PDDocument, or anything `common/is-pdf?` accepts (required)
+    :dpi      - integer rendering resolution (default 300)
+    :page-idx - zero-based index of the page to render (default 0)
+
+  Only one page is exported. Split the document first if you want one
+  image for each page.
+  Options that fail the spec trip the :pre assertion; an out-of-bounds
+  :page-idx throws IllegalArgumentException."
+  [& {:keys [input dpi page-idx]
+      :or {dpi 300 page-idx 0}
+      :as config}]
+  {:pre [(s/valid? ::export-to-image-config config)
+         (page-idx-in-bounds page-idx input)]
+   :post [(s/valid? ::export-to-image-ret %)]}
+  ;; NOTE(review): with-open closes whatever obtain-document returns; if that
+  ;; is the caller's own PDDocument it gets closed too - confirm this is intended.
+  (with-open [doc (common/obtain-document input)]
+    (.renderImageWithDPI (PDFRenderer. doc) page-idx dpi ImageType/RGB)))
diff --git a/src/pdfboxing/merge.clj b/src/pdfboxing/merge.clj
index 1b90326..e67e1ed 100644
--- a/src/pdfboxing/merge.clj
+++ b/src/pdfboxing/merge.clj
@@ -8,36 +8,36 @@
            (org.apache.pdfbox.pdmodel.common PDRectangle)
            (org.apache.pdfbox.pdmodel.graphics.image PDImageXObject)))
 
-(defn throw-exception
-  [message]
-  (throw (IllegalArgumentException. message)))
-
 (defn check-if-present
   "Check if the input & output file names where supplied"
   [input output]
   (when (some true? (map empty? [input output]))
-    (throw-exception "argument can't be empty")))
+    (common/throw-exception "argument can't be empty")))
 
 (defn check-for-pdfs
   "Check if all the files supplied are actual PDFs."
   [files]
   (if (some false? (map common/is-pdf? files))
-    (throw-exception "the files supplied need to be PDFs")
+    (common/throw-exception "the files supplied need to be PDFs")
     true))
 
 (defn arg-check [output input]
   (check-if-present input output)
   (if (sequential? input)
     (check-for-pdfs input)
-    (throw-exception "input - needs to be sequential")))
+    (common/throw-exception "input - needs to be sequential")))
 
 (defn merge-pdfs
   "merge multiple PDFs into output file"
   [& {:keys [output input]}]
   {:pre [(arg-check output input)]}
   (let [merger (PDFMergerUtility.)]
-    (doseq [f input]
-      (.addSource merger (FileInputStream. (File. f))))
+    (doseq [f input
+            :let [file (if (string? f)
+                         (File. f)
+                         f)]]
+      ;; Pass the File itself so no FileInputStream is opened here and left unclosed.
+      (.addSource merger ^File file))
     (.setDestinationFileName merger output)
     (.mergeDocuments merger)))
 
diff --git a/src/pdfboxing/split.clj b/src/pdfboxing/split.clj
index 29c598b..70c7524 100644
--- a/src/pdfboxing/split.clj
+++ b/src/pdfboxing/split.clj
@@ -2,19 +2,20 @@
   (:require [clojure.string :as s]
             [pdfboxing.common :as common]
             [pdfboxing.merge :as merge])
-  (:import (org.apache.pdfbox.multipdf PDFMergerUtility Splitter)))
+  (:import [org.apache.pdfbox.multipdf PDFMergerUtility Splitter]
+           [java.io File]))
 
 (defn check-if-integer
   [coll]
   (if (every? integer? coll)
     true
-    (merge/throw-exception ":start and :end may only be integers")))
+    (common/throw-exception ":start and :end may only be integers")))
 
 (defn arg-check [input start end split]
   (let [int-args [start end split]]
-    (if (string? input)
+    (if (or (string? input) (instance? File input))
       (merge/check-for-pdfs [input])
-      (merge/throw-exception "input must be a string"))
+      (common/throw-exception "input must be a string or a File"))
     (check-if-integer (filter (complement nil?) int-args))))
 
 (defn pddocument->byte-array
@@ -53,7 +54,8 @@
       (into [] (.split splitter doc)))))
 
 (defn split-pdf-at
-  "Splits a pdf into two documents and writes them to disk"
+  "Splits a pdf into two documents and writes them to disk.
+  If the :split key is not provided, the document is split approximately in half."
   [& {:keys [input split]}]
   (let [base-name (first (s/split input #".pdf"))
         f-names (for [x (range 1 3)] (str base-name "-" x ".pdf"))
diff --git a/test/pdfboxing/image_test.clj b/test/pdfboxing/image_test.clj
new file mode 100644
index 0000000..b4996a5
--- /dev/null
+++ b/test/pdfboxing/image_test.clj
@@ -0,0 +1,18 @@
+(ns pdfboxing.image-test
+  (:require [clojure.test :refer [deftest is]]
+            [pdfboxing.image :as image])
+  (:import [java.awt.image BufferedImage]))
+
+(deftest export-to-image
+  (let [file "test/pdfs/multi-page.pdf"
+        exporting-outcome (image/export-to-image :input file)
+        exporting-outcome-other-page (image/export-to-image :input file :page-idx 1)
+        exporting-outcome-small-dpi (image/export-to-image :input file :dpi 72)]
+    (is (instance? BufferedImage exporting-outcome))
+    (is (instance? BufferedImage exporting-outcome-other-page))
+    (is (thrown? IllegalArgumentException (image/export-to-image :input file :page-idx 100)))
+    (is (instance? BufferedImage exporting-outcome-small-dpi))
+    ;; NOTE(review): BufferedImage does not override equals, so this only shows the
+    ;; two results are distinct objects, not that the rendered pages differ.
+    (is (not= exporting-outcome exporting-outcome-other-page))
+    (is (> (.getWidth exporting-outcome) (.getWidth exporting-outcome-small-dpi)))))
diff --git a/test/pdfboxing/merge_test.clj b/test/pdfboxing/merge_test.clj
index 945ac51..fe0e92d 100644
--- a/test/pdfboxing/merge_test.clj
+++ b/test/pdfboxing/merge_test.clj
@@ -1,8 +1,9 @@
 (ns pdfboxing.merge-test
   (:require [clojure.java.io :as io]
-            [clojure.test :refer [deftest is]]
+            [clojure.test :refer [deftest is testing]]
             [pdfboxing.common :as common]
-            [pdfboxing.merge :refer [arg-check merge-pdfs]]))
+            [pdfboxing.merge :refer [arg-check merge-pdfs]])
+  (:import [java.io File]))
 
 (deftest input-output-argument-check
   (is (thrown? IllegalArgumentException (arg-check)))
@@ -21,7 +22,14 @@
                                     :input ["test/pdfs/clojure-1.pdf" "test/pdfs/clojure-2.pdf"])
         merged-pdf-file (.exists (io/as-file file))]
     (is (true? merged-pdf-file))
-    (is (true? (common/is-pdf? file)))))
+    (is (true? (common/is-pdf? file)))
+
+    (testing "Accepts both file paths and File instances as an input"
+      (let [merging-outcome (merge-pdfs :output file
+                                        :input [(File. "test/pdfs/clojure-1.pdf") "test/pdfs/clojure-2.pdf"])
+            merged-pdf-file (.exists (io/as-file file))]
+        (is (true? merged-pdf-file))
+        (is (true? (common/is-pdf? file)))))))
 
 ;; clean up
 (defn clean-up [file]