Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for exporting pdf to image #56

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,25 @@ Splits into two PDFs, the first having 5 pages and second has rest
(pdf/split-pdf-at :input "test/pdfs/multi-page.pdf" :split 5)
```

### Export a PDF to an image
```clojure
(require '[pdfboxing.image :as image])
```
Export a thumbnail of a PDF (returns a `java.awt.image.BufferedImage` of the first page)
```clojure
(image/export-to-image :input "test/pdfs/multi-page.pdf")
```

Export a thumbnail with a custom DPI (the default is 300)
```clojure
(image/export-to-image :input "test/pdfs/multi-page.pdf" :dpi 72)
```

Export a thumbnail of a specific page (`:page-idx` is zero-based and defaults to 0, the first page)
```clojure
(image/export-to-image :input "test/pdfs/multi-page.pdf" :page-idx 1)
```

### List form fields of a PDF

To list fields and values:
Expand Down
4 changes: 4 additions & 0 deletions src/pdfboxing/common.clj
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
(org.apache.pdfbox.io RandomAccessFile)
(org.apache.pdfbox.pdfparser PDFParser)))

(defn throw-exception
  "Throw an IllegalArgumentException built from `message`.
  Never returns normally."
  [message]
  (let [error (new IllegalArgumentException message)]
    (throw error)))

(defn try-get-as-pdf
"Try and get the pdf-file-or-path as a PDF.
Returns nil if pdf-file-or-path could not be loaded as a PDF."
Expand Down
36 changes: 36 additions & 0 deletions src/pdfboxing/image.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
(ns pdfboxing.image
(:require [clojure.spec.alpha :as s]
[pdfboxing.common :as common]
[pdfboxing.info :as info])
(:import [org.apache.pdfbox.rendering PDFRenderer ImageType]
[org.apache.pdfbox.pdmodel PDDocument]
[java.awt.image BufferedImage]))

;; Specs used by `export-to-image` to validate its arguments and its result.

;; :input is valid when it is already a PDDocument, or when
;; `common/is-pdf?` accepts it.
(s/def ::input #(or (instance? PDDocument %)
(common/is-pdf? %)))
;; Rendering resolution and page index must both satisfy `int?`.
(s/def ::dpi int?)
(s/def ::page-idx int?)
;; Option map: :input is required, :dpi and :page-idx are optional
;; (unqualified keys).
(s/def ::export-to-image-config
(s/keys :req-un [::input]
:opt-un [::dpi ::page-idx]))

;; The exported result must be a java.awt.image.BufferedImage.
(s/def ::export-to-image-ret #(instance? BufferedImage %))

(defn- page-idx-in-bounds
  "Return true when `page-idx` lies between 0 and one less than the value
  reported by `info/page-number` for `input` (inclusive); otherwise throw
  via `common/throw-exception`."
  [page-idx input]
  (let [last-idx (dec (info/page-number input))]
    (when-not (<= 0 page-idx last-idx)
      (common/throw-exception "Page index out of bounds"))
    true))

(defn export-to-image
  "Render a single page of a PDF as a java.awt.image.BufferedImage.

  Keyword arguments:
    :input    - required; a PDDocument or anything `common/is-pdf?` accepts
    :dpi      - optional integer resolution, 300 when omitted
    :page-idx - optional zero-based page index, 0 (the first page) when omitted

  The page is rendered with ImageType/RGB. Arguments are checked in the
  :pre conditions: first against the ::export-to-image-config spec, then the
  page index against the document's page count (an out-of-range index throws
  IllegalArgumentException). The :post condition checks that the result is a
  BufferedImage.

  Exactly one page is rendered per call; split the document beforehand to
  get one image per page."
  [& {:keys [input dpi page-idx]
      :or {dpi 300 page-idx 0}
      :as config}]
  {:pre [(s/valid? ::export-to-image-config config)
         (page-idx-in-bounds page-idx input)]
   :post [(s/valid? ::export-to-image-ret %)]}
  ;; NOTE(review): with-open closes whatever common/obtain-document returns;
  ;; if that can be the caller's own PDDocument it would be closed on
  ;; return - confirm against common/obtain-document.
  (with-open [document (common/obtain-document input)]
    (let [renderer (PDFRenderer. document)]
      (.renderImageWithDPI renderer page-idx dpi ImageType/RGB))))
17 changes: 8 additions & 9 deletions src/pdfboxing/merge.clj
Original file line number Diff line number Diff line change
Expand Up @@ -8,36 +8,35 @@
(org.apache.pdfbox.pdmodel.common PDRectangle)
(org.apache.pdfbox.pdmodel.graphics.image PDImageXObject)))

(defn throw-exception
[message]
(throw (IllegalArgumentException. message)))

(defn check-if-present
"Check if the input & output file names where supplied"
[input output]
(when (some true? (map empty? [input output]))
(throw-exception "argument can't be empty")))
(common/throw-exception "argument can't be empty")))

(defn check-for-pdfs
"Check if all the files supplied are actual PDFs."
[files]
(if (some false? (map common/is-pdf? files))
(throw-exception "the files supplied need to be PDFs")
(common/throw-exception "the files supplied need to be PDFs")
true))

(defn arg-check [output input]
(check-if-present input output)
(if (sequential? input)
(check-for-pdfs input)
(throw-exception "input - needs to be sequential")))
(common/throw-exception "input - needs to be sequential")))

(defn merge-pdfs
"merge multiple PDFs into output file"
[& {:keys [output input]}]
{:pre [(arg-check output input)]}
(let [merger (PDFMergerUtility.)]
(doseq [f input]
(.addSource merger (FileInputStream. (File. f))))
(doseq [f input
:let [file (if (string? f)
(File. f)
f)]]
(.addSource merger (FileInputStream. file)))
(.setDestinationFileName merger output)
(.mergeDocuments merger)))

Expand Down
12 changes: 7 additions & 5 deletions src/pdfboxing/split.clj
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,20 @@
(:require [clojure.string :as s]
[pdfboxing.common :as common]
[pdfboxing.merge :as merge])
(:import (org.apache.pdfbox.multipdf PDFMergerUtility Splitter)))
(:import [org.apache.pdfbox.multipdf PDFMergerUtility Splitter]
[java.io File]))

(defn check-if-integer
[coll]
(if (every? integer? coll)
true
(merge/throw-exception ":start and :end may only be integers")))
(common/throw-exception ":start and :end may only be integers")))

(defn arg-check [input start end split]
(let [int-args [start end split]]
(if (string? input)
(if (or (string? input) (instance? File input))
(merge/check-for-pdfs [input])
(merge/throw-exception "input must be a string"))
(common/throw-exception "input must be a string"))
(check-if-integer (filter (complement nil?) int-args))))

(defn pddocument->byte-array
Expand Down Expand Up @@ -53,7 +54,8 @@
(into [] (.split splitter doc)))))

(defn split-pdf-at
"Splits a pdf into two documents and writes them to disk"
"Splits a pdf into two documents and writes them to disk
If the split key is not provided then it will split the document approx. in half."
[& {:keys [input split]}]
(let [base-name (first (s/split input #".pdf"))
f-names (for [x (range 1 3)] (str base-name "-" x ".pdf"))
Expand Down
16 changes: 16 additions & 0 deletions test/pdfboxing/image_test.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
(ns pdfboxing.image-test
(:require [clojure.test :refer [deftest is]]
[pdfboxing.image :as image])
(:import [java.awt.image BufferedImage]))

(deftest export-to-image
  ;; Renders test/pdfs/multi-page.pdf with default options, with an explicit
  ;; page index and with a lower DPI, then checks the results.
  (let [pdf-path "test/pdfs/multi-page.pdf"
        default-render (image/export-to-image :input pdf-path)
        second-page-render (image/export-to-image :input pdf-path :page-idx 1)
        low-dpi-render (image/export-to-image :input pdf-path :dpi 72)]
    ;; every successful export yields a BufferedImage
    (is (instance? BufferedImage default-render))
    (is (instance? BufferedImage second-page-render))
    ;; page index 100 is expected to be rejected as out of bounds
    (is (thrown? IllegalArgumentException (image/export-to-image :input pdf-path :page-idx 100)))
    (is (instance? BufferedImage low-dpi-render))
    ;; NOTE(review): BufferedImage does not override equals, so this compares
    ;; object identity and passes for any two separately rendered images -
    ;; consider comparing pixel data if page content is what should differ.
    (is (not= default-render second-page-render))
    ;; the default-DPI render must be wider than the 72 DPI render
    (is (> (.getWidth default-render) (.getWidth low-dpi-render)))))
14 changes: 11 additions & 3 deletions test/pdfboxing/merge_test.clj
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
(ns pdfboxing.merge-test
(:require [clojure.java.io :as io]
[clojure.test :refer [deftest is]]
[clojure.test :refer [deftest is testing]]
[pdfboxing.common :as common]
[pdfboxing.merge :refer [arg-check merge-pdfs]]))
[pdfboxing.merge :refer [arg-check merge-pdfs]])
(:import [java.io File]))

(deftest input-output-argument-check
(is (thrown? IllegalArgumentException (arg-check)))
Expand All @@ -21,7 +22,14 @@
:input ["test/pdfs/clojure-1.pdf" "test/pdfs/clojure-2.pdf"])
merged-pdf-file (.exists (io/as-file file))]
(is (true? merged-pdf-file))
(is (true? (common/is-pdf? file)))))
(is (true? (common/is-pdf? file)))

(testing "Accepts both file paths and File instances as an input"
(let [merging-outcome (merge-pdfs :output file
:input [(File. "test/pdfs/clojure-1.pdf") "test/pdfs/clojure-2.pdf"])
merged-pdf-file (.exists (io/as-file file))]
(is (true? merged-pdf-file))
(is (true? (common/is-pdf? file)))))))

;; clean up
(defn clean-up [file]
Expand Down