clj-htmltopdf/src/clj_htmltopdf/core.clj

110 lines
4 KiB
Clojure
Raw Normal View History

2017-04-01 14:58:23 -04:00
(ns clj-htmltopdf.core
(:require
[clojure.java.io :as io]
[hiccup.page :as h]
2017-08-17 19:46:21 -04:00
[clj-htmltopdf.objects :as obj]
[clj-htmltopdf.options :as opt]
[clj-htmltopdf.watermark :as w]
[clj-htmltopdf.utils :as u])
2017-04-01 14:58:23 -04:00
(:import
2017-05-04 21:02:31 -04:00
[java.io InputStream OutputStream PipedInputStream PipedOutputStream]
[java.net URLConnection]
[java.util Base64]
2017-04-01 14:58:23 -04:00
[com.openhtmltopdf DOMBuilder]
[com.openhtmltopdf.pdfboxout PdfRendererBuilder]
[com.openhtmltopdf.util XRLog]
[org.jsoup Jsoup]
2017-04-02 12:52:01 -04:00
[org.jsoup.nodes Document]))
2017-04-01 14:58:23 -04:00
(defn ->inline-image
  "Reads an image file and encodes it as a base64 data URL suitable for displaying inline images in <img> tags
  or for use in CSS.

  image-file is anything accepted by clojure.java.io/file (e.g. a path string or a java.io.File).

  The MIME type is sniffed from the leading bytes of the file via URLConnection/guessContentTypeFromStream. When
  it cannot be determined, the media type is left empty and the result starts with \"data:;base64,\".

  Throws an Exception (wrapping the underlying cause) if the file cannot be read or encoded."
  [image-file]
  (try
    (let [file        (io/file image-file)
          ;; the stream is only needed for content sniffing; with-open closes it even if sniffing throws
          mime-type   (with-open [is (io/input-stream file)]
                        (URLConnection/guessContentTypeFromStream is))
          ;; readAllBytes reads until EOF, whereas a single InputStream.read call may legally return early
          image-bytes (java.nio.file.Files/readAllBytes (.toPath file))
          b64-str     (.encodeToString (Base64/getEncoder) image-bytes)]
      ;; RFC 2397 requires the ";base64," marker even when the media type is omitted.
      ;; (str nil) is "", so an unknown type yields "data:;base64,<payload>".
      (str "data:" mime-type ";base64," b64-str))
    (catch Exception ex
      (throw (Exception. (str "Exception converting image to inline base64 string: " image-file) ex)))))
2017-04-02 12:52:01 -04:00
(defn- read-html-string
  "Turns the supported HTML inputs into a single HTML string:
  a string is returned untouched, a sequential value is rendered as Hiccup through hiccup.page/html5, and
  anything else is opened with clojure.java.io/reader and read to the end."
  ^String [in]
  (if (string? in)
    in
    (if (sequential? in)
      (h/html5 {} in)
      ;; the reader is closed as soon as its content has been slurped
      (with-open [rdr (io/reader in)]
        (slurp rdr)))))
2017-04-01 14:58:23 -04:00
(defn- ->output-stream
  "Coerces out to an OutputStream. A value that already is an OutputStream is handed back as-is (not wrapped);
  everything else goes through clojure.java.io/output-stream."
  ^OutputStream [out]
  (cond-> out
    (not (instance? OutputStream out)) (io/output-stream)))
2017-04-01 14:58:23 -04:00
(defn- configure-logging!
  "Switches XRLog logging on or off according to the :logging? option. When logging is on and a :logger option
  is supplied, that logger is installed via XRLog/setLoggerImpl before logging is enabled."
  [options]
  (let [enabled? (boolean (:logging? options))]
    (when enabled?
      ;; a custom logger is only installed when logging is actually requested
      (when-let [custom-logger (:logger options)]
        (XRLog/setLoggerImpl custom-logger)))
    (XRLog/setLoggingEnabled enabled?)))
(defn- prepare-html
  "Reads the HTML from in, parses it with Jsoup and passes the parsed document together with options to
  opt/inject-options-into-html!. When [:debug :display-html?] is set in options, the resulting document is
  printed to *out*. Returns the parsed document."
  [in options]
  (let [doc (doto (Jsoup/parse (read-html-string in))
              (opt/inject-options-into-html! options))]
    ;; debugging aid: dump the document as it looks after option injection
    (when (get-in options [:debug :display-html?])
      (println (str doc)))
    doc))
(defn- write-pdf!
  "Configures a PdfRendererBuilder for html-doc and starts rendering on a future.

  The rendered bytes are written to a PipedOutputStream whose connected PipedInputStream is handed back to the
  caller. Returns a map with:
    :pdf      - the PipedInputStream to read the PDF bytes from
    :renderer - the future running the render; deref it to surface any rendering exception"
  [^Document html-doc options]
  (let [builder  (PdfRendererBuilder.)
        base-uri (opt/->base-uri options)]
    (doto builder
      (obj/set-object-drawer-factory options)
      (.withW3cDocument (DOMBuilder/jsoup2DOM html-doc) base-uri))
    (let [pdf-source (PipedInputStream.)
          pdf-sink   (PipedOutputStream. pdf-source)
          render-job (future
                       (try
                         ;; the write end of the pipe is closed when rendering ends, successfully or not
                         (with-open [os pdf-sink]
                           (.toStream builder os)
                           (.run builder))
                         (catch Exception ex
                           (throw (Exception. "Exception while rendering PDF" ex)))))]
      {:pdf      pdf-source
       :renderer render-job})))
2017-05-04 21:02:31 -04:00
2017-04-01 14:58:23 -04:00
(defn ->pdf
  "Renders HTML to a PDF document.

  in      - the HTML to render: an HTML string, Hiccup-style HTML, or anything clojure.java.io/reader can open
            (e.g. a file).
  out     - where the PDF is written. Coerced to an OutputStream via clojure.java.io/output-stream unless it
            already is one.
  options - optional map of rendering options.

  Without a :watermark option the PDF is copied to the output stream, which is then closed and returned. With a
  :watermark option the result of w/write-watermark! is returned instead.

  Any exception thrown while rendering is rethrown from here when the renderer future is dereferenced."
  [in out & [options]]
  (let [options  (opt/get-final-options options)
        html-doc (prepare-html in options)]
    (configure-logging! options)
    (let [{:keys [pdf renderer]} (write-pdf! html-doc options)
          result (try
                   (let [out (->output-stream out)]
                     (if (:watermark options)
                       (w/write-watermark! pdf out options)
                       (with-open [os out]
                         (io/copy pdf os)
                         os)))
                   (finally
                     ; always close the read end of the pipe. if coercing/writing the output failed part-way,
                     ; this makes the renderer's next write fail instead of leaving the future blocked on a
                     ; full pipe that nobody reads anymore.
                     ; NOTE(review): assumes w/write-watermark! has finished reading 'pdf' when it returns
                     (.close ^InputStream pdf)))]
      ; this is a little weird, but because of the whole piped stream thing in write-pdf!, we need to render the
      ; PDF in a future. if something throws an exception during rendering, it would otherwise get eaten
      ; silently by the future... except if we deref the future! thus the explicit call to deref it here
      (deref renderer)
      result)))