clj-htmltopdf/src/clj_htmltopdf/core.clj

115 lines
4.2 KiB
Clojure
Raw Normal View History

2017-04-01 14:58:23 -04:00
(ns clj-htmltopdf.core
(:require
[clojure.java.io :as io]
[hiccup.page :as h]
2017-08-17 19:46:21 -04:00
[clj-htmltopdf.objects :as obj]
[clj-htmltopdf.options :as opt]
[clj-htmltopdf.watermark :as w]
[clj-htmltopdf.utils :as u])
2017-04-01 14:58:23 -04:00
(:import
2017-05-04 21:02:31 -04:00
[java.io InputStream OutputStream PipedInputStream PipedOutputStream]
[java.net URLConnection]
[java.util Base64]
2017-04-01 14:58:23 -04:00
[com.openhtmltopdf DOMBuilder]
[com.openhtmltopdf.pdfboxout PdfRendererBuilder]
2017-08-18 09:32:23 -04:00
[com.openhtmltopdf.svgsupport BatikSVGDrawer]
2017-04-01 14:58:23 -04:00
[com.openhtmltopdf.util XRLog]
[org.apache.commons.io IOUtils]
[org.jsoup Jsoup]
2017-04-02 12:52:01 -04:00
[org.jsoup.nodes Document]))
2017-04-01 14:58:23 -04:00
(defn embed-image
  "Reads an image (provided as a filename, InputStream or byte array) and encodes it as a base64 string suitable for
   use in a data url for displaying inline images in <img> tags or for use in CSS.

   The mime type is guessed from the leading bytes of the image. When it cannot be determined the media type is
   simply omitted, giving a url of the form data:;base64,<encoded bytes>.

   A stream opened by this function (that is, when image is not already an InputStream) is closed before returning.
   An InputStream supplied by the caller is read to the end but is left open for the caller to close.

   Throws an ex-info (carrying the original argument under :image in its ex-data) if anything goes wrong while
   reading or encoding the image."
  [image]
  (try
    (let [caller-stream?  (instance? InputStream image)
          ; io/input-stream always returns a BufferedInputStream, so the mark/reset support that
          ; guessContentTypeFromStream relies on is available even when the caller's own stream lacks it
          ^InputStream is (io/input-stream image)]
      (try
        (let [mime-type   (URLConnection/guessContentTypeFromStream is)
              image-bytes (if (u/byte-array? image) image (IOUtils/toByteArray is))
              b64-str     (.encodeToString (Base64/getEncoder) image-bytes)]
          ; a nil mime-type stringifies to "", which yields the valid media-type-less form "data:;base64,..."
          (str "data:" mime-type ";base64," b64-str))
        (finally
          ; only close what we opened ourselves; a caller-supplied stream remains the caller's responsibility
          (when-not caller-stream?
            (.close is)))))
    (catch Exception ex
      (throw (ex-info "Exception converting image to inline base64 string: " {:image image} ex)))))
2017-04-02 12:52:01 -04:00
(defn- read-html-string
  "Turns the supplied HTML source into a string of HTML.

   A string is returned unchanged, a sequential collection is rendered as Hiccup via hiccup.page/html5, and
   anything else is opened with clojure.java.io/reader and read in full."
  ^String [in]
  (if (string? in)
    in
    (if (sequential? in)
      (h/html5 {} in)
      ; the reader is closed again once its contents have been read
      (with-open [rdr (io/reader in)]
        (slurp rdr)))))
2017-04-01 14:58:23 -04:00
(defn- ->output-stream
  "Coerces out to an OutputStream. An existing OutputStream is handed back untouched (not re-wrapped); any other
   value is opened with clojure.java.io/output-stream."
  ^OutputStream [out]
  (if-not (instance? OutputStream out)
    (io/output-stream out)
    out))
2017-04-01 14:58:23 -04:00
(defn- configure-logging!
  "Switches XRLog logging on or off according to the :logging? option. When logging is being switched on and a
   :logger option is present, that logger is installed via XRLog/setLoggerImpl before logging is enabled."
  [options]
  (let [enabled? (boolean (:logging? options))]
    (when enabled?
      (when-let [custom-logger (:logger options)]
        (XRLog/setLoggerImpl custom-logger)))
    (XRLog/setLoggingEnabled enabled?)))
(defn- prepare-html
  "Reads the HTML source, parses it with Jsoup and passes the parsed document to opt/inject-options-into-html!
   along with the options. When the [:debug :display-html?] option is set, the resulting document is printed to
   *out*. Returns the Jsoup document."
  [in options]
  (let [^String markup (read-html-string in)
        parsed         (Jsoup/parse markup)]
    (opt/inject-options-into-html! parsed options)
    (when (get-in options [:debug :display-html?])
      (println (str parsed)))
    parsed))
(defn- write-pdf!
  "Configures a PdfRendererBuilder for the given Jsoup document and starts rendering it in a future.

   The rendered PDF bytes are written to a PipedOutputStream whose connected PipedInputStream is returned, so the
   caller must consume the input stream while the future is still running.

   Returns a map of:
     :pdf      - the PipedInputStream the PDF bytes can be read from
     :renderer - the future performing the rendering; any rendering exception is rethrown wrapped in an Exception
                 inside the future, so it only surfaces when the future is dereferenced"
  [^Document html-doc options]
  (let [builder  (PdfRendererBuilder.)
        base-uri (opt/->base-uri options)]
    (doto builder
      (obj/set-object-drawer-factory options)
      (.useSVGDrawer (BatikSVGDrawer.))
      (.withW3cDocument (DOMBuilder/jsoup2DOM html-doc) base-uri))
    (let [source (PipedInputStream.)
          sink   (PipedOutputStream. source)]
      {:pdf      source
       :renderer (future
                   (try
                     ; with-open closes the write end of the pipe once rendering finishes or fails
                     (with-open [pdf-sink sink]
                       (.toStream builder pdf-sink)
                       (.run builder))
                     (catch Exception ex
                       (throw (Exception. "Exception while rendering PDF" ex)))))})))
2017-05-04 21:02:31 -04:00
2017-04-01 14:58:23 -04:00
(defn ->pdf
  "Renders HTML to a PDF document.

     in      - HTML to be rendered, provided as: File or InputStream object for an html file, string containing
               HTML, or Hiccup-style HTML
     out     - where to save the PDF to: PDF filename, or an OutputStream
     options - optional map of options to control PDF document styling and other properties

   Without a :watermark option the PDF bytes are copied to out, which is then closed and returned. With a
   :watermark option the work is delegated to write-watermark! and its return value is returned.

   An exception thrown during rendering is rethrown from here when the rendering future is dereferenced. The pipe
   the rendered PDF is read from is always closed before returning or throwing, so that a failure while opening or
   writing the output cannot leave the rendering thread blocked on a pipe nobody is reading."
  [in out & [options]]
  (let [options  (opt/get-final-options options)
        html-doc (prepare-html in options)]
    (configure-logging! options)
    (let [{:keys [pdf renderer]} (write-pdf! html-doc options)]
      (try
        (let [out    (->output-stream out)
              result (if (:watermark options)
                       (w/write-watermark! pdf out options)
                       (with-open [os out]
                         (io/copy pdf os)
                         os))]
          ; this is a little weird, but because of the whole piped stream thing in write-pdf!, we need to render
          ; the PDF in a future. if something throws an exception during rendering, it would otherwise get eaten
          ; silently by the future... except if we deref the future! thus the explicit call to deref it here
          (deref renderer)
          result)
        (finally
          ; closing the read end makes any pending write in the renderer fail instead of waiting forever for
          ; buffer space; on the success path the renderer has already finished and this is a plain cleanup
          (.close ^InputStream pdf))))))