clj-htmltopdf/src/clj_htmltopdf/core.clj

130 lines
4.6 KiB
Clojure

(ns clj-htmltopdf.core
(:require
[clojure.java.io :as io]
[hiccup.page :as h]
[clj-htmltopdf.objects :as obj]
[clj-htmltopdf.options :as opt]
[clj-htmltopdf.watermark :as w]
[clj-htmltopdf.utils :as u])
(:import
[java.io InputStream OutputStream PipedInputStream PipedOutputStream]
[java.net URLConnection]
[java.util Base64]
[com.openhtmltopdf.pdfboxout PdfRendererBuilder]
[com.openhtmltopdf.svgsupport BatikSVGDrawer]
[com.openhtmltopdf.util XRLog]
[org.apache.commons.io IOUtils]
[org.jsoup Jsoup]
[org.jsoup.helper W3CDom]
[org.jsoup.nodes Document]
[org.jsoup.parser Parser ParseSettings]))
(defn embed-image
"Reads an image (provided as a filename, InputStream or byte array) and encodes it as a base64 string suitable for
use in a data url for displaying inline images in <img> tags or for use in CSS."
[image]
(try
(let [is (if-not (instance? InputStream image)
(io/input-stream image)
image)
mime-type (URLConnection/guessContentTypeFromStream is)
image-bytes (if (u/byte-array? image) image (IOUtils/toByteArray ^InputStream is))]
(let [b64-str (.encodeToString (Base64/getEncoder) image-bytes)]
(if (nil? mime-type)
(str "data:" b64-str)
(str "data:" mime-type ";base64," b64-str))))
(catch Exception ex
(throw (ex-info "Exception converting image to inline base64 string: " {:image image} ex)))))
(defn- read-html-string
^String [in]
(cond
(string? in) in
(sequential? in) (h/html5 {} in)
:else (with-open [r (io/reader in)]
(slurp r))))
(defn- ->output-stream
^OutputStream [out]
(if (instance? OutputStream out)
out
(io/output-stream out)))
(defn- configure-logging!
[options]
(if (:logging? options)
(do
(if-let [logger (:logger options)]
(XRLog/setLoggerImpl logger))
(XRLog/setLoggingEnabled true))
(XRLog/setLoggingEnabled false)))
(defn- ->jsoup-parser
^Parser []
(doto
(Parser/htmlParser)
(.settings ParseSettings/preserveCase)))
(defn- jsoup->w3c
^org.w3c.dom.Document [^Document jsoup-doc]
(let [converter (W3CDom.)]
(.fromJsoup converter jsoup-doc)))
(defn- prepare-html
[in options]
(let [base-uri (opt/->base-uri options)
html (read-html-string in)
parser (->jsoup-parser)
html-doc (Jsoup/parse html base-uri parser)]
(opt/inject-options-into-html! html-doc options)
(if (get-in options [:debug :display-html?])
(println (str html-doc)))
html-doc))
(defn- write-pdf!
[^Document html-doc options]
(let [builder (PdfRendererBuilder.)
base-uri (opt/->base-uri options)
w3c-doc (jsoup->w3c html-doc)]
(opt/set-uri-resolver! builder options)
(obj/set-object-drawer-factory! builder options)
(.useSVGDrawer builder (BatikSVGDrawer.))
(.withW3cDocument builder w3c-doc base-uri)
(let [piped-in (PipedInputStream.)
piped-out (PipedOutputStream. piped-in)
renderer (future
(try
(with-open [os piped-out]
(.toStream builder os)
(.run builder))
(catch Exception ex
(throw (Exception. "Exception while rendering PDF" ex)))))]
{:pdf piped-in
:renderer renderer})))
(defn ->pdf
"Renders HTML to a PDF document.
in - HTML to be rendered, provided as: File or InputStream object for an html file, string containing HTML, or
Hiccup-style HTML
out - where to save the PDF to: PDF filename, or an OutputStream
options - optional map of options to control PDF document styling and other properties
Returns the OutputStream that the PDF was rendered to."
[in out & [options]]
(let [options (opt/get-final-options options)
html-doc (prepare-html in options)]
(configure-logging! options)
(let [{:keys [pdf renderer]} (write-pdf! html-doc options)
out (->output-stream out)
result (if (:watermark options)
(w/write-watermark! pdf out options)
(with-open [os out]
(io/copy pdf os)
os))]
; this is a little weird, but because of the whole piped stream thing in write-pdf!, we need to render the
; PDF in a future. if something throws an exception during rendering, it would otherwise get eaten silently by
; the future... except if we deref the future! thus the explicit call to deref it here
(deref renderer)
result)))