Fixes to JSoup-to-W3C document conversion to allow SVG usage

Open HTML to PDF's built-in support for this document tree conversion
breaks SVG support when using the Batik renderer. Luckily, more recent
versions of JSoup include their own W3C document tree converter, so we
just use that one instead. As a bonus, we're no longer using an
ancient version of JSoup.
This commit is contained in:
Gered 2018-05-18 09:58:34 -04:00
parent 8a74ba431f
commit 8ae9f65629
3 changed files with 23 additions and 8 deletions

View file

@ -5,10 +5,10 @@
:url "https://www.gnu.org/licenses/lgpl.html"}
:dependencies [[com.openhtmltopdf/openhtmltopdf-core "0.0.1-RC13"]
[com.openhtmltopdf/openhtmltopdf-jsoup-dom-converter "0.0.1-RC13"]
[com.openhtmltopdf/openhtmltopdf-pdfbox "0.0.1-RC13"]
[com.openhtmltopdf/openhtmltopdf-rtl-support "0.0.1-RC13"]
[com.openhtmltopdf/openhtmltopdf-svg-support "0.0.1-RC13"]
[org.jsoup/jsoup "1.11.3"]
[commons-io/commons-io "2.5"]
[hiccup "1.0.5"]]

View file

@ -10,13 +10,14 @@
[java.io InputStream OutputStream PipedInputStream PipedOutputStream]
[java.net URLConnection]
[java.util Base64]
[com.openhtmltopdf DOMBuilder]
[com.openhtmltopdf.pdfboxout PdfRendererBuilder]
[com.openhtmltopdf.svgsupport BatikSVGDrawer]
[com.openhtmltopdf.util XRLog]
[org.apache.commons.io IOUtils]
[org.jsoup Jsoup]
[org.jsoup.nodes Document]))
[org.jsoup.helper W3CDom]
[org.jsoup.nodes Document]
[org.jsoup.parser HtmlTreeBuilder Parser ParseSettings]))
(defn embed-image
"Reads an image (provided as a filename, InputStream or byte array) and encodes it as a base64 string suitable for
@ -58,10 +59,23 @@
(XRLog/setLoggingEnabled true))
(XRLog/setLoggingEnabled false)))
(defn- ->jsoup-parser
  "Returns a fresh JSoup HTML parser whose settings have been switched to
  ParseSettings/preserveCase.

  NOTE(review): presumably this keeps mixed-case names (e.g. in inline SVG)
  from being normalized during parsing -- confirm against the JSoup docs."
  ^Parser []
  (let [^Parser parser (Parser/htmlParser)]
    ;; .settings is called for its effect on `parser`; its return value is
    ;; ignored and the configured parser itself is handed back.
    (.settings parser ParseSettings/preserveCase)
    parser))
(defn- jsoup->w3c
  "Converts a JSoup document into an org.w3c.dom.Document by handing it to a
  newly constructed JSoup W3CDom helper."
  ^org.w3c.dom.Document [^Document jsoup-doc]
  (-> (W3CDom.)
      (.fromJsoup jsoup-doc)))
(defn- prepare-html
[in options]
(let [html (read-html-string in)
html-doc (Jsoup/parse html)]
(let [base-uri (opt/->base-uri options)
html (read-html-string in)
parser (->jsoup-parser)
html-doc (Jsoup/parse html base-uri parser)]
(opt/inject-options-into-html! html-doc options)
(if (get-in options [:debug :display-html?])
(println (str html-doc)))
@ -70,10 +84,11 @@
(defn- write-pdf!
[^Document html-doc options]
(let [builder (PdfRendererBuilder.)
base-uri (opt/->base-uri options)]
base-uri (opt/->base-uri options)
w3c-doc (jsoup->w3c html-doc)]
(obj/set-object-drawer-factory builder options)
(.useSVGDrawer builder (BatikSVGDrawer.))
(.withW3cDocument builder (DOMBuilder/jsoup2DOM html-doc) base-uri)
(.withW3cDocument builder w3c-doc base-uri)
(let [piped-in (PipedInputStream.)
piped-out (PipedOutputStream. piped-in)
renderer (future

View file

@ -46,7 +46,7 @@
final-options))
;; Returns the value stored under :base-uri in `options`, coerced to a string
;; with `str`. Because (str nil) is "", a missing or nil :base-uri yields an
;; empty string rather than nil.
(defn ->base-uri
[options]
^String [options]
(str (:base-uri options)))
(defn ->page-size-css