From 8ae9f656292b4a12bda6808ab48f2d2c3fa779fe Mon Sep 17 00:00:00 2001 From: gered Date: Fri, 18 May 2018 09:58:34 -0400 Subject: [PATCH] fixes to JSoup to W3C document conversion to allow for SVG usage Open HTML to PDF's built-in support for this document tree conversion breaks SVG support when using the Batik renderer. luckily, more recent versions of JSoup include their own W3C document tree converter, so we just use that one instead. and as a bonus, we're no longer using an ancient version of JSoup. --- project.clj | 2 +- src/clj_htmltopdf/core.clj | 27 +++++++++++++++++++++------ src/clj_htmltopdf/options.clj | 2 +- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/project.clj b/project.clj index eb8d744..7aa35ed 100644 --- a/project.clj +++ b/project.clj @@ -5,10 +5,10 @@ :url "https://www.gnu.org/licenses/lgpl.html"} :dependencies [[com.openhtmltopdf/openhtmltopdf-core "0.0.1-RC13"] - [com.openhtmltopdf/openhtmltopdf-jsoup-dom-converter "0.0.1-RC13"] [com.openhtmltopdf/openhtmltopdf-pdfbox "0.0.1-RC13"] [com.openhtmltopdf/openhtmltopdf-rtl-support "0.0.1-RC13"] [com.openhtmltopdf/openhtmltopdf-svg-support "0.0.1-RC13"] + [org.jsoup/jsoup "1.11.3"] [commons-io/commons-io "2.5"] [hiccup "1.0.5"]] diff --git a/src/clj_htmltopdf/core.clj b/src/clj_htmltopdf/core.clj index 56c33cb..6683bc1 100644 --- a/src/clj_htmltopdf/core.clj +++ b/src/clj_htmltopdf/core.clj @@ -10,13 +10,14 @@ [java.io InputStream OutputStream PipedInputStream PipedOutputStream] [java.net URLConnection] [java.util Base64] - [com.openhtmltopdf DOMBuilder] [com.openhtmltopdf.pdfboxout PdfRendererBuilder] [com.openhtmltopdf.svgsupport BatikSVGDrawer] [com.openhtmltopdf.util XRLog] [org.apache.commons.io IOUtils] [org.jsoup Jsoup] - [org.jsoup.nodes Document])) + [org.jsoup.helper W3CDom] + [org.jsoup.nodes Document] + [org.jsoup.parser HtmlTreeBuilder Parser ParseSettings])) (defn embed-image "Reads an image (provided as a filename, InputStream or byte array) and encodes it as a base64 string suitable for @@ -58,10 +59,23 @@ (XRLog/setLoggingEnabled true)) (XRLog/setLoggingEnabled false))) +(defn- ->jsoup-parser + ^Parser [] + (doto + (Parser/htmlParser) + (.settings ParseSettings/preserveCase))) + +(defn- jsoup->w3c + ^org.w3c.dom.Document [^Document jsoup-doc] + (let [converter (W3CDom.)] + (.fromJsoup converter jsoup-doc))) + (defn- prepare-html [in options] - (let [html (read-html-string in) - html-doc (Jsoup/parse html)] + (let [base-uri (opt/->base-uri options) + html (read-html-string in) + parser (->jsoup-parser) + html-doc (Jsoup/parse html base-uri parser)] (opt/inject-options-into-html! html-doc options) (if (get-in options [:debug :display-html?]) (println (str html-doc))) @@ -70,10 +84,11 @@ (defn- write-pdf! [^Document html-doc options] (let [builder (PdfRendererBuilder.) - base-uri (opt/->base-uri options)] + base-uri (opt/->base-uri options) + w3c-doc (jsoup->w3c html-doc)] (obj/set-object-drawer-factory builder options) (.useSVGDrawer builder (BatikSVGDrawer.)) - (.withW3cDocument builder (DOMBuilder/jsoup2DOM html-doc) base-uri) + (.withW3cDocument builder w3c-doc base-uri) (let [piped-in (PipedInputStream.) piped-out (PipedOutputStream. piped-in) renderer (future diff --git a/src/clj_htmltopdf/options.clj b/src/clj_htmltopdf/options.clj index 4a7e079..970820c 100644 --- a/src/clj_htmltopdf/options.clj +++ b/src/clj_htmltopdf/options.clj @@ -46,7 +46,7 @@ final-options)) (defn ->base-uri - [options] + ^String [options] (str (:base-uri options))) (defn ->page-size-css