# Patch summary: stop using the openhtmltopdf jsoup-to-DOM converter and
# convert the parsed document with jsoup's own W3CDom helper instead.
#
#   project.clj  : removes com.openhtmltopdf/openhtmltopdf-jsoup-dom-converter
#                  and adds org.jsoup/jsoup "1.11.3".
#   core.clj     : adds ->jsoup-parser (an HTML parser configured with
#                  ParseSettings/preserveCase) and jsoup->w3c (W3CDom
#                  .fromJsoup); prepare-html now parses with the base URI and
#                  that parser; write-pdf! passes the converted document to
#                  .withW3cDocument instead of calling DOMBuilder/jsoup2DOM.
#   options.clj  : type-hints ->base-uri as returning String.
#
# NOTE(review): line breaks and indentation were reconstructed from a
# whitespace-collapsed copy of this patch. Hunk line counts agree with the @@
# headers, but the exact spacing of context lines should be confirmed against
# the repository (or the patch applied with whitespace-insensitive matching).
# NOTE(review): HtmlTreeBuilder is imported but not referenced in any hunk
# shown here; confirm it is needed.
diff --git a/project.clj b/project.clj
index eb8d744..7aa35ed 100644
--- a/project.clj
+++ b/project.clj
@@ -5,10 +5,10 @@
             :url "https://www.gnu.org/licenses/lgpl.html"}
 
   :dependencies [[com.openhtmltopdf/openhtmltopdf-core "0.0.1-RC13"]
-                 [com.openhtmltopdf/openhtmltopdf-jsoup-dom-converter "0.0.1-RC13"]
                  [com.openhtmltopdf/openhtmltopdf-pdfbox "0.0.1-RC13"]
                  [com.openhtmltopdf/openhtmltopdf-rtl-support "0.0.1-RC13"]
                  [com.openhtmltopdf/openhtmltopdf-svg-support "0.0.1-RC13"]
+                 [org.jsoup/jsoup "1.11.3"]
                  [commons-io/commons-io "2.5"]
                  [hiccup "1.0.5"]]
 
diff --git a/src/clj_htmltopdf/core.clj b/src/clj_htmltopdf/core.clj
index 56c33cb..6683bc1 100644
--- a/src/clj_htmltopdf/core.clj
+++ b/src/clj_htmltopdf/core.clj
@@ -10,13 +10,14 @@
     [java.io InputStream OutputStream PipedInputStream PipedOutputStream]
     [java.net URLConnection]
     [java.util Base64]
-    [com.openhtmltopdf DOMBuilder]
     [com.openhtmltopdf.pdfboxout PdfRendererBuilder]
     [com.openhtmltopdf.svgsupport BatikSVGDrawer]
     [com.openhtmltopdf.util XRLog]
     [org.apache.commons.io IOUtils]
     [org.jsoup Jsoup]
-    [org.jsoup.nodes Document]))
+    [org.jsoup.helper W3CDom]
+    [org.jsoup.nodes Document]
+    [org.jsoup.parser HtmlTreeBuilder Parser ParseSettings]))
 
 (defn embed-image
   "Reads an image (provided as a filename, InputStream or byte array) and encodes it as a base64 string suitable for
@@ -58,10 +59,23 @@
       (XRLog/setLoggingEnabled true))
     (XRLog/setLoggingEnabled false)))
 
+(defn- ->jsoup-parser
+  ^Parser []
+  (doto
+    (Parser/htmlParser)
+    (.settings ParseSettings/preserveCase)))
+
+(defn- jsoup->w3c
+  ^org.w3c.dom.Document [^Document jsoup-doc]
+  (let [converter (W3CDom.)]
+    (.fromJsoup converter jsoup-doc)))
+
 (defn- prepare-html
   [in options]
-  (let [html (read-html-string in)
-        html-doc (Jsoup/parse html)]
+  (let [base-uri (opt/->base-uri options)
+        html (read-html-string in)
+        parser (->jsoup-parser)
+        html-doc (Jsoup/parse html base-uri parser)]
     (opt/inject-options-into-html! html-doc options)
     (if (get-in options [:debug :display-html?])
       (println (str html-doc)))
@@ -70,10 +84,11 @@
 (defn- write-pdf!
   [^Document html-doc options]
   (let [builder (PdfRendererBuilder.)
-        base-uri (opt/->base-uri options)]
+        base-uri (opt/->base-uri options)
+        w3c-doc (jsoup->w3c html-doc)]
     (obj/set-object-drawer-factory builder options)
     (.useSVGDrawer builder (BatikSVGDrawer.))
-    (.withW3cDocument builder (DOMBuilder/jsoup2DOM html-doc) base-uri)
+    (.withW3cDocument builder w3c-doc base-uri)
     (let [piped-in (PipedInputStream.)
           piped-out (PipedOutputStream. piped-in)
           renderer (future
diff --git a/src/clj_htmltopdf/options.clj b/src/clj_htmltopdf/options.clj
index 4a7e079..970820c 100644
--- a/src/clj_htmltopdf/options.clj
+++ b/src/clj_htmltopdf/options.clj
@@ -46,7 +46,7 @@
     final-options))
 
 (defn ->base-uri
-  [options]
+  ^String [options]
   (str (:base-uri options)))
 
 (defn ->page-size-css