1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.onehippo.forge.hst.pdf.renderer;
20
21 import java.io.ByteArrayInputStream;
22 import java.io.ByteArrayOutputStream;
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.io.InputStreamReader;
26 import java.io.OutputStream;
27 import java.io.OutputStreamWriter;
28 import java.io.Reader;
29 import java.io.StringWriter;
30 import java.net.URI;
31 import java.util.Properties;
32
33 import javax.xml.parsers.DocumentBuilder;
34 import javax.xml.parsers.DocumentBuilderFactory;
35 import javax.xml.parsers.ParserConfigurationException;
36
37 import org.apache.commons.io.IOUtils;
38 import org.apache.commons.lang.StringUtils;
39 import org.slf4j.Logger;
40 import org.slf4j.LoggerFactory;
41 import org.w3c.dom.Document;
42 import org.w3c.dom.Element;
43 import org.w3c.dom.Node;
44 import org.w3c.dom.NodeList;
45 import org.w3c.tidy.Tidy;
46 import org.xhtmlrenderer.extend.UserAgentCallback;
47 import org.xhtmlrenderer.pdf.ITextFontResolver;
48 import org.xhtmlrenderer.pdf.ITextRenderer;
49 import org.xml.sax.InputSource;
50 import org.xml.sax.SAXException;
51
52 import com.lowagie.text.DocumentException;
53 import com.lowagie.text.pdf.BaseFont;
54
55
56
57
58
59
60
61
62 public class HtmlPDFRenderer {
63
64 private static Logger log = LoggerFactory.getLogger(HtmlPDFRenderer.class);
65
66 private final Properties tidyProps;
67 private boolean removeExistingCssLinks = true;
68 private URI [] cssURIs;
69 private int bufferSize = 4096;
70 private UserAgentCallback userAgentCallback;
71 private String [] fontPaths;
72 private boolean useFullyQualifiedLinks = true;
73
74 public HtmlPDFRenderer() {
75 this(new Properties());
76 }
77
78 public HtmlPDFRenderer(final Properties tidyProps) {
79 this.tidyProps = tidyProps;
80 }
81
82 public boolean isRemoveExistingCssLinks() {
83 return removeExistingCssLinks;
84 }
85
86 public void setRemoveExistingCssLinks(boolean removeExistingCssLinks) {
87 this.removeExistingCssLinks = removeExistingCssLinks;
88 }
89
90 public URI [] getCssURIs() {
91 return cssURIs;
92 }
93
94 public void setCssURIs(URI [] cssURIs) {
95 this.cssURIs = cssURIs;
96 }
97
98 public int getBufferSize() {
99 return bufferSize;
100 }
101
102 public void setBufferSize(int bufferSize) {
103 this.bufferSize = bufferSize;
104 }
105
106 public UserAgentCallback getUserAgentCallback() {
107 return userAgentCallback;
108 }
109
110 public void setUserAgentCallback(UserAgentCallback userAgentCallback) {
111 this.userAgentCallback = userAgentCallback;
112 }
113
114 public String [] getFontPaths() {
115 return fontPaths;
116 }
117
118 public void setFontPaths(String [] fontPaths) {
119 this.fontPaths = fontPaths;
120 }
121
122 public boolean isUseFullyQualifiedLinks() {
123 return useFullyQualifiedLinks;
124 }
125
126 public void setUseFullyQualifiedLinks(boolean useFullyQualifiedLinks) {
127 this.useFullyQualifiedLinks = useFullyQualifiedLinks;
128 }
129
130 public void renderHtmlToPDF(InputStream htmlInput, String inputHtmlEncoding, boolean convertToXHTML, OutputStream pdfOutput, String documentURL, String externalLinkBaseURL) throws IOException {
131 InputStreamReader htmlReader = new InputStreamReader(htmlInput, inputHtmlEncoding);
132 renderHtmlToPDF(htmlReader, convertToXHTML, pdfOutput, documentURL, externalLinkBaseURL);
133 }
134
135 public void renderHtmlToPDF(Reader htmlInput, boolean convertToXHTML, OutputStream pdfOutput, String documentURL, String externalLinkBaseURL) throws IOException {
136 Reader xhtmlReader = null;
137
138 try {
139 if (convertToXHTML) {
140 xhtmlReader = convertHtmlReaderToXhtmlReader(htmlInput);
141 } else {
142 xhtmlReader = htmlInput;
143 }
144
145 ITextRenderer renderer = new ITextRenderer();
146
147 if (fontPaths != null && fontPaths.length > 0) {
148 ITextFontResolver fontResolver = renderer.getFontResolver();
149
150 for (String fontPath : fontPaths) {
151 fontResolver.addFont(fontPath, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
152 }
153 }
154
155 DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
156 Document document = builder.parse(new InputSource(xhtmlReader));
157
158 if (removeExistingCssLinks) {
159 removeExistingCssLinks(document);
160 }
161
162 if (cssURIs != null && cssURIs.length > 0) {
163 appendCssLinkElementToXhtmlDocument(document, cssURIs);
164 }
165
166 if (useFullyQualifiedLinks && !StringUtils.isEmpty(externalLinkBaseURL)) {
167 replaceLinksByFullyQualifiedURLs(document, "a", externalLinkBaseURL, documentURL);
168 replaceLinksByFullyQualifiedURLs(document, "A", externalLinkBaseURL, documentURL);
169 }
170
171 if (userAgentCallback != null) {
172 renderer.getSharedContext().setUserAgentCallback(userAgentCallback);
173 }
174
175 renderer.setDocument(document, documentURL);
176 renderer.layout();
177 renderer.createPDF(pdfOutput);
178 } catch (ParserConfigurationException e) {
179 log.error("Parse configuration exception.", e);
180 } catch (SAXException e) {
181 log.error("XML parsing exception.", e);
182 } catch (DocumentException e) {
183 log.error("pdf generation exception.", e);
184 } finally {
185 if (xhtmlReader != htmlInput) {
186 IOUtils.closeQuietly(xhtmlReader);
187 }
188 }
189 }
190
191 private Reader convertHtmlReaderToXhtmlReader(Reader htmlReader) throws IOException {
192 Tidy tidy = new Tidy();
193 tidy.setConfigurationFromProps(tidyProps);
194
195 if (log.isDebugEnabled()) {
196 StringWriter writer = new StringWriter();
197 tidy.getConfiguration().printConfigOptions(writer, true);
198 log.debug("Tidy configuration: \n{}", writer.toString());
199 }
200
201 ByteArrayOutputStream tidyOut = null;
202 OutputStreamWriter osw = null;
203 byte [] bytes = null;
204
205 try {
206 tidyOut = new ByteArrayOutputStream(bufferSize);
207 osw = new OutputStreamWriter(tidyOut, "UTF-8");
208 tidy.parse(htmlReader, osw);
209 osw.flush();
210 bytes = tidyOut.toByteArray();
211 } finally {
212 IOUtils.closeQuietly(osw);
213 IOUtils.closeQuietly(tidyOut);
214 }
215
216 return new InputStreamReader(new ByteArrayInputStream(bytes), "UTF-8");
217 }
218
219 private static Element getFirstChildElement(Element base, String nodeName) {
220 NodeList childNodeList = base.getChildNodes();
221
222 if (childNodeList != null) {
223 int length = childNodeList.getLength();
224
225 for (int i = 0; i < length; i++) {
226 Node childNode = childNodeList.item(i);
227
228 if (childNode.getNodeType() == Node.ELEMENT_NODE) {
229 if (nodeName == null) {
230 return (Element) childNode;
231 } else if (StringUtils.equalsIgnoreCase(childNode.getNodeName(), nodeName)) {
232 return (Element) childNode;
233 }
234 }
235 }
236 }
237
238 return null;
239 }
240
241 private static void removeExistingCssLinks(Document document) {
242 Element headElem = getFirstChildElement(document.getDocumentElement(), "head");
243
244 if (headElem == null) {
245 return;
246 }
247
248 NodeList nodeList = headElem.getChildNodes();
249
250 if (nodeList != null) {
251 int length = nodeList.getLength();
252
253 for (int i = length - 1; i >= 0; i--) {
254 Node childNode = nodeList.item(i);
255
256 if (childNode.getNodeType() == Node.ELEMENT_NODE) {
257 Element childElem = (Element) childNode;
258
259 if (StringUtils.equalsIgnoreCase("link", childElem.getNodeName())) {
260 if (StringUtils.equalsIgnoreCase("text/css", childElem.getAttribute("type"))) {
261 headElem.removeChild(childElem);
262 }
263 }
264 }
265 }
266 }
267 }
268
269 private static void appendCssLinkElementToXhtmlDocument(Document document, URI [] cssURIs) {
270 Element headElem = getFirstChildElement(document.getDocumentElement(), "head");
271
272 if (headElem == null) {
273 return;
274 }
275
276 for (URI cssURI : cssURIs) {
277 Element linkElem = document.createElement("link");
278 linkElem.setAttribute("type", "text/css");
279 linkElem.setAttribute("rel", "stylesheet");
280 linkElem.setAttribute("href", cssURI.toString());
281 linkElem.setAttribute("media", "print");
282 headElem.appendChild(linkElem);
283 }
284 }
285
286 private static void replaceLinksByFullyQualifiedURLs(Document document, String linkTagName, String externalLinkBaseURL, String documentURL) {
287 URI documentURI = null;
288 NodeList linkList = document.getElementsByTagName(linkTagName);
289
290 if (linkList != null) {
291 int length = linkList.getLength();
292
293 for (int i = 0; i < length; i++) {
294 Node linkNode = linkList.item(i);
295
296 if (linkNode.getNodeType() != Node.ELEMENT_NODE) {
297 continue;
298 }
299
300 Element linkElem = (Element) linkNode;
301 String href = StringUtils.trim(linkElem.getAttribute("href"));
302
303 if (StringUtils.isEmpty(href)) {
304 href = StringUtils.trim(linkElem.getAttribute("HREF"));
305 }
306
307 if (StringUtils.isEmpty(href)) {
308 continue;
309 }
310
311 if (StringUtils.startsWith(href, "http:") || StringUtils.startsWith(href, "https:")) {
312 continue;
313 }
314
315 if (StringUtils.startsWith(href, "/")) {
316 linkElem.setAttribute("href", externalLinkBaseURL + href);
317 } else {
318 if (documentURI == null) {
319 documentURI = URI.create(documentURL);
320 }
321
322 String documentURIPath = documentURI.getPath();
323 String basePath = externalLinkBaseURL + StringUtils.substringBeforeLast(documentURIPath, "/");
324 linkElem.setAttribute("href", basePath + "/" + href);
325 }
326 }
327 }
328 }
329 }