View Javadoc

1   /**
2    * Copyright 2012 Hippo.
3    * 
4    * This file is part of HST PDF Renderer.
5    * 
6    * HST PDF Renderer is free software: you can redistribute it and/or modify it 
7    * under the terms of the GNU General Public License as published by the Free 
8    * Software Foundation, either version 3 of the License, or (at your option) 
9    * any later version.
10   * 
11   * HST PDF Renderer is distributed in the hope that it will be useful, but 
12   * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
13   * or FITNESS FOR A PARTICULAR PURPOSE.
14   * See the GNU General Public License for more details.
15   * 
16   * You should have received a copy of the GNU General Public License along with
17   * HST PDF Renderer. If not, see http://www.gnu.org/licenses/.
18   */
19  package org.onehippo.forge.hst.pdf.renderer;
20  
21  import java.io.ByteArrayInputStream;
22  import java.io.ByteArrayOutputStream;
23  import java.io.IOException;
24  import java.io.InputStream;
25  import java.io.InputStreamReader;
26  import java.io.OutputStream;
27  import java.io.OutputStreamWriter;
28  import java.io.Reader;
29  import java.io.StringWriter;
30  import java.net.URI;
31  import java.util.Properties;
32  
33  import javax.xml.parsers.DocumentBuilder;
34  import javax.xml.parsers.DocumentBuilderFactory;
35  import javax.xml.parsers.ParserConfigurationException;
36  
37  import org.apache.commons.io.IOUtils;
38  import org.apache.commons.lang.StringUtils;
39  import org.slf4j.Logger;
40  import org.slf4j.LoggerFactory;
41  import org.w3c.dom.Document;
42  import org.w3c.dom.Element;
43  import org.w3c.dom.Node;
44  import org.w3c.dom.NodeList;
45  import org.w3c.tidy.Tidy;
46  import org.xhtmlrenderer.extend.UserAgentCallback;
47  import org.xhtmlrenderer.pdf.ITextFontResolver;
48  import org.xhtmlrenderer.pdf.ITextRenderer;
49  import org.xml.sax.InputSource;
50  import org.xml.sax.SAXException;
51  
52  import com.lowagie.text.DocumentException;
53  import com.lowagie.text.pdf.BaseFont;
54  
55  /**
56   * HtmlPDFRenderer
57   * <P>
58   * This class is designed to be used as singleton object.
59   * So, {@link #renderHtmlToPDF(InputStream, String, boolean, OutputStream)} and {@link #renderHtmlToPDF(Reader, boolean, OutputStream)} should be thread-safe.
60   * </P>
61   */
62  public class HtmlPDFRenderer {
63  
64      private static Logger log = LoggerFactory.getLogger(HtmlPDFRenderer.class);
65  
66      private final Properties tidyProps;
67      private boolean removeExistingCssLinks = true;
68      private URI [] cssURIs;
69      private int bufferSize = 4096;
70      private UserAgentCallback userAgentCallback;
71      private String [] fontPaths;
72      private boolean useFullyQualifiedLinks = true;
73  
74      public HtmlPDFRenderer() {
75          this(new Properties());
76      }
77  
78      public HtmlPDFRenderer(final Properties tidyProps) {
79          this.tidyProps = tidyProps;
80      }
81  
82      public boolean isRemoveExistingCssLinks() {
83          return removeExistingCssLinks;
84      }
85  
86      public void setRemoveExistingCssLinks(boolean removeExistingCssLinks) {
87          this.removeExistingCssLinks = removeExistingCssLinks;
88      }
89  
90      public URI [] getCssURIs() {
91          return cssURIs;
92      }
93  
94      public void setCssURIs(URI [] cssURIs) {
95          this.cssURIs = cssURIs;
96      }
97  
98      public int getBufferSize() {
99          return bufferSize;
100     }
101 
102     public void setBufferSize(int bufferSize) {
103         this.bufferSize = bufferSize;
104     }
105 
106     public UserAgentCallback getUserAgentCallback() {
107         return userAgentCallback;
108     }
109 
110     public void setUserAgentCallback(UserAgentCallback userAgentCallback) {
111         this.userAgentCallback = userAgentCallback;
112     }
113 
114     public String [] getFontPaths() {
115         return fontPaths;
116     }
117 
118     public void setFontPaths(String [] fontPaths) {
119         this.fontPaths = fontPaths;
120     }
121 
122     public boolean isUseFullyQualifiedLinks() {
123         return useFullyQualifiedLinks;
124     }
125 
126     public void setUseFullyQualifiedLinks(boolean useFullyQualifiedLinks) {
127         this.useFullyQualifiedLinks = useFullyQualifiedLinks;
128     }
129 
130     public void renderHtmlToPDF(InputStream htmlInput, String inputHtmlEncoding, boolean convertToXHTML, OutputStream pdfOutput, String documentURL, String externalLinkBaseURL) throws IOException {
131         InputStreamReader htmlReader = new InputStreamReader(htmlInput, inputHtmlEncoding);
132         renderHtmlToPDF(htmlReader, convertToXHTML, pdfOutput, documentURL, externalLinkBaseURL);
133     }
134 
135     public void renderHtmlToPDF(Reader htmlInput, boolean convertToXHTML, OutputStream pdfOutput, String documentURL, String externalLinkBaseURL) throws IOException {
136         Reader xhtmlReader = null;
137 
138         try {
139             if (convertToXHTML) {
140                 xhtmlReader = convertHtmlReaderToXhtmlReader(htmlInput);
141             } else {
142                 xhtmlReader = htmlInput;
143             }
144 
145             ITextRenderer renderer = new ITextRenderer();
146 
147             if (fontPaths != null && fontPaths.length > 0) {
148                 ITextFontResolver fontResolver = renderer.getFontResolver();
149 
150                 for (String fontPath : fontPaths) {
151                     fontResolver.addFont(fontPath, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
152                 }
153             }
154 
155             DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
156             Document document = builder.parse(new InputSource(xhtmlReader));
157 
158             if (removeExistingCssLinks) {
159                 removeExistingCssLinks(document);
160             }
161 
162             if (cssURIs != null && cssURIs.length > 0) {
163                 appendCssLinkElementToXhtmlDocument(document, cssURIs);
164             }
165 
166             if (useFullyQualifiedLinks && !StringUtils.isEmpty(externalLinkBaseURL)) {
167                 replaceLinksByFullyQualifiedURLs(document, "a", externalLinkBaseURL, documentURL);
168                 replaceLinksByFullyQualifiedURLs(document, "A", externalLinkBaseURL, documentURL);
169             }
170 
171             if (userAgentCallback != null) {
172                 renderer.getSharedContext().setUserAgentCallback(userAgentCallback);
173             }
174 
175             renderer.setDocument(document, documentURL);
176             renderer.layout();
177             renderer.createPDF(pdfOutput);
178         } catch (ParserConfigurationException e) {
179             log.error("Parse configuration exception.", e);
180         } catch (SAXException e) {
181             log.error("XML parsing exception.", e);
182         } catch (DocumentException e) {
183             log.error("pdf generation exception.", e);
184         } finally {
185             if (xhtmlReader != htmlInput) {
186                 IOUtils.closeQuietly(xhtmlReader);
187             }
188         }
189     }
190 
191     private Reader convertHtmlReaderToXhtmlReader(Reader htmlReader) throws IOException {
192         Tidy tidy = new Tidy();
193         tidy.setConfigurationFromProps(tidyProps);
194 
195         if (log.isDebugEnabled()) {
196             StringWriter writer = new StringWriter();
197             tidy.getConfiguration().printConfigOptions(writer, true);
198             log.debug("Tidy configuration: \n{}", writer.toString());
199         }
200 
201         ByteArrayOutputStream tidyOut = null;
202         OutputStreamWriter osw = null;
203         byte [] bytes = null;
204 
205         try {
206             tidyOut = new ByteArrayOutputStream(bufferSize);
207             osw = new OutputStreamWriter(tidyOut, "UTF-8");
208             tidy.parse(htmlReader, osw);
209             osw.flush();
210             bytes = tidyOut.toByteArray();
211         } finally {
212             IOUtils.closeQuietly(osw);
213             IOUtils.closeQuietly(tidyOut);
214         }
215 
216         return new InputStreamReader(new ByteArrayInputStream(bytes), "UTF-8");
217     }
218 
219     private static Element getFirstChildElement(Element base, String nodeName) {
220         NodeList childNodeList = base.getChildNodes();
221 
222         if (childNodeList != null) {
223             int length = childNodeList.getLength();
224 
225             for (int i = 0; i < length; i++) {
226                 Node childNode = childNodeList.item(i);
227 
228                 if (childNode.getNodeType() == Node.ELEMENT_NODE) {
229                     if (nodeName == null) {
230                         return (Element) childNode;
231                     } else if (StringUtils.equalsIgnoreCase(childNode.getNodeName(), nodeName)) {
232                         return (Element) childNode;
233                     }
234                 }
235             }
236         }
237 
238         return null;
239     }
240 
241     private static void removeExistingCssLinks(Document document) {
242         Element headElem = getFirstChildElement(document.getDocumentElement(), "head");
243 
244         if (headElem == null) {
245             return;
246         }
247 
248         NodeList nodeList = headElem.getChildNodes();
249 
250         if (nodeList != null) {
251             int length = nodeList.getLength();
252 
253             for (int i = length - 1; i >= 0; i--) {
254                 Node childNode = nodeList.item(i);
255 
256                 if (childNode.getNodeType() == Node.ELEMENT_NODE) {
257                     Element childElem = (Element) childNode;
258 
259                     if (StringUtils.equalsIgnoreCase("link", childElem.getNodeName())) {
260                         if (StringUtils.equalsIgnoreCase("text/css", childElem.getAttribute("type"))) {
261                             headElem.removeChild(childElem);
262                         }
263                     }
264                 }
265             }
266         }
267     }
268 
269     private static void appendCssLinkElementToXhtmlDocument(Document document, URI [] cssURIs) {
270         Element headElem = getFirstChildElement(document.getDocumentElement(), "head");
271 
272         if (headElem == null) {
273             return;
274         }
275 
276         for (URI cssURI : cssURIs) {
277             Element linkElem = document.createElement("link");
278             linkElem.setAttribute("type", "text/css");
279             linkElem.setAttribute("rel", "stylesheet");
280             linkElem.setAttribute("href", cssURI.toString());
281             linkElem.setAttribute("media", "print");
282             headElem.appendChild(linkElem);
283         }
284     }
285 
286     private static void replaceLinksByFullyQualifiedURLs(Document document, String linkTagName, String externalLinkBaseURL, String documentURL) {
287         URI documentURI = null;
288         NodeList linkList = document.getElementsByTagName(linkTagName);
289 
290         if (linkList != null) {
291             int length = linkList.getLength();
292 
293             for (int i = 0; i < length; i++) {
294                 Node linkNode = linkList.item(i);
295 
296                 if (linkNode.getNodeType() != Node.ELEMENT_NODE) {
297                     continue;
298                 }
299 
300                 Element linkElem = (Element) linkNode;
301                 String href = StringUtils.trim(linkElem.getAttribute("href"));
302 
303                 if (StringUtils.isEmpty(href)) {
304                     href = StringUtils.trim(linkElem.getAttribute("HREF"));
305                 }
306 
307                 if (StringUtils.isEmpty(href)) {
308                     continue;
309                 }
310 
311                 if (StringUtils.startsWith(href, "http:") || StringUtils.startsWith(href, "https:")) {
312                     continue;
313                 }
314 
315                 if (StringUtils.startsWith(href, "/")) {
316                     linkElem.setAttribute("href", externalLinkBaseURL + href);
317                 } else {
318                     if (documentURI == null) {
319                         documentURI = URI.create(documentURL);
320                     }
321 
322                     String documentURIPath = documentURI.getPath();
323                     String basePath = externalLinkBaseURL + StringUtils.substringBeforeLast(documentURIPath, "/");
324                     linkElem.setAttribute("href", basePath + "/" + href);
325                 }
326             }
327         }
328     }
329 }