View Javadoc
1   /*
2    * Copyright 2016-2024 Bloomreach B.V. (https://www.bloomreach.com)
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *         http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package org.onehippo.forge.content.exim.core.util;
17  
18  import java.io.ByteArrayInputStream;
19  import java.io.IOException;
20  import java.io.InputStream;
21  import java.util.ArrayList;
22  import java.util.List;
23  
24  import javax.jcr.Binary;
25  import javax.jcr.Node;
26  import javax.jcr.NodeIterator;
27  import javax.jcr.RepositoryException;
28  import javax.jcr.Session;
29  
30  import org.apache.commons.io.IOUtils;
31  import org.apache.commons.lang3.StringUtils;
32  import org.apache.tika.exception.TikaException;
33  import org.hippoecm.repository.api.HippoNodeType;
34  import org.hippoecm.repository.api.WorkflowException;
35  
36  /**
37   * Internal utility for Hippo binary related nodes.
38   */
39  public class HippoBinaryNodeUtils {
40  
41      private HippoBinaryNodeUtils() {
42      }
43  
44      /**
45       * Checks if all the binary folders exist in the given {@code absPath} and creates binary folders if not existing.
46       * @param session JCR session
47       * @param absPath absolute binary folder node path
48       * @param primaryTypeName primary folder node type name
49       * @param folderTypes folderTypes
50       * @param galleryTypes galleryTypes
51       * @return the final folder node if successful
52       * @throws RepositoryException if any repository exception occurs
53       * @throws WorkflowException if any workflow exception occurs
54       */
55      public static Node createMissingHippoBinaryFolders(final Session session, String absPath, String primaryTypeName,
56              String [] folderTypes, String [] galleryTypes) throws RepositoryException, WorkflowException {
57          String[] folderNames = StringUtils.split(ContentPathUtils.encodeNodePath(ContentPathUtils.removeIndexNotationInNodePath(absPath)), "/");
58  
59          Node rootNode = session.getRootNode();
60          Node curNode = rootNode;
61          Node tempFolderNode;
62          String folderNodePath;
63  
64          for (String folderName : folderNames) {
65  
66              if (curNode == rootNode) {
67                  folderNodePath = "/" + folderName;
68              } else {
69                  folderNodePath = curNode.getPath() + "/" + folderName;
70              }
71  
72              Node existingFolderNode = HippoNodeUtils.getExistingHippoFolderNode(session, folderNodePath);
73  
74              if (existingFolderNode == null) {
75                  tempFolderNode = curNode.addNode(folderName, primaryTypeName);
76                  tempFolderNode.addMixin("mix:referenceable");
77  
78                  if (folderTypes != null) {
79                      tempFolderNode.setProperty("hippostd:foldertype", folderTypes);
80                  }
81  
82                  if (galleryTypes != null) {
83                      tempFolderNode.setProperty("hippostd:gallerytype", galleryTypes);
84                  }
85  
86                  curNode = tempFolderNode;
87              } else {
88                  curNode = existingFolderNode;
89              }
90  
91              curNode = HippoNodeUtils.getHippoCanonicalNode(curNode);
92  
93              if (HippoNodeUtils.isHippoMirrorNode(curNode)) {
94                  curNode = HippoNodeUtils.getRereferencedNodeByHippoMirror(curNode);
95              }
96          }
97  
98          return curNode;
99      }
100 
101     /**
102      * Finds binary resource node ({@code hippo:resource}) under the {@code handlePath}, extracts text content
103      * and saves {@code hippo:text} property if the binary data is {@code application/pdf} content.
104      * @param session JCR session
105      * @param handlePath binary handle node path
106      * @throws RepositoryException if repository exception occurs
107      * @throws IOException if IO exception occurs
108      * @throws TikaException if TIKA exception occurs
109      */
110     public static void extractTextFromBinariesAndSaveHippoTextsUnderHandlePath(final Session session, final String handlePath)
111             throws RepositoryException, IOException, TikaException {
112         if (StringUtils.isBlank(handlePath)) {
113             return;
114         }
115 
116         if (!session.nodeExists(handlePath)) {
117             return;
118         }
119 
120         extractTextFromBinariesAndSaveHippoTexts(session, session.getNode(handlePath));
121     }
122 
123     /**
124      * Finds binary resource node ({@code hippo:resource}) under the {@code handle}, extracts text content
125      * and saves {@code hippo:text} property if the binary data is {@code application/pdf} content.
126      * @param session JCR session
127      * @param handle binary handle node
128      * @throws RepositoryException if repository exception occurs
129      * @throws IOException if IO exception occurs
130      * @throws TikaException if TIKA exception occurs
131      */
132     public static void extractTextFromBinariesAndSaveHippoTexts(final Session session, final Node handle)
133             throws RepositoryException, IOException, TikaException {
134         List<Node> resourceNodes = new ArrayList<>();
135 
136         if (handle.isNodeType(HippoNodeType.NT_RESOURCE)) {
137             resourceNodes.add(handle);
138         } else if (handle.getParent().isNodeType(HippoNodeType.NT_HANDLE)) {
139             Node resourceNode;
140             for (NodeIterator nodeIt = handle.getNodes(); nodeIt.hasNext(); ) {
141                 resourceNode = nodeIt.nextNode();
142                 if (resourceNode != null && resourceNode.isNodeType(HippoNodeType.NT_RESOURCE)) {
143                     resourceNodes.add(resourceNode);
144                 }
145             }
146         } else if (handle.isNodeType(HippoNodeType.NT_HANDLE)) {
147             Node assetSetNode;
148             Node resourceNode;
149             for (NodeIterator nodeIt1 = handle.getNodes(handle.getName()); nodeIt1.hasNext(); ) {
150                 assetSetNode = nodeIt1.nextNode();
151                 if (assetSetNode != null) {
152                     for (NodeIterator nodeIt2 = assetSetNode.getNodes(); nodeIt2.hasNext(); ) {
153                         resourceNode = nodeIt2.nextNode();
154                         if (resourceNode != null && resourceNode.isNodeType(HippoNodeType.NT_RESOURCE)) {
155                             resourceNodes.add(resourceNode);
156                         }
157                     }
158                 }
159             }
160         }
161 
162         String mimeType = null;
163         InputStream dataInput = null;
164         String textContent = null;
165         InputStream textInput = null;
166         Binary textBinary = null;
167 
168         for (Node resourceNode : resourceNodes) {
169             mimeType = (resourceNode.hasProperty("jcr:mimeType")) ? resourceNode.getProperty("jcr:mimeType").getString()
170                     : null;
171 
172             if (!StringUtils.equals("application/pdf", mimeType)) {
173                 continue;
174             }
175 
176             try {
177                 dataInput = resourceNode.getProperty("jcr:data").getBinary().getStream();
178                 textContent = TikaUtils.parsePdfToString(dataInput);
179                 textInput = new ByteArrayInputStream(textContent.getBytes());
180                 textBinary = session.getValueFactory().createBinary(textInput);
181                 resourceNode.setProperty(HippoNodeType.HIPPO_TEXT, textBinary);
182             } finally {
183                 IOUtils.closeQuietly(textInput);
184             }
185         }
186     }
187 
188 }