View Javadoc
1   /*
2    * Copyright 2024 Bloomreach B.V. (https://www.bloomreach.com)
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *         http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package org.onehippo.forge.content.exim.repository.jaxrs;
17  
18  import java.io.BufferedOutputStream;
19  import java.io.File;
20  import java.io.FileOutputStream;
21  import java.io.PrintStream;
22  import java.util.ArrayList;
23  import java.util.List;
24  import java.util.Set;
25  
26  import javax.jcr.Node;
27  import javax.jcr.Property;
28  import javax.jcr.Session;
29  import javax.jcr.query.Query;
30  import jakarta.servlet.http.HttpServletRequest;
31  import jakarta.ws.rs.Consumes;
32  import jakarta.ws.rs.POST;
33  import jakarta.ws.rs.Path;
34  import jakarta.ws.rs.Produces;
35  import jakarta.ws.rs.core.Context;
36  import jakarta.ws.rs.core.MediaType;
37  import jakarta.ws.rs.core.Response;
38  import jakarta.ws.rs.core.SecurityContext;
39  
40  import org.apache.commons.collections4.CollectionUtils;
41  import org.apache.commons.io.FileUtils;
42  import org.apache.commons.io.IOUtils;
43  import org.apache.commons.lang3.ArrayUtils;
44  import org.apache.commons.lang3.StringUtils;
45  import org.apache.commons.vfs2.FileObject;
46  import org.apache.commons.vfs2.VFS;
47  import org.apache.cxf.jaxrs.ext.multipart.Attachment;
48  import org.apache.cxf.jaxrs.ext.multipart.Multipart;
49  import org.apache.cxf.jaxrs.ext.multipart.MultipartBody;
50  import org.hippoecm.repository.api.HippoNodeType;
51  import org.hippoecm.repository.util.JcrUtils;
52  import org.onehippo.forge.content.exim.core.ContentMigrationRecord;
53  import org.onehippo.forge.content.exim.core.DocumentManager;
54  import org.onehippo.forge.content.exim.core.impl.DefaultBinaryImportTask;
55  import org.onehippo.forge.content.exim.core.impl.WorkflowDocumentManagerImpl;
56  import org.onehippo.forge.content.exim.core.impl.WorkflowDocumentVariantImportTask;
57  import org.onehippo.forge.content.exim.core.util.AntPathMatcher;
58  import org.onehippo.forge.content.exim.core.util.ContentNodeUtils;
59  import org.onehippo.forge.content.exim.core.util.ContentPathUtils;
60  import org.onehippo.forge.content.exim.core.util.HippoBinaryNodeUtils;
61  import org.onehippo.forge.content.exim.core.util.HippoNodeUtils;
62  import org.onehippo.forge.content.exim.repository.jaxrs.param.ExecutionParams;
63  import org.onehippo.forge.content.exim.repository.jaxrs.param.Result;
64  import org.onehippo.forge.content.exim.repository.jaxrs.status.ProcessStatus;
65  import org.onehippo.forge.content.pojo.model.ContentNode;
66  import org.slf4j.Logger;
67  import org.slf4j.LoggerFactory;
68  
69  import com.fasterxml.jackson.core.JsonProcessingException;
70  
71  /**
72   * Content-EXIM Import JAX-RS Service.
73   */
74  @Path("/import")
75  public class ContentEximImportService extends AbstractContentEximService {
76  
77      private static Logger log = LoggerFactory.getLogger(ContentEximImportService.class);
78  
79      public ContentEximImportService() {
80          super();
81      }
82  
83      @Path("/")
84      @Consumes(MediaType.MULTIPART_FORM_DATA)
85      @Produces("multipart/mixed")
86      @POST
87      public Response importContentFromZip(@Context SecurityContext securityContext, @Context HttpServletRequest request,
88              @Multipart(value = "batchSize", required = false) String batchSizeParam,
89              @Multipart(value = "throttle", required = false) String throttleParam,
90              @Multipart(value = "publishOnImport", required = false) String publishOnImportParam,
91              @Multipart(value = "dataUrlSizeThreshold", required = false) String dataUrlSizeThresholdParam,
92              @Multipart(value = "docbasePropNames", required = false) String docbasePropNamesParam,
93              @Multipart(value = "documentTags", required = false) String documentTagsParam,
94              @Multipart(value = "binaryTags", required = false) String binaryTagsParam,
95              @Multipart(value = "paramsJson", required = false) String paramsJsonParam,
96              @Multipart(value = "params", required = false) Attachment paramsAttachment,
97              @Multipart(value = "package", required = true) Attachment packageAttachment)
98              throws JsonProcessingException {
99  
100         List<Attachment> attachments = new ArrayList<>();
101 
102         Logger procLogger = log;
103 
104         Result result = new Result();
105 
106         File tempLogFile = null;
107         PrintStream tempLogOut = null;
108         // The physical uploaded zip java.io.file.
109         File tempZipFile = null;
110         // The logical zip file folder in commons-VFS FileObject. This is the reading source.
111         FileObject baseFolder = null;
112         Session session = null;
113         ExecutionParams params = new ExecutionParams();
114         ProcessStatus processStatus = null;
115 
116         try {
117             tempLogFile = File.createTempFile(TEMP_PREFIX, ".log");
118             tempLogOut = new PrintStream(new BufferedOutputStream(new FileOutputStream(tempLogFile)));
119             procLogger = createTeeLogger(log, tempLogOut);
120 
121             if (getProcessMonitor() != null) {
122                 processStatus = getProcessMonitor().startProcess();
123                 fillProcessStatusByRequestInfo(processStatus, securityContext, request);
124                 processStatus.setLogFile(tempLogFile);
125             }
126 
127             tempZipFile = File.createTempFile(TEMP_PREFIX, ".zip");
128             procLogger.info("ContentEximService#importContentFromZip begins with {}", tempZipFile.getPath());
129 
130             if (packageAttachment == null) {
131                 result.addError("No zip attachment.");
132                 return Response.serverError().entity(toJsonString(result)).build();
133             }
134 
135             if (paramsAttachment != null) {
136                 final String json = attachmentToString(paramsAttachment, "UTF-8");
137                 if (StringUtils.isNotBlank(json)) {
138                     params = getObjectMapper().readValue(json, ExecutionParams.class);
139                 }
140             } else {
141                 if (StringUtils.isNotBlank(paramsJsonParam)) {
142                     params = getObjectMapper().readValue(paramsJsonParam, ExecutionParams.class);
143                 }
144             }
145             overrideExecutionParamsByParameters(params, batchSizeParam, throttleParam, publishOnImportParam,
146                     dataUrlSizeThresholdParam, docbasePropNamesParam, documentTagsParam, binaryTagsParam);
147 
148             if (processStatus != null) {
149                 processStatus.setExecutionParams(params);
150             }
151 
152             transferAttachmentToFile(packageAttachment, tempZipFile);
153 
154             baseFolder = VFS.getManager().resolveFile("zip:" + tempZipFile.toURI());
155 
156             session = createSession();
157 
158             DocumentManager documentManager = new WorkflowDocumentManagerImpl(session);
159 
160             final DefaultBinaryImportTask binaryImportTask = new DefaultBinaryImportTask(documentManager);
161             binaryImportTask.setLogger(procLogger);
162 
163             final WorkflowDocumentVariantImportTask documentImportTask = new WorkflowDocumentVariantImportTask(
164                     documentManager);
165             documentImportTask.setLogger(procLogger);
166 
167             FileObject[] jsonFiles = binaryImportTask.findFilesByNamePattern(baseFolder, "^.+\\.json$", 1, 20);
168 
169             int batchCount = 0;
170 
171             try {
172                 binaryImportTask.start();
173                 batchCount = importBinaries(procLogger, processStatus, jsonFiles, params, baseFolder, binaryImportTask,
174                         result, batchCount);
175             } finally {
176                 binaryImportTask.stop();
177             }
178 
179             try {
180                 documentImportTask.start();
181                 batchCount = importDocuments(procLogger, processStatus, jsonFiles, params, baseFolder,
182                         documentImportTask, result, batchCount);
183             } finally {
184                 documentImportTask.stop();
185             }
186 
187             batchCount = cleanMirrorDocbaseValues(procLogger, processStatus, session, params, result, batchCount);
188             batchCount = cleanAllDocbaseFieldValues(procLogger, processStatus, session, params, result, batchCount);
189 
190             if (processStatus != null) {
191                 processStatus.setProgress(1.0);
192             }
193 
194             procLogger.info("ContentEximService#importContentFromZip ends.");
195 
196             attachments.add(
197                     new Attachment("logs", MediaType.TEXT_PLAIN, FileUtils.readFileToString(tempLogFile, "UTF-8")));
198             attachments.add(new Attachment("summary", MediaType.APPLICATION_JSON, toJsonString(result)));
199 
200         } catch (Exception e) {
201             procLogger.error("Failed to import content.", e);
202             result.addError(e.toString());
203             return Response.serverError().entity(toJsonString(result)).build();
204         } finally {
205             procLogger.info("ContentEximService#importContentFromZip finally ends.");
206 
207             if (getProcessMonitor() != null) {
208                 try {
209                     getProcessMonitor().stopProcess(processStatus);
210                 } catch (Exception e) {
211                     procLogger.error("Failed to stop process.", e);
212                 }
213             }
214 
215             if (session != null) {
216                 try {
217                     session.logout();
218                 } catch (Exception e) {
219                     procLogger.error("Failed to logout JCR session.", e);
220                 }
221             }
222 
223             // NOTE: Close the open connection to the logical VFS FileObject folder wrapping the temp zip file.
224             //       Otherwise, the open file descriptor by the zip VFS FileObject doesn't seem to be released,
225             //       and so OS cannot remove the temp zip file.
226             if (baseFolder != null) {
227                 try {
228                     baseFolder.close();  // Contributed by Freenet (Dev: Mark Kaloukh)
229                 } catch (Exception e) {
230                     procLogger.error("Failed to remove VFS zip file folder lock.", e);
231                 }
232             }
233 
234             if (tempZipFile != null) {
235                 try {
236                     tempZipFile.delete();
237                 } catch (Exception e) {
238                     procLogger.error("Failed to delete temporary zip file.", e);
239                 }
240             }
241 
242             if (tempLogOut != null) {
243                 IOUtils.closeQuietly(tempLogOut);
244             }
245 
246             if (tempLogFile != null) {
247                 try {
248                     tempLogFile.delete();
249                 } catch (Exception e) {
250                     log.error("Failed to delete temporary log file.", e);
251                 }
252             }
253         }
254 
255         return Response.ok(new MultipartBody(attachments, true)).build();
256     }
257 
258     private int importBinaries(Logger procLogger, ProcessStatus processStatus, FileObject[] jsonFiles,
259             ExecutionParams params, FileObject baseFolder, DefaultBinaryImportTask importTask, Result result,
260             int batchCount) throws Exception {
261         final String baseFolderUrlPrefix = baseFolder.getURL().toString();
262         final AntPathMatcher pathMatcher = new AntPathMatcher();
263 
264         for (FileObject file : jsonFiles) {
265             if (isStopRequested(baseFolder)) {
266                 procLogger.info("Stop requested by file at {}/{}", baseFolder.getName().getPath(),
267                         STOP_REQUEST_FILE_REL_PATH);
268                 break;
269             }
270 
271             ContentNode contentNode = importTask.readContentNodeFromJsonFile(file);
272 
273             String primaryTypeName = contentNode.getPrimaryType();
274             String path = contentNode.getProperty("jcr:path").getValue();
275 
276             if (!isBinaryPathIncluded(pathMatcher, params, path)) {
277                 continue;
278             }
279 
280             if (!HippoNodeUtils.isBinaryPath(path)) {
281                 continue;
282             }
283 
284             ContentMigrationRecord record = null;
285 
286             try {
287                 ContentNodeUtils.prependUrlPrefixInJcrDataValues(contentNode, BINARY_ATTACHMENT_REL_PATH,
288                         baseFolderUrlPrefix);
289 
290                 record = importTask.beginRecord("", path);
291                 record.setAttribute("file", file.getName().getPath());
292                 record.setProcessed(true);
293 
294                 String[] folderPathAndName = ContentPathUtils.splitToFolderPathAndName(path);
295                 String folderPath = folderPathAndName[0];
296                 String name = folderPathAndName[1];
297 
298                 String folderPrimaryType;
299                 String[] folderTypes;
300                 String[] galleryTypes;
301 
302                 if (HippoNodeUtils.isGalleryPath(path)) {
303                     folderPrimaryType = params.getGalleryFolderPrimaryType();
304                     folderTypes = params.getGalleryFolderFolderTypes();
305                     galleryTypes = params.getGalleryFolderGalleryTypes();
306                 } else {
307                     folderPrimaryType = params.getAssetFolderPrimaryType();
308                     folderTypes = params.getAssetFolderFolderTypes();
309                     galleryTypes = params.getAssetFolderGalleryTypes();
310                 }
311 
312                 folderPath = importTask.createOrUpdateBinaryFolder(folderPath, folderPrimaryType, folderTypes,
313                         galleryTypes);
314 
315                 applyTagContentProperties(contentNode, params.getBinaryTags());
316 
317                 String updatedPath = importTask.createOrUpdateBinaryFromContentNode(contentNode, primaryTypeName,
318                         folderPath, name);
319 
320                 HippoBinaryNodeUtils.extractTextFromBinariesAndSaveHippoTextsUnderHandlePath(
321                         importTask.getDocumentManager().getSession(), updatedPath);
322 
323                 record.setSucceeded(true);
324             } catch (Exception e) {
325                 procLogger.error("Failed to process record: {}", record, e);
326                 if (record != null) {
327                     record.setErrorMessage(e.toString());
328                 }
329             } finally {
330                 if (record != null) {
331                     importTask.endRecord();
332                     result.addItem(recordToResultItem(record));
333                     result.incrementTotalBinaryCount();
334                     if (record.isSucceeded()) {
335                         result.incrementSucceededBinaryCount();
336                     } else {
337                         result.incrementFailedBinaryCount();
338                     }
339                     if (processStatus != null) {
340                         // the remaining 5% for cleaning paths to convert those to uuids.
341                         processStatus.setProgress(0.95 * ((double) batchCount) / ((double) jsonFiles.length));
342                     }
343                 }
344                 ++batchCount;
345                 if (batchCount % params.getBatchSize() == 0) {
346                     importTask.getDocumentManager().getSession().save();
347                     importTask.getDocumentManager().getSession().refresh(false);
348                     if (params.getThrottle() > 0) {
349                         Thread.sleep(params.getThrottle());
350                     }
351                 }
352             }
353         }
354 
355         importTask.getDocumentManager().getSession().save();
356         importTask.getDocumentManager().getSession().refresh(false);
357 
358         return batchCount;
359     }
360 
361     private int importDocuments(Logger procLogger, ProcessStatus processStatus, FileObject[] jsonFiles,
362             ExecutionParams params, FileObject baseFolder, WorkflowDocumentVariantImportTask importTask, Result result,
363             int batchCount) throws Exception {
364         final String baseFolderUrlPrefix = baseFolder.getURL().toString();
365         final AntPathMatcher pathMatcher = new AntPathMatcher();
366 
367         for (FileObject file : jsonFiles) {
368             if (isStopRequested(baseFolder)) {
369                 procLogger.info("Stop requested by file at {}/{}", baseFolder.getName().getPath(),
370                         STOP_REQUEST_FILE_REL_PATH);
371                 break;
372             }
373 
374             ContentNode contentNode = importTask.readContentNodeFromJsonFile(file);
375 
376             String primaryTypeName = contentNode.getPrimaryType();
377             String path = contentNode.getProperty("jcr:path").getValue();
378 
379             if (!isDocumentPathIncluded(pathMatcher, params, path)) {
380                 continue;
381             }
382 
383             if (!HippoNodeUtils.isDocumentPath(path)) {
384                 continue;
385             }
386 
387             ContentMigrationRecord record = null;
388 
389             try {
390                 ContentNodeUtils.prependUrlPrefixInJcrDataValues(contentNode, BINARY_ATTACHMENT_REL_PATH,
391                         baseFolderUrlPrefix);
392 
393                 record = importTask.beginRecord("", path);
394                 record.setAttribute("file", file.getName().getPath());
395                 record.setProcessed(true);
396 
397                 String locale = (contentNode.hasProperty("hippotranslation:locale"))
398                         ? contentNode.getProperty("hippotranslation:locale").getValue()
399                         : null;
400                 String localizedName = contentNode.getProperty("jcr:localizedName").getValue();
401 
402                 applyTagContentProperties(contentNode, params.getDocumentTags());
403 
404                 String updatedPath = importTask.createOrUpdateDocumentFromVariantContentNode(contentNode,
405                         primaryTypeName, path, locale, localizedName);
406 
407                 boolean isToPublish = ExecutionParams.PUBLISH_ON_IMPORT_ALL.equals(params.getPublishOnImport());
408 
409                 if (!isToPublish && ExecutionParams.PUBLISH_ON_IMPORT_LIVE.equals(params.getPublishOnImport())) {
410                     isToPublish = ContentNodeUtils.containsStringValueInProperty(contentNode,
411                             HippoNodeType.HIPPO_AVAILABILITY, "live");
412                 }
413 
414                 if (isToPublish) {
415                     importTask.getDocumentManager().depublishDocument(updatedPath);
416                     importTask.getDocumentManager().publishDocument(updatedPath);
417                 }
418 
419                 record.setSucceeded(true);
420             } catch (Exception e) {
421                 procLogger.error("Failed to process record: {}", record, e);
422                 if (record != null) {
423                     record.setErrorMessage(e.toString());
424                 }
425             } finally {
426                 if (record != null) {
427                     importTask.endRecord();
428                     result.addItem(recordToResultItem(record));
429                     result.incrementTotalDocumentCount();
430                     if (record.isSucceeded()) {
431                         result.incrementSucceededDocumentCount();
432                     } else {
433                         result.incrementFailedDocumentCount();
434                     }
435                     if (processStatus != null) {
436                         // the remaining 5% for cleaning paths to convert those to uuids.
437                         processStatus.setProgress(0.95 * ((double) batchCount) / ((double) jsonFiles.length));
438                     }
439                 }
440                 ++batchCount;
441                 if (batchCount % params.getBatchSize() == 0) {
442                     importTask.getDocumentManager().getSession().save();
443                     importTask.getDocumentManager().getSession().refresh(false);
444                     if (params.getThrottle() > 0) {
445                         Thread.sleep(params.getThrottle());
446                     }
447                 }
448             }
449         }
450 
451         importTask.getDocumentManager().getSession().save();
452         importTask.getDocumentManager().getSession().refresh(false);
453 
454         return batchCount;
455     }
456 
457     private int cleanMirrorDocbaseValues(Logger procLogger, ProcessStatus processStatus, Session session,
458             ExecutionParams params, Result result, int batchCount) throws Exception {
459         Set<String> mirrorNodePaths = getQueriedNodePaths(session,
460                 "//element(*)[jcr:like(@hippo:docbase,'/content/%')]", Query.XPATH);
461         session.refresh(false);
462 
463         for (String mirrorNodePath : mirrorNodePaths) {
464             try {
465                 if (!session.nodeExists(mirrorNodePath)) {
466                     continue;
467                 }
468 
469                 Node mirrorNode = session.getNode(mirrorNodePath);
470                 String docbasePath = mirrorNode.getProperty("hippo:docbase").getString();
471 
472                 if (StringUtils.startsWith(docbasePath, "/") && session.nodeExists(docbasePath)) {
473                     JcrUtils.ensureIsCheckedOut(mirrorNode);
474                     String docbase = session.getNode(docbasePath).getIdentifier();
475                     mirrorNode.setProperty("hippo:docbase", docbase);
476                 }
477             } catch (Exception e) {
478                 String message = "Failed to clean mirror docbase value at " + mirrorNodePath + ". " + e;
479                 result.addError(message);
480                 procLogger.error("Failed to clean mirror docbase value at {}.", mirrorNodePath, e);
481             } finally {
482                 ++batchCount;
483                 if (batchCount % params.getBatchSize() == 0) {
484                     session.save();
485                     session.refresh(false);
486                     if (params.getThrottle() > 0) {
487                         Thread.sleep(params.getThrottle());
488                     }
489                 }
490             }
491         }
492 
493         session.save();
494         session.refresh(false);
495 
496         return batchCount;
497     }
498 
499     private int cleanAllDocbaseFieldValues(Logger procLogger, ProcessStatus processStatus, Session session,
500             ExecutionParams params, Result result, int batchCount) throws Exception {
501         Set<String> docbasePropNames = params.getDocbasePropNames();
502 
503         if (CollectionUtils.isEmpty(docbasePropNames)) {
504             return batchCount;
505         }
506 
507         for (String docbasePropName : docbasePropNames) {
508             if (StringUtils.isNotBlank(docbasePropName)) {
509                 batchCount = cleanSingleDocbaseFieldValues(procLogger, processStatus, session, params,
510                         StringUtils.trim(docbasePropName), result, batchCount);
511             }
512         }
513 
514         return batchCount;
515     }
516 
517     private int cleanSingleDocbaseFieldValues(Logger procLogger, ProcessStatus processStatus, Session session,
518             ExecutionParams params, String docbasePropName, Result result, int batchCount) throws Exception {
519         Set<String> nodePaths = getQueriedNodePaths(session,
520                 "//element(*)[jcr:like(@" + docbasePropName + ",'/content/%')]", Query.XPATH);
521 
522         session.refresh(false);
523 
524         for (String nodePath : nodePaths) {
525             try {
526                 if (!session.nodeExists(nodePath)) {
527                     continue;
528                 }
529 
530                 Node node = session.getNode(nodePath);
531 
532                 if (!node.hasProperty(docbasePropName)) {
533                     continue;
534                 }
535 
536                 Property docbaseProp = node.getProperty(docbasePropName);
537 
538                 if (docbaseProp.isMultiple()) {
539                     String[] docbasePaths = JcrUtils.getMultipleStringProperty(node, docbasePropName, null);
540                     if (ArrayUtils.isNotEmpty(docbasePaths)) {
541                         boolean updated = false;
542                         for (int i = 0; i < docbasePaths.length; i++) {
543                             String docbasePath = docbasePaths[i];
544                             if (StringUtils.startsWith(docbasePath, "/") && session.nodeExists(docbasePath)) {
545                                 String docbase = session.getNode(docbasePath).getIdentifier();
546                                 docbasePaths[i] = docbase;
547                                 updated = true;
548                             }
549                         }
550                         if (updated) {
551                             JcrUtils.ensureIsCheckedOut(node);
552                             node.setProperty(docbasePropName, docbasePaths);
553                         }
554                     }
555                 } else {
556                     String docbasePath = JcrUtils.getStringProperty(node, docbasePropName, null);
557                     if (StringUtils.startsWith(docbasePath, "/") && session.nodeExists(docbasePath)) {
558                         JcrUtils.ensureIsCheckedOut(node);
559                         String docbase = session.getNode(docbasePath).getIdentifier();
560                         node.setProperty(docbasePropName, docbase);
561                     }
562                 }
563             } catch (Exception e) {
564                 String message = "Failed to clean mirror docbase value at " + nodePath + "/@" + docbasePropName + ". "
565                         + e;
566                 result.addError(message);
567                 procLogger.error("Failed to clean mirror docbase value at {}/@{}.", nodePath, docbasePropName, e);
568             } finally {
569                 ++batchCount;
570                 if (batchCount % params.getBatchSize() == 0) {
571                     session.save();
572                     session.refresh(false);
573                     if (params.getThrottle() > 0) {
574                         Thread.sleep(params.getThrottle());
575                     }
576                 }
577             }
578         }
579 
580         session.save();
581         session.refresh(false);
582 
583         return batchCount;
584     }
585 }