Fork me on GitHub

Tutorials - Exporting Documents

The code snippets are excerpts (or slightly simplified ones) from Example_Export_Published_Documents.

The example below exports data to JSON files by default. See the source linked above if you want to export data to XML files instead. The script in the link uses the fileInJson parameter to choose either the JSON or the XML format.

Introduction

A document content consists of a handle node and variant nodes (up to max 3: live, preview and draft).

For example, a document content looks like the following (excerpt from /content/documents/contenteximdemo/news/2016/02/the-medusa-news in the demo project):

<?xml version="1.0" encoding="UTF-8"?>
<sv:node sv:name="the-medusa-news" xmlns:sv="http://www.jcp.org/jcr/sv/1.0">
  <sv:property sv:name="jcr:primaryType" sv:type="Name">
    <sv:value>hippo:handle</sv:value>
  </sv:property>
  <sv:property sv:multiple="true" sv:name="jcr:mixinTypes" sv:type="Name">
    <sv:value>mix:referenceable</sv:value>
    <sv:value>hippo:translated</sv:value>
  </sv:property>
  <sv:property sv:name="jcr:uuid" sv:type="String">
    <sv:value>c580ac64-3874-4717-a6d9-e5ad72080abe</sv:value>
  </sv:property>
  <sv:node sv:name="hippo:translation">
    <sv:property sv:name="jcr:primaryType" sv:type="Name">
      <sv:value>hippo:translation</sv:value>
    </sv:property>
    <sv:property sv:name="hippo:language" sv:type="String">
      <sv:value>en</sv:value>
    </sv:property>
    <sv:property sv:name="hippo:message" sv:type="String">
      <sv:value>The medusa news</sv:value>
    </sv:property>
  </sv:node>
  <sv:node sv:name="the-medusa-news">
    <sv:property sv:name="jcr:primaryType" sv:type="Name">
      <sv:value>contenteximdemo:newsdocument</sv:value>
    </sv:property>
    <sv:property sv:multiple="true" sv:name="jcr:mixinTypes" sv:type="Name">
      <sv:value>mix:referenceable</sv:value>
    </sv:property>
    <sv:property sv:name="jcr:uuid" sv:type="String">
      <sv:value>6e4ed59a-d5a7-4254-95a2-c278208f2f5b</sv:value>
    </sv:property>
    <sv:property sv:name="contenteximdemo:author" sv:type="String">
      <sv:value>Alfred Anonymous</sv:value>
    </sv:property>
    <sv:property sv:name="contenteximdemo:date" sv:type="Date">
      <sv:value>2016-02-08T13:38:00.000-05:00</sv:value>
    </sv:property>
    <sv:property sv:name="contenteximdemo:documenttype" sv:type="String">
      <sv:value>news</sv:value>
    </sv:property>
    <sv:property sv:name="contenteximdemo:introduction" sv:type="String">
      <sv:value>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum</sv:value>
    </sv:property>
    <sv:property sv:name="contenteximdemo:location" sv:type="String">
      <sv:value>Rotterdam</sv:value>
    </sv:property>
    <sv:property sv:name="contenteximdemo:source" sv:type="String">
      <sv:value/>
    </sv:property>
    <sv:property sv:name="contenteximdemo:title" sv:type="String">
      <sv:value>The medusa news</sv:value>
    </sv:property>
    <sv:property sv:multiple="true" sv:name="hippo:availability" sv:type="String">
      <sv:value>live</sv:value>
      <sv:value>preview</sv:value>
    </sv:property>
    <sv:property sv:name="hippo:related___pathreference" sv:type="String">
      <sv:value>the-medusa-news</sv:value>
    </sv:property>
    <sv:property sv:name="hippostd:holder" sv:type="String">
      <sv:value>admin</sv:value>
    </sv:property>
    <sv:property sv:name="hippostd:state" sv:type="String">
      <sv:value>published</sv:value>
    </sv:property>
    <sv:property sv:name="hippostd:stateSummary" sv:type="String">
      <sv:value>live</sv:value>
    </sv:property>
    <sv:property sv:name="hippostdpubwf:createdBy" sv:type="String">
      <sv:value>admin</sv:value>
    </sv:property>
    <sv:property sv:name="hippostdpubwf:creationDate" sv:type="Date">
      <sv:value>2013-11-12T13:03:00.000+01:00</sv:value>
    </sv:property>
    <sv:property sv:name="hippostdpubwf:lastModificationDate" sv:type="Date">
      <sv:value>2013-11-12T13:04:00.000+01:00</sv:value>
    </sv:property>
    <sv:property sv:name="hippostdpubwf:lastModifiedBy" sv:type="String">
      <sv:value>admin</sv:value>
    </sv:property>
    <sv:property sv:name="hippostdpubwf:publicationDate" sv:type="Date">
      <sv:value>2013-11-12T13:04:00.000+01:00</sv:value>
    </sv:property>
    <sv:property sv:name="hippotranslation:id" sv:type="String">
      <sv:value>81a831f8-9184-48e3-a5e8-20c2f32f54d4</sv:value>
    </sv:property>
    <sv:property sv:name="hippotranslation:locale" sv:type="String">
      <sv:value>en</sv:value>
    </sv:property>
    <sv:node sv:name="contenteximdemo:content">
      <sv:property sv:name="jcr:primaryType" sv:type="Name">
        <sv:value>hippostd:html</sv:value>
      </sv:property>
      <sv:property sv:name="hippostd:content" sv:type="String">
        <sv:value>&lt;html&gt;
          &lt;body&gt;

          &lt;p&gt;Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
          tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
          quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
          consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum
          dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident,
          sunt in culpa qui officia deserunt mollit anim id est laborum&lt;/p&gt;

          &lt;p&gt;Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
          tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
          quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
          consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum
          dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident,
          sunt in culpa qui officia deserunt mollit anim id est laborum&lt;/p&gt;

          &lt;p&gt;Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
          tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
          quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
          consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum
          dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident,
          sunt in culpa qui officia deserunt mollit anim id est laborum &lt;/p&gt;

          &lt;/body&gt;
          &lt;/html&gt;</sv:value>
      </sv:property>
    </sv:node>
    <sv:node sv:name="contenteximdemo:image">
      <sv:property sv:name="jcr:primaryType" sv:type="Name">
        <sv:value>hippogallerypicker:imagelink</sv:value>
      </sv:property>
      <sv:property sv:name="hippo:docbase" sv:type="String">
        <sv:value>4709f797-61ae-4c36-93d4-63b5d50fc200</sv:value>
      </sv:property>
      <sv:property sv:multiple="true" sv:name="hippo:facets" sv:type="String"/>
      <sv:property sv:multiple="true" sv:name="hippo:modes" sv:type="String"/>
      <sv:property sv:multiple="true" sv:name="hippo:values" sv:type="String"/>
    </sv:node>
  </sv:node>
</sv:node>
        

When exporting document content, the wrapper handle node carries no significant information in a content migration context. A document handle node may contain multiple variant nodes, but normally we're interested in migrating only the live variant, or sometimes the preview variant. Therefore, we can simply select a single variant (live or preview) and export that document variant's content.

Select Live (or Preview) Variant Nodes in Groovy Updater

You can use a JCR query like the following example in a Groovy Updater script to select all the live variant nodes to export.

The Groovy Updater script will then iterate over all the selected nodes and invoke the script's #doUpdate(Node) method on each visited node.

/jcr:root/content/documents//element(*,hippostd:publishable)[@hippostd:state='published']
          
          

You can use the following instead if you want to export preview variant nodes:

/jcr:root/content/documents//element(*,hippostd:publishable)[@hippostd:state='unpublished']
          
          

Sometimes you may want to select either the live or the preview variant based on some condition. In that case, you can use the following query to select document handle nodes instead, and implement your script to choose the proper variant node in the script block:

/jcr:root/content/documents//element(*,hippo:handle)
          
          

Initializing Export Task

    // 1. Initialize the (workflow) document manager and the document variant export task using the default implementations.
    def documentManager = new WorkflowDocumentManagerImpl(session)
    def exportTask = new WorkflowDocumentVariantExportTask(documentManager)
    // 1.1. Set the logger on the export task, which is useful when running in a Groovy updater.
    exportTask.setLogger(log)

    // 2. Start the export task exactly once; this begins the execution record bookkeeping.
    exportTask.start()
          
          

Export Live (or Preview) Variant Node to ContentNode bean

      // 1. Begin a record to capture the execution status of the current unit of the export task.
      //    Each record is stored in the export task until it is stopped, and is useful in the final phase to report all the results (i.e., exportTask#logSummary()).
      def record = exportTask.beginRecord(node.identifier, node.path)

      // 2. Get the document handle path (the parent of the currently visited variant node) and decide the export file location.
      def handlePath = node.parent.path
      def relPath = StringUtils.removeStart(handlePath, "/")
      def targetBaseFolder = VFS.getManager().resolveFile("file:///var/data/docdata")
      def file = targetBaseFolder.resolveFile(relPath + ".json")

      // 3. Mark this unit of the export task as processed before the real processing below.
      record.setProcessed(true)

      // 4. Export the document variant node.
      //    Create a Document instance and pass it to the export task.
      def document = new Document(node.identifier)
      def contentNode = exportTask.exportVariantToContentNode(document)

      // 5. The hippo:docbase UUID values in the exported data are rather meaningless in many cases.
      //    For example, if the referenced content is also exported and imported again later, its UUID will change.
      // 5.1. So, although it is optional, replace each hippo:docbase UUID with the node path of the referenced content node in the exported data.
      ContentNodeUtils.replaceDocbasesByPaths(documentManager.session, contentNode, ContentNodeUtils.MIRROR_DOCBASES_XPATH)
      // 5.2. Likewise, replace String docbase properties in the document with node paths.
      ContentNodeUtils.replaceDocbasePropertiesByPaths(documentManager.session, contentNode, "properties[@itemName='contenteximdemo:relatedarticle']")

      // 6. Record the exported file path for reporting later.
      record.setAttribute("file", file.name.path)

      // 7. Marshal the exported ContentNode object to a JSON file.
      exportTask.writeContentNodeToJsonFile(contentNode, file)

      // 8. Mark this unit of the export task as successful.
      record.setSucceeded(true)
          
          

Log execution summary

    // 1. Stop the export task after processing, which ends the execution recording.
    exportTask.stop()

    // 2. Finally, log the execution summary for the administrator.
    exportTask.logSummary()
          
          

The summary logs above will look like the following:

INFO 2016-02-19 13:08:40 

===============================================================================================================
Execution Summary:
---------------------------------------------------------------------------------------------------------------
Total: 18, Processed: 18, Suceeded: 18, Failed: 0, Duration: 636ms
---------------------------------------------------------------------------------------------------------------
Details (in CSV format):
---------------------------------------------------------------------------------------------------------------
SEQ,PROCESSED,SUCCEEDED,ID,PATH,TYPE,ATTRIBUTES,ERROR
1,true,true,b07db5ec-0df9-4adb-9bf1-416e0dad67ca,/content/documents/administration/labels/global/global,resourcebundle:resourcebundle,{file=/home/test/content-exim-demo-TRUNK/target/tomcat8x/temp/content-exim-demo/docdata/content/documents/administration/labels/global.json},
2,true,true,dd4b28ca-03f5-4d8b-bb90-07825219e227,/content/documents/administration/labels/homepage/homepage,resourcebundle:resourcebundle,{file=/home/test/content-exim-demo-TRUNK/target/tomcat8x/temp/content-exim-demo/docdata/content/documents/administration/labels/homepage.json},
3,true,true,9b261f74-4960-4e18-b3c0-4aca1f168822,/content/documents/administration/labels/pagenotfound/pagenotfound,resourcebundle:resourcebundle,{file=/home/test/content-exim-demo-TRUNK/target/tomcat8x/temp/content-exim-demo/docdata/content/documents/administration/labels/pagenotfound.json},
4,true,true,df0a4785-1a50-4ca8-973f-b67799bbd292,/content/documents/administration/labels/pagination/pagination,resourcebundle:resourcebundle,{file=/home/test/content-exim-demo-TRUNK/target/tomcat8x/temp/content-exim-demo/docdata/content/documents/administration/labels/pagination.json},
5,true,true,7821fda5-1334-454c-bf08-27ba6c6cfee6,/content/documents/contenteximdemo/banners/banner1/banner1,contenteximdemo:bannerdocument,{file=/home/test/content-exim-demo-TRUNK/target/tomcat8x/temp/content-exim-demo/docdata/content/documents/contenteximdemo/banners/banner1.json},
6,true,true,966e2575-39a3-4b61-bf05-e10975118a3c,/content/documents/contenteximdemo/banners/banner2/banner2,contenteximdemo:bannerdocument,{file=/home/test/content-exim-demo-TRUNK/target/tomcat8x/temp/content-exim-demo/docdata/content/documents/contenteximdemo/banners/banner2.json},
7,true,true,78d885c7-9701-491f-af71-472c38ed5b57,/content/documents/contenteximdemo/content/another-sample-document/another-sample-document,contenteximdemo:contentdocument,{file=/home/test/content-exim-demo-TRUNK/target/tomcat8x/temp/content-exim-demo/docdata/content/documents/contenteximdemo/content/another-sample-document.json},
8,true,true,f0df8091-a42c-4b4c-b7a3-38637a44fcad,/content/documents/contenteximdemo/content/sample-document/sample-document,contenteximdemo:contentdocument,{file=/home/test/content-exim-demo-TRUNK/target/tomcat8x/temp/content-exim-demo/docdata/content/documents/contenteximdemo/content/sample-document.json},
9,true,true,53dc3a58-9174-4222-8ccd-4ec800b934c4,/content/documents/contenteximdemo/events/2016/02/introduction-speech/introduction-speech,contenteximdemo:eventsdocument,{file=/home/test/content-exim-demo-TRUNK/target/tomcat8x/temp/content-exim-demo/docdata/content/documents/contenteximdemo/events/2016/02/introduction-speech.json},
10,true,true,2fd11c4c-e2a5-4910-973f-b83417ed683d,/content/documents/contenteximdemo/events/2016/02/breakfast/breakfast,contenteximdemo:eventsdocument,{file=/home/test/content-exim-demo-TRUNK/target/tomcat8x/temp/content-exim-demo/docdata/content/documents/contenteximdemo/events/2016/02/breakfast.json},
11,true,true,212f7c00-f309-4924-a25d-acbd7d3d7307,/content/documents/contenteximdemo/events/2016/02/workshop/workshop,contenteximdemo:eventsdocument,{file=/home/test/content-exim-demo-TRUNK/target/tomcat8x/temp/content-exim-demo/docdata/content/documents/contenteximdemo/events/2016/02/workshop.json},
12,true,true,57ec3b12-4722-44f9-bbed-46b993e693fb,/content/documents/contenteximdemo/faq/about-faq-items/faq-item-1/faq-item-1,contenteximdemo:faqitem,{file=/home/test/content-exim-demo-TRUNK/target/tomcat8x/temp/content-exim-demo/docdata/content/documents/contenteximdemo/faq/about-faq-items/faq-item-1.json},
13,true,true,3d25c1f7-52c6-4ee6-ba7f-b25505c1b00e,/content/documents/contenteximdemo/faq/about-faq/about-faq,contenteximdemo:faqlist,{file=/home/test/content-exim-demo-TRUNK/target/tomcat8x/temp/content-exim-demo/docdata/content/documents/contenteximdemo/faq/about-faq.json},
14,true,true,73ba5982-0755-4934-90d2-f75c0c53931f,/content/documents/contenteximdemo/faq/about-faq-items/faq-item-2/faq-item-2,contenteximdemo:faqitem,{file=/home/test/content-exim-demo-TRUNK/target/tomcat8x/temp/content-exim-demo/docdata/content/documents/contenteximdemo/faq/about-faq-items/faq-item-2.json},
15,true,true,f545543b-ed2b-4e71-883b-fc6f6072cf92,/content/documents/contenteximdemo/news/2016/02/the-gastropoda-news/the-gastropoda-news,contenteximdemo:newsdocument,{file=/home/test/content-exim-demo-TRUNK/target/tomcat8x/temp/content-exim-demo/docdata/content/documents/contenteximdemo/news/2016/02/the-gastropoda-news.json},
16,true,true,6e4ed59a-d5a7-4254-95a2-c278208f2f5b,/content/documents/contenteximdemo/news/2016/02/the-medusa-news/the-medusa-news,contenteximdemo:newsdocument,{file=/home/test/content-exim-demo-TRUNK/target/tomcat8x/temp/content-exim-demo/docdata/content/documents/contenteximdemo/news/2016/02/the-medusa-news.json},
17,true,true,8f3b599e-80e2-40fd-893b-e3bec836ea7f,/content/documents/contenteximdemo/news/2016/02/2013-harvest/2013-harvest,contenteximdemo:newsdocument,{file=/home/test/content-exim-demo-TRUNK/target/tomcat8x/temp/content-exim-demo/docdata/content/documents/contenteximdemo/news/2016/02/2013-harvest.json},
18,true,true,fe0e5a84-e030-4b54-80d4-4b806fdd294a,/content/documents/contenteximdemo/videos/video-example/video-example,contenteximdemo:video,{file=/home/test/content-exim-demo-TRUNK/target/tomcat8x/temp/content-exim-demo/docdata/content/documents/contenteximdemo/videos/video-example.json},
===============================================================================================================

          
          

Exported file(s)

{
  "name" : "the-medusa-news",
  "primaryType" : "contenteximdemo:newsdocument",
  "mixinTypes" : [ "mix:referenceable" ],
  "properties" : [ {
    "name" : "contenteximdemo:title",
    "type" : "STRING",
    "multiple" : false,
    "values" : [ "The medusa news" ]
  }, {
    "name" : "contenteximdemo:author",
    "type" : "STRING",
    "multiple" : false,
    "values" : [ "Alfred Anonymous" ]
  }, {
    "name" : "hippotranslation:locale",
    "type" : "STRING",
    "multiple" : false,
    "values" : [ "en" ]
  }, {
    "name" : "contenteximdemo:location",
    "type" : "STRING",
    "multiple" : false,
    "values" : [ "Rotterdam" ]
  }, {
    "name" : "contenteximdemo:source",
    "type" : "STRING",
    "multiple" : false,
    "values" : [ "" ]
  }, {
    "name" : "contenteximdemo:relatedarticle",
    "type" : "STRING",
    "multiple" : true,
    "values" : [ "/content/documents/contenteximdemo/news/2016/02/2013-harvest", "/content/documents/contenteximdemo/news/2016/02/the-gastropoda-news" ]
  }, {
    "name" : "hippotranslation:id",
    "type" : "STRING",
    "multiple" : false,
    "values" : [ "81a831f8-9184-48e3-a5e8-20c2f32f54d4" ]
  }, {
    "name" : "contenteximdemo:documenttype",
    "type" : "STRING",
    "multiple" : false,
    "values" : [ "news" ]
  }, {
    "name" : "contenteximdemo:date",
    "type" : "DATE",
    "multiple" : false,
    "values" : [ "2016-02-08T13:38:00.000-05:00" ]
  }, {
    "name" : "contenteximdemo:introduction",
    "type" : "STRING",
    "multiple" : false,
    "values" : [ "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum" ]
  }, {
    "name" : "jcr:path",
    "type" : "STRING",
    "multiple" : false,
    "values" : [ "/content/documents/contenteximdemo/news/2016/02/the-medusa-news" ]
  }, {
    "name" : "jcr:localizedName",
    "type" : "STRING",
    "multiple" : false,
    "values" : [ "The medusa news" ]
  } ],
  "nodes" : [ {
    "name" : "contenteximdemo:content",
    "primaryType" : "hippostd:html",
    "mixinTypes" : [ ],
    "properties" : [ {
      "name" : "hippostd:content",
      "type" : "STRING",
      "multiple" : false,
      "values" : [ "<html>\n          <body>\n\n          <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n          tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,\n          quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo\n          consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum\n          dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident,\n          sunt in culpa qui officia deserunt mollit anim id est laborum</p>\n\n          <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n          tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,\n          quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo\n          consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum\n          dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident,\n          sunt in culpa qui officia deserunt mollit anim id est laborum</p>\n\n          <p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod\n          tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,\n          quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo\n          consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum\n          dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident,\n          sunt in culpa qui officia deserunt mollit anim id est laborum </p>\n\n          </body>\n          </html>" ]
    } ],
    "nodes" : [ ]
  }, {
    "name" : "contenteximdemo:image",
    "primaryType" : "hippogallerypicker:imagelink",
    "mixinTypes" : [ ],
    "properties" : [ {
      "name" : "hippo:facets",
      "type" : "STRING",
      "multiple" : true,
      "values" : [ ]
    }, {
      "name" : "hippo:values",
      "type" : "STRING",
      "multiple" : true,
      "values" : [ ]
    }, {
      "name" : "hippo:docbase",
      "type" : "STRING",
      "multiple" : false,
      "values" : [ "/content/gallery/contenteximdemo/samples/animal-2883_640.jpg" ]
    }, {
      "name" : "hippo:modes",
      "type" : "STRING",
      "multiple" : true,
      "values" : [ ]
    } ],
    "nodes" : [ ]
  } ]
}