View Javadoc
1   /*
2    * This file is part of ***  M y C o R e  ***
3    * See http://www.mycore.de/ for details.
4    *
5    * MyCoRe is free software: you can redistribute it and/or modify
6    * it under the terms of the GNU General Public License as published by
7    * the Free Software Foundation, either version 3 of the License, or
8    * (at your option) any later version.
9    *
10   * MyCoRe is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU General Public License for more details.
14   *
15   * You should have received a copy of the GNU General Public License
16   * along with MyCoRe.  If not, see <http://www.gnu.org/licenses/>.
17   */
18  
19  package org.mycore.mets.model;
20  
21  import java.io.IOException;
22  import java.net.URISyntaxException;
23  import java.time.Instant;
24  import java.util.ArrayList;
25  import java.util.Collections;
26  import java.util.HashMap;
27  import java.util.Iterator;
28  import java.util.List;
29  import java.util.Map;
30  import java.util.Objects;
31  import java.util.Optional;
32  import java.util.stream.StreamSupport;
33  
34  import org.apache.logging.log4j.LogManager;
35  import org.apache.logging.log4j.Logger;
36  import org.mycore.common.MCRException;
37  import org.mycore.common.xml.MCRXMLFunctions;
38  import org.mycore.datamodel.metadata.MCRDerivate;
39  import org.mycore.datamodel.metadata.MCRMetaDerivateLink;
40  import org.mycore.datamodel.metadata.MCRMetaElement;
41  import org.mycore.datamodel.metadata.MCRMetadataManager;
42  import org.mycore.datamodel.metadata.MCRObject;
43  import org.mycore.datamodel.metadata.MCRObjectID;
44  import org.mycore.datamodel.metadata.MCRObjectUtils;
45  import org.mycore.datamodel.niofs.MCRContentTypes;
46  import org.mycore.datamodel.niofs.MCRPath;
47  import org.mycore.mets.model.files.FLocat;
48  import org.mycore.mets.model.files.File;
49  import org.mycore.mets.model.files.FileGrp;
50  import org.mycore.mets.model.files.FileSec;
51  import org.mycore.mets.model.header.MetsHdr;
52  import org.mycore.mets.model.sections.AmdSec;
53  import org.mycore.mets.model.sections.DmdSec;
54  import org.mycore.mets.model.struct.Area;
55  import org.mycore.mets.model.struct.Fptr;
56  import org.mycore.mets.model.struct.LOCTYPE;
57  import org.mycore.mets.model.struct.LogicalDiv;
58  import org.mycore.mets.model.struct.LogicalStructMap;
59  import org.mycore.mets.model.struct.PhysicalDiv;
60  import org.mycore.mets.model.struct.PhysicalStructMap;
61  import org.mycore.mets.model.struct.PhysicalSubDiv;
62  import org.mycore.mets.model.struct.Seq;
63  import org.mycore.mets.model.struct.SmLink;
64  import org.mycore.mets.model.struct.StructLink;
65  import org.mycore.mets.tools.MCRMetsSave;
66  
/**
 * This class generates a METS xml file for the METS-Editor. In contrast to the default
 * implementation, the hierarchy of the MCRObjects and their derivate links are considered.
 * Starting from the root element, all children are hierarchically recorded in the logical
 * structure map of the METS file. If your application supports derivate links, the struct link
 * part links to those files.
 *
 * @author Matthias Eichner
 */
76  public abstract class MCRMETSHierarchyGenerator extends MCRMETSAbstractGenerator {
77  
78      private static final Logger LOGGER = LogManager.getLogger(MCRMETSHierarchyGenerator.class);
79  
80      protected MCRDerivate mcrDer;
81  
82      protected MCRObject rootObj;
83  
84      protected MetsHdr metsHdr;
85  
86      protected AmdSec amdSection;
87  
88      protected DmdSec dmdSection;
89  
90      protected FileSec fileSection;
91  
92      protected PhysicalStructMap physicalStructMap;
93  
94      protected LogicalStructMap logicalStructMap;
95  
96      protected StructLink structLink;
97  
98      private List<FileRef> files;
99  
100     /**
101      * Hashmap to store logical and physical ids. An entry is added
102      * for each derivate link.
103      */
104     private Map<String, List<String>> structLinkMap;
105 
106     public MCRMETSHierarchyGenerator() {
107         this.files = new ArrayList<>();
108     }
109 
110     @Override
111     public synchronized Mets generate() throws MCRException {
112         long startTime = System.currentTimeMillis();
113         String derivateId = getOwner();
114         setup(derivateId);
115         try {
116             Mets mets = createMets();
117             LOGGER.info("mets creation for derivate {} took {}ms!", derivateId, System.currentTimeMillis() - startTime);
118             return mets;
119         } catch (Exception exc) {
120             throw new MCRException("Unable to create mets.xml of " + derivateId, exc);
121         }
122     }
123 
124     /**
125      * Initializes the derivate and the root object.
126      *
127      * @param derivateId the derivate id to setup
128      */
129     protected void setup(String derivateId) {
130         // get derivate
131         MCRObjectID derId = MCRObjectID.getInstance(derivateId);
132         this.mcrDer = MCRMetadataManager.retrieveMCRDerivate(derId);
133         // get mycore object
134         MCRObjectID objId = this.mcrDer.getDerivate().getMetaLink().getXLinkHrefID();
135         this.rootObj = MCRMetadataManager.retrieveMCRObject(objId);
136     }
137 
138     /**
139      * Does the mets creation.
140      *
141      * @return the new created mets
142      * @throws IOException files of the path couldn't be read
143      */
144     protected Mets createMets() throws IOException {
145         LOGGER.info("create mets for derivate {}...", this.mcrDer.getId());
146 
147         this.structLinkMap = new HashMap<>();
148 
149         // create mets sections
150         this.metsHdr = createMetsHdr();
151         this.amdSection = createAmdSection();
152         this.dmdSection = createDmdSection();
153         this.fileSection = createFileSection();
154         this.physicalStructMap = createPhysicalStruct();
155         this.logicalStructMap = createLogicalStruct();
156         this.structLink = createStructLink();
157 
158         // add to mets
159         Mets mets = new Mets();
160         mets.setMetsHdr(metsHdr);
161         mets.addAmdSec(this.amdSection);
162         mets.addDmdSec(this.dmdSection);
163         mets.setFileSec(this.fileSection);
164         mets.addStructMap(this.physicalStructMap);
165         mets.addStructMap(this.logicalStructMap);
166         mets.setStructLink(this.structLink);
167         return mets;
168     }
169 
170     /**
171      * Creates a new mets header with current dates and record status = autogenerated.
172      *
173      * @return generated mets header section.
174      */
175     protected MetsHdr createMetsHdr() {
176         MetsHdr hdr = new MetsHdr();
177         hdr.setCreateDate(Instant.now());
178         hdr.setLastModDate(Instant.now());
179         hdr.setRecordStatus("autogenerated");
180         return hdr;
181     }
182 
183     /**
184      * Creates a new empty amd section. Id is amd_{derivate id}.
185      *
186      * @return generated amd section.
187      */
188     protected AmdSec createAmdSection() {
189         String amdId = "amd_" + this.mcrDer.getId();
190         return new AmdSec(amdId);
191     }
192 
193     /**
194      * Creates a new empty dmd section. Id is dmd_{derivate id}.
195      *
196      * @return generated dmd section.
197      */
198     protected DmdSec createDmdSection() {
199         String dmdSec = "dmd_" + this.mcrDer.getId();
200         return new DmdSec(dmdSec);
201     }
202 
203     /**
204      * Creates the file section.
205      *
206      * @return generated file secion.
207      */
208     protected FileSec createFileSection() throws IOException {
209         FileSec fileSec = new FileSec();
210 
211         List<MCRPath> filePaths = MCRMetsSave.listFiles(getDerivatePath(), getIgnorePaths());
212         List<FileGrp> fileGrps = MCRMetsSave.buildFileGroups(filePaths);
213         fileGrps.forEach(fileSec::addFileGrp);
214 
215         for (MCRPath file : filePaths) {
216             String contentType = MCRContentTypes.probeContentType(file);
217             FileRef ref = new FileRef(file, contentType);
218             this.files.add(ref);
219         }
220 
221         for (FileRef ref : this.files) {
222             String use = MCRMetsModelHelper.getUseForHref(ref.path.getOwnerRelativePath()).orElse("UNKNOWN");
223             FileGrp fileGrp = fileGrps.stream().filter(grp -> grp.getUse().equals(use)).findFirst().orElse(null);
224             if (fileGrp == null) {
225                 LOGGER.warn("Unable to add file '" + ref.toId() + "' because cannot find corresponding group "
226                         + " with @USE='" + use + "'. Ignore file and continue.");
227                 continue;
228             }
229             addFile(ref.toId(), fileGrp, ref.getPath(), ref.getContentType());
230         }
231 
232         return fileSec;
233     }
234 
235     private void addFile(String id, FileGrp fileGroup, MCRPath path, String mimeType) {
236         File imageFile = new File(id, mimeType);
237         try {
238             final String href = MCRXMLFunctions.encodeURIPath(path.getOwnerRelativePath().substring(1), true);
239             FLocat fLocat = new FLocat(LOCTYPE.URL, href);
240             imageFile.setFLocat(fLocat);
241             fileGroup.addFile(imageFile);
242         } catch (URISyntaxException uriSyntaxException) {
243             LOGGER.error("invalid href", uriSyntaxException);
244         }
245     }
246 
247     /**
248      * This method creates the physical structure map.
249      *
250      * @return generated pyhiscal struct map secion.
251      */
252     protected PhysicalStructMap createPhysicalStruct() {
253         PhysicalStructMap pstr = new PhysicalStructMap();
254         PhysicalDiv physicalDiv = new PhysicalDiv("phys_" + this.mcrDer.getId(), PhysicalDiv.TYPE_PHYS_SEQ);
255         pstr.setDivContainer(physicalDiv);
256         // run through file references
257         for (FileRef ref : this.files) {
258             String physId = ref.toPhysId();
259             PhysicalSubDiv page = physicalDiv.get(physId);
260             if (page == null) {
261                 page = new PhysicalSubDiv(physId, PhysicalSubDiv.TYPE_PAGE);
262                 getOrderLabel(ref.toId()).ifPresent(page::setOrderLabel);
263                 physicalDiv.add(page);
264             }
265             page.add(new Fptr(ref.toId()));
266         }
267         return pstr;
268     }
269 
270     /**
271      * Returns the order label for the given file.
272      *
273      * @param fileId of the mets:file in the mets:fileSec
274      * @return optional order label
275      */
276     protected Optional<String> getOrderLabel(String fileId) {
277         return getOldMets().map(oldMets -> {
278             PhysicalSubDiv subDiv = oldMets.getPhysicalStructMap().getDivContainer().byFileId(fileId);
279             if (subDiv == null) {
280                 LOGGER.warn("Unable to get @ORDERLABEL of physical div '{}'.", fileId);
281                 return null;
282             }
283             return subDiv.getOrderLabel();
284         });
285     }
286 
287     /**
288      * Creates the logical struct map.
289      *
290      * @return a newly created logical struct map
291      */
292     protected LogicalStructMap createLogicalStruct() {
293         LogicalStructMap lsm = newLogicalStructMap();
294         mergeOldLogicalStructMap(lsm);
295         return lsm;
296     }
297 
298     protected LogicalStructMap newLogicalStructMap() {
299         LogicalStructMap lstr = new LogicalStructMap();
300         MCRObjectID objId = this.rootObj.getId();
301         // create main div
302         String amdId = this.amdSection.getId();
303         String dmdId = this.dmdSection.getId();
304         LogicalDiv logicalDiv = new LogicalDiv(objId.toString(), getType(this.rootObj), getLabel(this.rootObj), amdId,
305                 dmdId);
306         lstr.setDivContainer(logicalDiv);
307         // run through all children
308         newLogicalStructMap(this.rootObj, logicalDiv);
309         // remove not linked logical divs
310         logicalDiv.getChildren().removeIf(child -> !validateLogicalStruct(child));
311         return lstr;
312     }
313 
314     /**
315      * Creates the logical structure recursive. 
316      *
317      * @param parentObject mycore object
318      * @param parentLogicalDiv parent div
319      */
320     protected void newLogicalStructMap(MCRObject parentObject, LogicalDiv parentLogicalDiv) {
321         // run through all children
322         List<MCRObject> children = getChildren(parentObject);
323         children.forEach(childObject -> {
324             // create new logical sub div
325             String id = childObject.getId().toString();
326             LogicalDiv logicalChildDiv = new LogicalDiv(id, getType(childObject), getLabel(childObject));
327             // add to parent
328             parentLogicalDiv.add(logicalChildDiv);
329             // check if a derivate link exists and get the linked file
330             updateStructLinkMapUsingDerivateLinks(logicalChildDiv, childObject);
331             // do recursive call for children
332             newLogicalStructMap(childObject, logicalChildDiv);
333         });
334     }
335 
336     /**
337      * Runs through the logical part of the old mets and copies the ALTO part (mets:fptr/mets:seq/mets:area)
338      * to the newly created logical struct map. This is done by comparing the mets:div @ID's of the old and the new
339      * logical struct map. If two @ID's are equal, we can assume that it is the same mets:div and we just copy all
340      * the old mets:fptr's.
341      *
342      * @param logicalStructMap the logical struct map to enhance
343      */
344     protected void mergeOldLogicalStructMap(LogicalStructMap logicalStructMap) {
345         if (!this.getOldMets().isPresent()) {
346             return;
347         }
348         Mets oldMets = this.getOldMets().get();
349         LogicalStructMap oldLsm = oldMets.getLogicalStructMap();
350         FileGrp oldAltoGroup = oldMets.getFileSec().getFileGroup("ALTO");
351         FileGrp newAltoGroup = this.fileSection.getFileGroup("ALTO");
352 
353         List<LogicalDiv> descendants = oldLsm.getDivContainer().getDescendants();
354         descendants.stream().filter(div -> !div.getFptrList().isEmpty()).forEach(oldDiv -> {
355             String id = oldDiv.getId();
356             LogicalDiv newDiv = logicalStructMap.getDivContainer().getLogicalSubDiv(id);
357             if (newDiv != null) {
358                 for (Fptr fptr : oldDiv.getFptrList()) {
359                     copyFptr(oldAltoGroup, newAltoGroup, fptr).ifPresent(newFptr -> newDiv.getFptrList().add(newFptr));
360                 }
361                 updateStructLinkMapUsingALTO(newDiv);
362             } else {
363                 LOGGER.warn("Unable to find logical div @ID='" + id + "'"
364                         + " of previous mets.xml in this generated mets.xml. Be aware that the content of the 'old' "
365                         + "logical div cannot be copied and therefore cannot be preserved!");
366             }
367         });
368     }
369 
370     private Optional<Fptr> copyFptr(FileGrp oldGrp, FileGrp newGrp, Fptr oldFptr) {
371         Fptr newFptr = new Fptr();
372         for (Seq oldSeq : oldFptr.getSeqList()) {
373             Seq newSeq = new Seq();
374             for (Area oldArea : oldSeq.getAreaList()) {
375                 if (oldArea.getBetype() == null) {
376                     continue;
377                 }
378                 String oldFileID = oldArea.getFileId();
379                 File oldFile = oldGrp.getFileById(oldFileID);
380                 String href = oldFile.getFLocat().getHref();
381                 File newFile = newGrp.getFileByHref(href);
382 
383                 Area newArea = new Area();
384                 newArea.setBegin(oldArea.getBegin());
385                 newArea.setEnd(oldArea.getEnd());
386                 newArea.setFileId(newFile.getId());
387                 newArea.setBetype("IDREF");
388                 newSeq.getAreaList().add(newArea);
389             }
390             if (!newSeq.getAreaList().isEmpty()) {
391                 newFptr.getSeqList().add(newSeq);
392             }
393         }
394         return newFptr.getSeqList().isEmpty() ? Optional.empty() : Optional.of(newFptr);
395     }
396 
397     /**
398      * Fills the structLinkMap for a single logical mets:div using derivate link information.
399      *
400      * @param logicalDiv the logical div to handle
401      * @param mcrObject the mycore object linked in the logical div (mets:div/@ID == mycore object id)
402      */
403     protected void updateStructLinkMapUsingDerivateLinks(LogicalDiv logicalDiv, MCRObject mcrObject) {
404         // by derivate link
405         Optional<String> linkedFileOptional = getLinkedFile(mcrObject);
406         linkedFileOptional.flatMap(this::getFileId).ifPresent(fileId -> {
407             PhysicalSubDiv physicalDiv = getPhysicalDiv(fileId);
408             addToStructLinkMap(logicalDiv, physicalDiv);
409         });
410     }
411 
412     /**
413      * Fills the structLinkMap for a single logical mets:div using mets:area/@FILEID information.
414      *
415      * @param logicalDiv the logical div to handle
416      */
417     protected void updateStructLinkMapUsingALTO(final LogicalDiv logicalDiv) {
418         logicalDiv.getFptrList()
419                   .stream()
420                   .flatMap(fptr -> fptr.getSeqList().stream())
421                   .flatMap(seq -> seq.getAreaList().stream())
422                   .map(Area::getFileId)
423                   .map(this::getPhysicalDiv)
424                   .forEach(physicalDiv -> addToStructLinkMap(logicalDiv, physicalDiv));
425     }
426 
427     /**
428      * Adds the logical div to the physical div. Required to build the mets:structLink section.
429      *
430      * @param from logical div
431      * @param to physical div
432      */
433     protected void addToStructLinkMap(LogicalDiv from, PhysicalSubDiv to) {
434         if (from == null || to == null) {
435             return;
436         }
437         List<String> logChildDivIDs = this.structLinkMap.getOrDefault(to.getId(), new ArrayList<>());
438         logChildDivIDs.add(from.getId());
439         this.structLinkMap.put(to.getId(), logChildDivIDs);
440     }
441 
442     /**
443      * Returns all children id's of this MCRObject.
444      *
445      * @param parentObject the mycore object
446      */
447     protected List<MCRObject> getChildren(MCRObject parentObject) {
448         return MCRObjectUtils.getChildren(parentObject);
449     }
450 
451     /**
452      * Its important to remove not linked logical divs without children to
453      * get a valid logical structure.
454      *
455      * @param logicalDiv the logical div to check
456      * @return true if the logical struct is valid otherwise false
457      */
458     private boolean validateLogicalStruct(LogicalDiv logicalDiv) {
459         // has link
460         String logicalDivId = logicalDiv.getId();
461         for (List<String> logivalDivIDs : structLinkMap.values()) {
462             if (logivalDivIDs.contains(logicalDivId)) {
463                 return true;
464             }
465         }
466         // has children with link
467         Iterator<LogicalDiv> it = logicalDiv.getChildren().iterator();
468         while (it.hasNext()) {
469             LogicalDiv child = it.next();
470             if (validateLogicalStruct(child)) {
471                 return true;
472             }
473             // nothing -> delete it
474             it.remove();
475         }
476         return false;
477     }
478 
479     /**
480      * Creates the mets:structLink part of the mets.xml
481      *
482      * @return a newly generated StructLink.
483      */
484     protected StructLink createStructLink() {
485         StructLink structLink = new StructLink();
486         String currentLogicalDivId = logicalStructMap.getDivContainer().getId();
487         PhysicalDiv physicalDiv = this.physicalStructMap.getDivContainer();
488         List<PhysicalSubDiv> subDivList = physicalDiv.getChildren();
489         for (PhysicalSubDiv physLink : subDivList) {
490             if (structLinkMap.containsKey(physLink.getId())) {
491                 ArrayList<String> logicalIdList = new ArrayList<>(structLinkMap.get(physLink.getId()));
492                 Collections.sort(logicalIdList);
493                 for (String logicalId : logicalIdList) {
494                     currentLogicalDivId = logicalId;
495                     structLink.addSmLink(new SmLink(currentLogicalDivId, physLink.getId()));
496                 }
497             } else {
498                 structLink.addSmLink(new SmLink(currentLogicalDivId, physLink.getId()));
499             }
500         }
501         return structLink;
502     }
503 
504     /**
505      * Runs through all USE="MASTER" files and tries to find the corresponding
506      * mets:file @ID.
507      *
508      * @param uriEncodedLinkedFile the file to find
509      * @return the fileSec @ID
510      */
511     private Optional<String> getFileId(String uriEncodedLinkedFile) {
512         FileGrp masterGroup = this.fileSection.getFileGroup(FileGrp.USE_MASTER);
513         return masterGroup.getFileList().stream().filter(file -> {
514             String href = file.getFLocat().getHref();
515             boolean equals = href.equals(uriEncodedLinkedFile);
516             boolean equalsWithoutSlash =
517                     uriEncodedLinkedFile.startsWith("/") && href.equals(uriEncodedLinkedFile.substring(1));
518             return equals || equalsWithoutSlash;
519         }).map(File::getId).findFirst();
520     }
521 
522     /**
523      * Returns a physical sub div by the given fileId.
524      *
525      * @param fileId id of a file element in fileGrp
526      * @return finds a physical div by the given file id
527      */
528     private PhysicalSubDiv getPhysicalDiv(String fileId) {
529         if (fileId == null) {
530             return null;
531         }
532         PhysicalDiv mainDiv = this.physicalStructMap.getDivContainer();
533         return mainDiv.getChildren()
534                       .stream()
535                       .filter(subDiv -> Objects.nonNull(subDiv.getFptr(fileId)))
536                       .findAny()
537                       .orElse(null);
538     }
539 
540     /**
541      * Returns the URI encoded file path of the first derivate link.
542      *
543      * @param mcrObj object which contains the derivate link
544      */
545     protected Optional<String> getLinkedFile(MCRObject mcrObj) {
546         MCRMetaElement me = mcrObj.getMetadata().getMetadataElement(getEnclosingDerivateLinkName());
547         // no derivate link
548         if (me == null) {
549             return Optional.empty();
550         }
551         return StreamSupport.stream(me.spliterator(), false)
552                             .filter(metaInterface -> metaInterface instanceof MCRMetaDerivateLink)
553                             .map(MCRMetaDerivateLink.class::cast)
554                             .filter(link -> this.mcrDer.getId().equals(MCRObjectID.getInstance(link.getOwner())))
555                             .map(MCRMetaDerivateLink::getRawPath)
556                             .findFirst();
557     }
558 
559     /**
560      * Type attribute used in logical structure. Something like journal, article,
561      * book...
562      */
563     protected abstract String getType(MCRObject obj);
564 
565     /**
566      * Returns the label of an object. Used in logical structure.
567      */
568     protected abstract String getLabel(MCRObject obj);
569 
570     /**
571      * Enclosing name of the derivate link element.
572      * In journals this is 'derivateLinks', in archive 'def.derivateLink'.
573      */
574     protected abstract String getEnclosingDerivateLinkName();
575 
576     /**
577      * Name of the derivate link element. E.g. 'derivateLink'.
578      */
579     protected abstract String getDerivateLinkName();
580 
581     class FileRef {
582 
583         private MCRPath path;
584 
585         private String contentType;
586 
587         FileRef(MCRPath path, String contentType) {
588             this.path = path;
589             this.contentType = contentType;
590         }
591 
592         public String toId() {
593             return MCRMetsSave.getFileId(path);
594         }
595 
596         public String toPhysId() {
597             return PhysicalSubDiv.ID_PREFIX + MCRMetsSave.getFileBase(path);
598         }
599 
600         public MCRPath getPath() {
601             return path;
602         }
603 
604         public String getContentType() {
605             return contentType;
606         }
607 
608     }
609 
610 }