View Javadoc
1   /*
2    * This file is part of ***  M y C o R e  ***
3    * See http://www.mycore.de/ for details.
4    *
5    * MyCoRe is free software: you can redistribute it and/or modify
6    * it under the terms of the GNU General Public License as published by
7    * the Free Software Foundation, either version 3 of the License, or
8    * (at your option) any later version.
9    *
10   * MyCoRe is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU General Public License for more details.
14   *
15   * You should have received a copy of the GNU General Public License
16   * along with MyCoRe.  If not, see <http://www.gnu.org/licenses/>.
17   */
18  
19  package org.mycore.solr.index.file;
20  
21  import java.io.IOException;
22  import java.nio.file.Path;
23  import java.nio.file.ProviderMismatchException;
24  import java.nio.file.attribute.BasicFileAttributes;
25  import java.util.Collection;
26  import java.util.Date;
27  import java.util.HashSet;
28  import java.util.concurrent.TimeUnit;
29  
30  import org.apache.logging.log4j.LogManager;
31  import org.apache.logging.log4j.Logger;
32  import org.apache.solr.common.SolrInputDocument;
33  import org.mycore.common.MCRCache;
34  import org.mycore.datamodel.classifications2.MCRCategLinkReference;
35  import org.mycore.datamodel.classifications2.MCRCategLinkServiceFactory;
36  import org.mycore.datamodel.classifications2.MCRCategory;
37  import org.mycore.datamodel.classifications2.MCRCategoryDAO;
38  import org.mycore.datamodel.classifications2.MCRCategoryDAOFactory;
39  import org.mycore.datamodel.classifications2.MCRCategoryID;
40  import org.mycore.datamodel.common.MCRISO8601Date;
41  import org.mycore.datamodel.common.MCRXMLMetadataManager;
42  import org.mycore.datamodel.metadata.MCRMetadataManager;
43  import org.mycore.datamodel.metadata.MCRObjectID;
44  import org.mycore.datamodel.niofs.MCRContentTypes;
45  import org.mycore.datamodel.niofs.MCRPath;
46  
47  import com.google.common.io.Files;
48  
49  public class MCRSolrFileIndexBaseAccumulator implements MCRSolrFileIndexAccumulator {
50  
51      private static Logger LOGGER = LogManager.getLogger(MCRSolrFileIndexBaseAccumulator.class);
52  
53      private static MCRXMLMetadataManager XML_MANAGER = MCRXMLMetadataManager.instance();
54  
55      private static final MCRCategoryDAO CATEGORY_DAO = MCRCategoryDAOFactory.getInstance();
56  
57      private static final MCRCache<String, String> DERIVATE_MODIFIED_CACHE = new MCRCache<>(10000,
58          "derivateID ISODateString cache");
59  
60      @Override
61      public void accumulate(SolrInputDocument doc, Path input, BasicFileAttributes attr) throws IOException {
62          doc.setField("id", input.toUri().toString());
63          String absolutePath = '/' + input.subpath(0, input.getNameCount()).toString();
64          try {
65              MCRPath mcrPath = MCRPath.toMCRPath(input); //check if this is an MCRPath -> more metadata
66              MCRObjectID mcrObjID = MCRMetadataManager.getObjectId(MCRObjectID.getInstance(mcrPath.getOwner()), 10,
67                  TimeUnit.SECONDS);
68              if (mcrObjID == null) {
69                  LOGGER.warn("Could not determine MCRObject for file {}", absolutePath);
70                  doc.setField("returnId", mcrPath.getOwner());
71              } else {
72                  doc.setField("returnId", mcrObjID.toString());
73                  doc.setField("objectProject", mcrObjID.getProjectId());
74              }
75              String ownerID = mcrPath.getOwner();
76              doc.setField("derivateID", ownerID);
77              doc.setField("derivateModified", getDerivateModified(ownerID));
78              Collection<MCRCategoryID> linksFromReference = MCRCategLinkServiceFactory.getInstance()
79                  .getLinksFromReference(new MCRCategLinkReference(mcrPath));
80              HashSet<MCRCategoryID> linkedCategories = new HashSet<>(linksFromReference);
81              for (MCRCategoryID category : linksFromReference) {
82                  for (MCRCategory parent : CATEGORY_DAO.getParents(category)) {
83                      linkedCategories.add(parent.getId());
84                  }
85              }
86              for (MCRCategoryID category : linkedCategories) {
87                  doc.addField("fileCategory", category.toString());
88              }
89          } catch (ProviderMismatchException e) {
90              LOGGER.warn("Cannot build all fields as input is not an instance of MCRPath: {}", input);
91          }
92          doc.setField("objectType", "data_file");
93          doc.setField("fileName", input.getFileName().toString());
94          doc.setField("filePath", absolutePath);
95          doc.setField("stream_size", attr.size());
96          doc.setField("stream_name", absolutePath);
97          doc.setField("stream_source_info", input.toString());
98          doc.setField("stream_content_type", MCRContentTypes.probeContentType(input));
99          doc.setField("extension", Files.getFileExtension(input.getFileName().toString()));
100         MCRISO8601Date iDate = new MCRISO8601Date();
101         iDate.setDate(new Date(attr.lastModifiedTime().toMillis()));
102         doc.setField("modified", iDate.getISOString());
103     }
104 
105     /**
106      * returns ISO8601 formated string of when derivate was last modified
107      *
108      * @param derivateID
109      * @throws IOException
110      *             thrown by {@link MCRCache.ModifiedHandle#getLastModified()}
111      */
112     private static String getDerivateModified(final String derivateID) throws IOException {
113         MCRObjectID derID = MCRObjectID.getInstance(derivateID);
114         MCRCache.ModifiedHandle modifiedHandle = XML_MANAGER.getLastModifiedHandle(derID, 30, TimeUnit.SECONDS);
115         String modified = DERIVATE_MODIFIED_CACHE.getIfUpToDate(derivateID, modifiedHandle);
116         if (modified == null) {
117             Date date = new Date(modifiedHandle.getLastModified());
118             MCRISO8601Date date2 = new MCRISO8601Date();
119             date2.setDate(date);
120             modified = date2.getISOString();
121             DERIVATE_MODIFIED_CACHE.put(derivateID, modified);
122         }
123         return modified;
124     }
125 }