View Javadoc
1   /*
2    * This file is part of ***  M y C o R e  ***
3    * See http://www.mycore.de/ for details.
4    *
5    * MyCoRe is free software: you can redistribute it and/or modify
6    * it under the terms of the GNU General Public License as published by
7    * the Free Software Foundation, either version 3 of the License, or
8    * (at your option) any later version.
9    *
10   * MyCoRe is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU General Public License for more details.
14   *
15   * You should have received a copy of the GNU General Public License
16   * along with MyCoRe.  If not, see <http://www.gnu.org/licenses/>.
17   */
18  package org.mycore.oai;
19  
20  import static org.mycore.oai.pmh.OAIConstants.NS_OAI;
21  
22  import java.util.Collections;
23  import java.util.Comparator;
24  import java.util.HashMap;
25  import java.util.List;
26  import java.util.Map;
27  import java.util.concurrent.Executors;
28  import java.util.concurrent.ScheduledExecutorService;
29  import java.util.concurrent.TimeUnit;
30  import java.util.stream.Collectors;
31  
32  import org.apache.logging.log4j.LogManager;
33  import org.apache.logging.log4j.Logger;
34  import org.jdom2.Element;
35  import org.mycore.common.MCRException;
36  import org.mycore.common.MCRSystemUserInformation;
37  import org.mycore.common.config.MCRConfiguration2;
38  import org.mycore.common.events.MCRShutdownHandler;
39  import org.mycore.common.xml.MCRURIResolver;
40  import org.mycore.datamodel.classifications2.MCRCategoryDAOFactory;
41  import org.mycore.oai.classmapping.MCRClassificationAndSetMapper;
42  import org.mycore.oai.pmh.Description;
43  import org.mycore.oai.pmh.OAIConstants;
44  import org.mycore.oai.pmh.OAIDataList;
45  import org.mycore.oai.pmh.Set;
46  import org.mycore.oai.set.MCROAISetConfiguration;
47  import org.mycore.oai.set.MCROAISetHandler;
48  import org.mycore.oai.set.MCROAISolrSetConfiguration;
49  import org.mycore.oai.set.MCRSet;
50  import org.mycore.util.concurrent.MCRFixedUserCallable;
51  
52  /**
53   * Manager class to handle OAI-PMH set specific behavior.
54   * For a data provider instance, set support is optional and must be configured as described below.
55   * Typically, sets are mapped to categories of a classification in MyCoRe. The set specifications are read from one or
56   * more URIs using MCRURIResolver. This allows for sets that are typically built by applying an xsl stylesheet to the
57   * output of the classification URI resolver, but also for other ways to dynamically create set specifications, or for
58   * a static set specification that is read from an xml file.
59   * Example:
60   * <pre>
61   * MCR.OAIDataProvider.OAI.Sets.OA=webapp:oai/open_access.xml
62   * MCR.OAIDataProvider.OAI.Sets.DDC=xslStyle:classification2sets:classification:DDC
63   * </pre>
64   * The first line reads a set specification from a static xml file stored in the web application. The DINI certificate
65   * demands that there always is a set open_access that contains all public Open Access documents. Since this set always
66   * exists, its set specification can be read from a static file. The second line uses the classification resolver to
67   * read in a classification, then transforms the xml to build set specifications from the listed categories. It is
68   * recommended not to list sets that are completely empty, to simplify harvesting. The fastest way to accomplish this
69   * is to somehow ensure that no set specifications from empty sets are delivered from the URIs, which means that the
70   * classification resolver filters out empty categories, or the xsl stylesheet somehow decides to filter empty sets.
71   * Another way to filter out empty sets can be activated by setting a property: 
72   * <code>MCR.OAIDataProvider.OAI.FilterEmptySets=true</code> When set to true, the MCRSetManager handler filters out
73   * empty sets itself after reading in the URIs. This is done by constructing a query for each set and looking for
74   * matching hits. Set queries are built using the OAI Adapter's buildSetCondition method. Filtering empty sets this way
75   * may be useful for some implementations, but it is slower and should be avoided for large set hierarchies.
76   *
77   * @see MCRURIResolver
78   * @author Frank L\u00fctzenkirchen
79   * @author Matthias Eichner
80   */
81  public class MCROAISetManager {
82  
83      protected static final Logger LOGGER = LogManager.getLogger(MCROAISetManager.class);
84  
85      protected String configPrefix;
86  
87      protected final Map<String, MCROAISetConfiguration<?, ?, ?>> setConfigurationMap;
88  
89      /**
90       * Time in milliseconds when the classification changed.
91       */
92      protected long classLastModified;
93  
94      /**
95       * Time in minutes.
96       */
97      protected int cacheMaxAge;
98  
99      protected final OAIDataList<MCRSet> cachedSetList;
100 
101     public MCROAISetManager() {
102         this.setConfigurationMap = Collections.synchronizedMap(new HashMap<>());
103         this.cachedSetList = new OAIDataList<>();
104         this.classLastModified = Long.MIN_VALUE;
105     }
106 
107     public void init(String configPrefix, int cacheMaxAge) {
108         this.configPrefix = configPrefix;
109         this.cacheMaxAge = cacheMaxAge;
110         updateURIs();
111         if (this.cacheMaxAge != 0) {
112             ScheduledExecutorService updateSetExecutor = Executors.newScheduledThreadPool(1);
113             try {
114                 updateCachedSetList();
115                 updateSetExecutor.scheduleWithFixedDelay(getUpdateRunnable(), cacheMaxAge, cacheMaxAge,
116                     TimeUnit.MINUTES);
117             } finally {
118                 MCRShutdownHandler.getInstance().addCloseable(updateSetExecutor::shutdown);
119             }
120         }
121     }
122 
123     private Runnable getUpdateRunnable() {
124         MCRFixedUserCallable<Object> callable = new MCRFixedUserCallable<>(
125             Executors.callable(this::updateCachedSetList), MCRSystemUserInformation.getSystemUserInstance());
126         return () -> {
127             try {
128                 callable.call();
129             } catch (RuntimeException e) {
130                 throw e;
131             } catch (Exception e) {
132                 throw new MCRException(e);
133             }
134         };
135     }
136 
137     private void updateCachedSetList() {
138         LOGGER.info("update oai set list");
139         synchronized (cachedSetList) {
140             OAIDataList<MCRSet> setList = createSetList();
141             cachedSetList.clear();
142             cachedSetList.addAll(setList);
143         }
144     }
145 
146     protected void updateURIs() {
147         Map<String, MCROAISolrSetConfiguration> newVersion = getDefinedSetIds().stream()
148             .map(String::trim)
149             .map(setId -> new MCROAISolrSetConfiguration(this.configPrefix, setId))
150             .collect(Collectors.toMap(MCROAISolrSetConfiguration::getId, c -> c));
151         setConfigurationMap.entrySet().removeIf(c -> !newVersion.containsKey(c.getKey()));
152         setConfigurationMap.putAll(newVersion);
153     }
154 
155     public List<String> getDefinedSetIds() {
156         return MCRConfiguration2.getString(this.configPrefix + "Sets")
157             .map(MCRConfiguration2::splitValue)
158             .map(s -> s.collect(Collectors.toList()))
159             .orElseGet(Collections::emptyList);
160     }
161 
162     /**
163      * Returns a list of OAI-PMH sets defined by MyCoRe.
164      *
165      * @return list of oai sets
166      */
167     @SuppressWarnings("unchecked")
168     public OAIDataList<MCRSet> get() {
169         // no cache
170         if (this.cacheMaxAge == 0) {
171             return createSetList();
172         }
173         OAIDataList<MCRSet> oaiDataList = getDirectList();
174         // create a shallow copy of the set list
175         synchronized (oaiDataList) {
176             return (OAIDataList<MCRSet>) oaiDataList.clone();
177         }
178     }
179 
180     public OAIDataList<MCRSet> getDirectList() {
181         if (this.cacheMaxAge == 0) {
182             return createSetList();
183         }
184         // cache
185         // check if classification changed
186         long lastModified = MCRCategoryDAOFactory.getInstance().getLastModified();
187         if (lastModified != this.classLastModified) {
188             this.classLastModified = lastModified;
189             synchronized (this.cachedSetList) {
190                 OAIDataList<MCRSet> setList = createSetList();
191                 cachedSetList.clear();
192                 cachedSetList.addAll(setList);
193             }
194         }
195         return cachedSetList;
196     }
197 
198     /**
199      * Returns the {@link MCROAISetConfiguration} for the given set id.
200      *
201      * @param <Q> value of the configuration
202      * @param <R> Result collection type
203      * @param <K> Key value type for a single hit
204      * @param setId the set identifier
205      * @return the configuration for this set
206      */
207     @SuppressWarnings("unchecked")
208     public <Q, R, K> MCROAISetConfiguration<Q, R, K> getConfig(String setId) {
209         return (MCROAISetConfiguration<Q, R, K>) this.setConfigurationMap.get(setId);
210     }
211 
212     protected OAIDataList<MCRSet> createSetList() {
213         OAIDataList<MCRSet> setList = new OAIDataList<>();
214         synchronized (this.setConfigurationMap) {
215             for (MCROAISetConfiguration<?, ?, ?> conf : this.setConfigurationMap.values()) {
216                 MCROAISetHandler<?, ?, ?> handler = conf.getHandler();
217                 Map<String, MCRSet> setMap = handler.getSetMap();
218                 synchronized (setMap) {
219                     setMap.clear();
220                     Element resolved = MCRURIResolver.instance().resolve(conf.getURI());
221                     if (resolved == null) {
222                         throw new MCRException(
223                             "Could not resolve set URI " + conf.getURI() + " for set " + conf.getId() + ".");
224                     }
225                     for (Element setElement : resolved.getChildren("set", OAIConstants.NS_OAI)) {
226                         MCRSet set = createSet(conf.getId(), setElement);
227                         setMap.put(set.getSpec(), set);
228                         if (!contains(set.getSpec(), setList)) {
229                             if (!handler.filter(set)) {
230                                 setList.add(set);
231                             }
232                         }
233                     }
234                 }
235             }
236         }
237         Collections.sort(setList, Comparator.comparing(Set::getSpec));
238         return setList;
239     }
240 
241     private MCRSet createSet(String setId, Element setElement) {
242         String setSpec = setElement.getChildText("setSpec", NS_OAI);
243         String setName = setElement.getChildText("setName", NS_OAI);
244         MCRSet set = new MCRSet(setId, getSetSpec(setSpec), setName);
245         set.getDescription()
246             .addAll(setElement.getChildren("setDescription", NS_OAI)
247                 .stream() //all setDescription
248                 .flatMap(e -> e.getChildren().stream().limit(1)) //first childElement of setDescription
249                 .peek(Element::detach)
250                 .map(d -> (Description) new Description() {
251                     @Override
252                     public Element toXML() {
253                         return d;
254                     }
255 
256                     @Override
257                     public void fromXML(Element descriptionElement) {
258                         throw new UnsupportedOperationException();
259                     }
260                 })
261                 .collect(Collectors.toList()));
262         return set;
263     }
264 
265     private String getSetSpec(String elementText) {
266         if (elementText.contains(":")) {
267             StringBuilder setSpec = new StringBuilder();
268             String classID = elementText.substring(0, elementText.indexOf(':')).trim();
269             classID = MCRClassificationAndSetMapper.mapClassificationToSet(this.configPrefix, classID);
270             setSpec.append(classID).append(elementText.substring(elementText.indexOf(':')));
271             return setSpec.toString();
272         } else {
273             return elementText;
274         }
275     }
276 
277     /**
278      * Returns the set with the specified setSpec from the set list or null, if no set with that setSpec is found.
279      *
280      * @param setSpec
281      *            identifier of the set
282      * @param setList
283      *            list of sets
284      * @return the set with setSpec
285      */
286     public static <T extends Set> T get(String setSpec, OAIDataList<T> setList) {
287         return setList.stream().filter(s -> s.getSpec().equals(setSpec)).findFirst().orElse(null);
288     }
289 
290     /**
291      * Returns true if setList contains a set with specified setSpec.
292      *
293      * @param setSpec
294      *            identifier of the set
295      * @param setList
296      *            list of sets
297      * @return true if the list contains the set
298      */
299     public static boolean contains(String setSpec, OAIDataList<? extends Set> setList) {
300         return get(setSpec, setList) != null;
301     }
302 
303 }