View Javadoc
1   /*
2    * This file is part of ***  M y C o R e  ***
3    * See http://www.mycore.de/ for details.
4    *
5    * MyCoRe is free software: you can redistribute it and/or modify
6    * it under the terms of the GNU General Public License as published by
7    * the Free Software Foundation, either version 3 of the License, or
8    * (at your option) any later version.
9    *
10   * MyCoRe is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU General Public License for more details.
14   *
15   * You should have received a copy of the GNU General Public License
16   * along with MyCoRe.  If not, see <http://www.gnu.org/licenses/>.
17   */
18  
19  package org.mycore.mods.enrichment;
20  
21  import java.util.HashSet;
22  import java.util.List;
23  import java.util.Set;
24  import java.util.stream.Collectors;
25  
26  import org.apache.logging.log4j.LogManager;
27  import org.apache.logging.log4j.Logger;
28  import org.jdom2.Element;
29  
30  /**
31   * Tracks all identifiers found in a given publication.
32   * Each resolving and merge step may add new identifiers
33   * returned by data sources. For example,
34   * the publication may initially only have a PubMed ID,
35   * but after resolving that ID the data source may have returned
36   * the DOI of the publication. So we keep a list to distinguish
37   * newly found identifiers from already known and resolved identifiers.
38   *
39   * @author Frank L\u00FCtzenkirchen
40   */
41  class MCRIdentifierPool {
42  
43      private static final Logger LOGGER = LogManager.getLogger();
44  
45      /** Set of already known identifiers resolved in the last round */
46      private Set<MCRIdentifier> oldIdentifiers = new HashSet<>();
47  
48      /** Set of currently processed identifiers */
49      private Set<MCRIdentifier> currentIdentifiers = new HashSet<>();
50  
51      /** Set of new identifiers returned with data from external sources in the current resolving round */
52      private Set<MCRIdentifier> newIdentifiers = new HashSet<>();
53  
54      /** Add all new identifiers that can be found in the given MODS object */
55      synchronized void addIdentifiersFrom(Element object) {
56          for (MCRIdentifierType type : MCRIdentifierTypeFactory.instance().getTypes()) {
57              newIdentifiers.addAll(type.getIdentifiers(object));
58          }
59          newIdentifiers.removeAll(currentIdentifiers);
60          newIdentifiers.removeAll(oldIdentifiers);
61      }
62  
63      /** Remember all currently known identifiers, mark them as "old" **/
64      void prepareNextIteration() {
65          currentIdentifiers.clear();
66          currentIdentifiers.addAll(newIdentifiers);
67          oldIdentifiers.addAll(newIdentifiers);
68          newIdentifiers.clear();
69      }
70  
71      boolean hasNewIdentifiers() {
72          for (MCRIdentifier id : newIdentifiers) {
73              LOGGER.info("new identifier " + id);
74          }
75  
76          return !newIdentifiers.isEmpty();
77      }
78  
79      Set<MCRIdentifier> getNewIdentifiers() {
80          return newIdentifiers;
81      }
82  
83      List<MCRIdentifier> getCurrentIdentifiersOfType(MCRIdentifierType type) {
84          return currentIdentifiers.stream().filter(id -> id.getType().equals(type)).collect(Collectors.toList());
85      }
86  }