1 /*
2 * This file is part of *** M y C o R e ***
3 * See http://www.mycore.de/ for details.
4 *
5 * MyCoRe is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * MyCoRe is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with MyCoRe. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 package org.mycore.mods.enrichment;
20
21 import java.util.HashSet;
22 import java.util.List;
23 import java.util.Set;
24 import java.util.stream.Collectors;
25
26 import org.apache.logging.log4j.LogManager;
27 import org.apache.logging.log4j.Logger;
28 import org.jdom2.Element;
29
30 /**
31 * Tracks all identifiers found in a given publication.
32 * Each resolving and merge step may add new identifiers
33 * returned by data sources. For example,
34 * the publication may initially only have a PubMed ID,
35 * but after resolving that ID the data source may have returned
36 * the DOI of the publication. So we keep a list to distinguish
37 * newly found identifiers from already known and resolved identifiers.
38 *
39 * @author Frank L\u00FCtzenkirchen
40 */
41 class MCRIdentifierPool {
42
43 private static final Logger LOGGER = LogManager.getLogger();
44
45 /** Set of already known identifiers resolved in the last round */
46 private Set<MCRIdentifier> oldIdentifiers = new HashSet<>();
47
48 /** Set of currently processed identifiers */
49 private Set<MCRIdentifier> currentIdentifiers = new HashSet<>();
50
51 /** Set of new identifiers returned with data from external sources in the current resolving round */
52 private Set<MCRIdentifier> newIdentifiers = new HashSet<>();
53
54 /** Add all new identifiers that can be found in the given MODS object */
55 synchronized void addIdentifiersFrom(Element object) {
56 for (MCRIdentifierType type : MCRIdentifierTypeFactory.instance().getTypes()) {
57 newIdentifiers.addAll(type.getIdentifiers(object));
58 }
59 newIdentifiers.removeAll(currentIdentifiers);
60 newIdentifiers.removeAll(oldIdentifiers);
61 }
62
63 /** Remember all currently known identifiers, mark them as "old" **/
64 void prepareNextIteration() {
65 currentIdentifiers.clear();
66 currentIdentifiers.addAll(newIdentifiers);
67 oldIdentifiers.addAll(newIdentifiers);
68 newIdentifiers.clear();
69 }
70
71 boolean hasNewIdentifiers() {
72 for (MCRIdentifier id : newIdentifiers) {
73 LOGGER.info("new identifier " + id);
74 }
75
76 return !newIdentifiers.isEmpty();
77 }
78
79 Set<MCRIdentifier> getNewIdentifiers() {
80 return newIdentifiers;
81 }
82
83 List<MCRIdentifier> getCurrentIdentifiersOfType(MCRIdentifierType type) {
84 return currentIdentifiers.stream().filter(id -> id.getType().equals(type)).collect(Collectors.toList());
85 }
86 }