View Javadoc
1   /*
2    * This file is part of ***  M y C o R e  ***
3    * See http://www.mycore.de/ for details.
4    *
5    * MyCoRe is free software: you can redistribute it and/or modify
6    * it under the terms of the GNU General Public License as published by
7    * the Free Software Foundation, either version 3 of the License, or
8    * (at your option) any later version.
9    *
10   * MyCoRe is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU General Public License for more details.
14   *
15   * You should have received a copy of the GNU General Public License
16   * along with MyCoRe.  If not, see <http://www.gnu.org/licenses/>.
17   */
18  
19  package org.mycore.pi;
20  
21  import java.text.DecimalFormat;
22  import java.text.DecimalFormatSymbols;
23  import java.text.SimpleDateFormat;
24  import java.util.ArrayList;
25  import java.util.Arrays;
26  import java.util.Comparator;
27  import java.util.Date;
28  import java.util.HashMap;
29  import java.util.List;
30  import java.util.Locale;
31  import java.util.Map;
32  import java.util.Objects;
33  import java.util.Optional;
34  import java.util.concurrent.atomic.AtomicInteger;
35  import java.util.function.Predicate;
36  import java.util.regex.Matcher;
37  import java.util.regex.Pattern;
38  import java.util.stream.Collectors;
39  import java.util.stream.IntStream;
40  
41  import org.apache.logging.log4j.LogManager;
42  import org.apache.logging.log4j.Logger;
43  import org.jdom2.Attribute;
44  import org.jdom2.Element;
45  import org.jdom2.Text;
46  import org.jdom2.filter.Filters;
47  import org.jdom2.xpath.XPathExpression;
48  import org.jdom2.xpath.XPathFactory;
49  import org.mycore.common.MCRConstants;
50  import org.mycore.common.MCRException;
51  import org.mycore.common.config.MCRConfigurationException;
52  import org.mycore.common.config.annotation.MCRPostConstruction;
53  import org.mycore.datamodel.metadata.MCRBase;
54  import org.mycore.datamodel.metadata.MCRObjectID;
55  import org.mycore.datamodel.metadata.MCRObjectService;
56  import org.mycore.pi.exceptions.MCRPersistentIdentifierException;
57  
58  /**
59   *
60   * MCR.PI.Generator.myGenerator=org.mycore.pi.urn.MCRGenericPIGenerator
61   *
62   * Set a generic pattern.
63   *
64   * MCR.PI.Generator.myGenerator.GeneralPattern=urn:nbn:de:gbv:$CurrentDate-$1-$2-$ObjectType-$ObjectProject-$ObjectNumber-$Count-
65   * MCR.PI.Generator.myGenerator.GeneralPattern=urn:nbn:de:gbv:$ObjectDate-$ObjectType-$Count
66   * MCR.PI.Generator.myGenerator.GeneralPattern=urn:nbn:de:gbv:$ObjectDate-$Count
67   * MCR.PI.Generator.myGenerator.GeneralPattern=urn:nbn:de:gbv:$ObjectType-$Count
68   * MCR.PI.Generator.myGenerator.GeneralPattern=urn:nbn:de:gbv:$0-$1-$Count
69   *
70   * Set a optional DateFormat, if not set the ddMMyyyy is just used as value. (SimpleDateFormat)
71   *
72   * MCR.PI.Generator.myGenerator.DateFormat=ddMMyyyy
73   *
74   * Set a optional ObjectType mapping, if not set the ObjectType is just used as value
75   *
76   * MCR.PI.Generator.myGenerator.TypeMapping=document:doc,disshab:diss,Thesis:Thesis,bundle:doc,mods:test
77   *
78   * You can also map the projectid
79   *
80   * Set a optional Count precision, if not set or set to -1 the pure number is used (1,2,.., 999).
81   * Count always relativ to type and date.
82   *
83   * MCR.PI.Generator.myGenerator.CountPrecision=3 # will produce 001, 002, ... , 999
84   *
85   * Set the Type of the generated pi.
86   *
87   * MCR.PI.Generator.myGenerator.Type=dnbURN
88   *
89   *
90   * Set the Xpaths
91   *
92   * MCR.PI.Generator.myGenerator.XPath.1=/mycoreobject/metadata/def.shelf/shelf/
93   * MCR.PI.Generator.myGenerator.XPath.2=/mycoreobject/metadata/def.path2/path2/
94   *
95   * @author Sebastian Hofmann
96   */
97  public class MCRGenericPIGenerator extends MCRPIGenerator<MCRPersistentIdentifier> {
98  
99      static final String PLACE_HOLDER_CURRENT_DATE = "$CurrentDate";
100 
101     static final String PLACE_HOLDER_OBJECT_DATE = "$ObjectDate";
102 
103     static final String PLACE_HOLDER_OBJECT_TYPE = "$ObjectType";
104 
105     static final String PLACE_HOLDER_OBJECT_PROJECT = "$ObjectProject";
106 
107     static final String PLACE_HOLDER_COUNT = "$Count";
108 
109     static final String PLACE_HOLDER_OBJECT_NUMBER = "$ObjectNumber";
110 
111     private static final Logger LOGGER = LogManager.getLogger();
112 
113     private static final String PROPERTY_KEY_GENERAL_PATTERN = "GeneralPattern";
114 
115     private static final String PROPERTY_KEY_DATE_FORMAT = "DateFormat";
116 
117     private static final String PROPERTY_KEY_OBJECT_TYPE_MAPPING = "ObjectTypeMapping";
118 
119     private static final String PROPERTY_KEY_OBJECT_PROJECT_MAPPING = "ObjectProjectMapping";
120 
121     private static final String PROPERTY_KEY_COUNT_PRECISION = "CountPrecision";
122 
123     private static final String PROPERTY_KEY_XPATH = "XPath";
124 
125     private static final String PROPERTY_KEY_TYPE = "Type";
126 
127     private static final Map<String, AtomicInteger> PATTERN_COUNT_MAP = new HashMap<>();
128 
129     private static final Pattern XPATH_PATTERN = Pattern.compile("\\$([0-9]+)", Pattern.DOTALL);
130 
131     private String generalPattern;
132 
133     private SimpleDateFormat dateFormat;
134 
135     private String objectTypeMapping;
136 
137     private String objectProjectMapping;
138 
139     private int countPrecision;
140 
141     private String type;
142 
143     private String[] xpath;
144 
145     public MCRGenericPIGenerator() {
146         super();
147     }
148 
149     @MCRPostConstruction
150     public void init(String property) {
151         super.init(property);
152         final Map<String, String> properties = getProperties();
153 
154         setGeneralPattern(properties.get(PROPERTY_KEY_GENERAL_PATTERN));
155 
156         setDateFormat(Optional.ofNullable(properties.get(PROPERTY_KEY_DATE_FORMAT))
157             .map(format -> new SimpleDateFormat(format, Locale.ROOT))
158             .orElse(new SimpleDateFormat("ddMMyyyy", Locale.ROOT)));
159 
160         setObjectTypeMapping(properties.get(PROPERTY_KEY_OBJECT_TYPE_MAPPING));
161         setObjectProjectMapping(properties.get(PROPERTY_KEY_OBJECT_PROJECT_MAPPING));
162 
163         setCountPrecision(Optional.ofNullable(properties.get(PROPERTY_KEY_COUNT_PRECISION))
164             .map(Integer::parseInt)
165             .orElse(-1));
166 
167         setType(properties.get(PROPERTY_KEY_TYPE));
168 
169         List<String> xpaths = new ArrayList<>();
170         int count = 1;
171         while (properties.containsKey(PROPERTY_KEY_XPATH + "." + count)) {
172             xpaths.add(properties.get(PROPERTY_KEY_XPATH + "." + count));
173             count++;
174         }
175 
176         setXpath(xpaths.toArray(new String[0]));
177         validateProperties();
178     }
179 
180     // for testing purposes
181     MCRGenericPIGenerator(String generalPattern, SimpleDateFormat dateFormat,
182         String objectTypeMapping, String objectProjectMapping,
183         int countPrecision, String type, String... xpaths) {
184         super();
185         setObjectProjectMapping(objectProjectMapping);
186         setGeneralPattern(generalPattern);
187         setDateFormat(dateFormat);
188         setObjectTypeMapping(objectTypeMapping);
189         setCountPrecision(countPrecision);
190         setType(type);
191         validateProperties();
192         setXpath(xpaths);
193     }
194 
195     private void setXpath(String... xpaths) {
196         this.xpath = xpaths;
197     }
198 
199     private void validateProperties() {
200         if (countPrecision == -1 && "dnbUrn".equals(getType())) {
201             throw new MCRConfigurationException(
202                 PROPERTY_KEY_COUNT_PRECISION + "=-1 and " + PROPERTY_KEY_TYPE + "=urn is not supported!");
203         }
204     }
205 
206     @Override
207     public MCRPersistentIdentifier generate(MCRBase mcrBase, String additional)
208         throws MCRPersistentIdentifierException {
209 
210         String resultingPI = getGeneralPattern();
211 
212         if (resultingPI.contains(PLACE_HOLDER_CURRENT_DATE)) {
213             resultingPI = resultingPI.replace(PLACE_HOLDER_CURRENT_DATE, getDateFormat().format(new Date()));
214         }
215 
216         if (resultingPI.contains(PLACE_HOLDER_OBJECT_DATE)) {
217             final Date objectCreateDate = mcrBase.getService().getDate(MCRObjectService.DATE_TYPE_CREATEDATE);
218             resultingPI = resultingPI.replace(PLACE_HOLDER_OBJECT_DATE, getDateFormat().format(objectCreateDate));
219         }
220 
221         if (resultingPI.contains(PLACE_HOLDER_OBJECT_TYPE)) {
222             final String mappedObjectType = getMappedType(mcrBase.getId());
223             resultingPI = resultingPI.replace(PLACE_HOLDER_OBJECT_TYPE, mappedObjectType);
224         }
225 
226         if (resultingPI.contains(PLACE_HOLDER_OBJECT_PROJECT)) {
227             final String mappedObjectProject = getMappedProject(mcrBase.getId());
228             resultingPI = resultingPI.replace(PLACE_HOLDER_OBJECT_PROJECT, mappedObjectProject);
229         }
230 
231         if (resultingPI.contains(PLACE_HOLDER_OBJECT_NUMBER)) {
232             resultingPI = resultingPI.replace(PLACE_HOLDER_OBJECT_NUMBER, mcrBase.getId().getNumberAsString());
233         }
234 
235         if (XPATH_PATTERN.asPredicate().test(resultingPI)) {
236             resultingPI = XPATH_PATTERN.matcher(resultingPI).replaceAll((mr) -> {
237                 final String xpathNumberString = mr.group(1);
238                 final int xpathNumber = Integer.parseInt(xpathNumberString, 10) - 1;
239                 if (this.xpath.length <= xpathNumber || xpathNumber < 0) {
240                     throw new MCRException(
241                         "The index of " + xpathNumber + " is out of bounds of xpath array (" + xpath.length + ")");
242                 }
243 
244                 final String xpathString = this.xpath[xpathNumber];
245                 XPathFactory factory = XPathFactory.instance();
246                 XPathExpression<Object> expr = factory.compile(xpathString, Filters.fpassthrough(), null,
247                     MCRConstants.getStandardNamespaces());
248                 final Object content = expr.evaluateFirst(mcrBase.createXML());
249 
250                 if (content instanceof Text) {
251                     return ((Text) content).getTextNormalize();
252                 } else if (content instanceof Attribute) {
253                     return ((Attribute) content).getValue();
254                 } else if (content instanceof Element) {
255                     return ((Element) content).getTextNormalize();
256                 } else {
257                     return content.toString();
258                 }
259             });
260             System.out.println(resultingPI);
261         }
262 
263         final MCRPIParser<MCRPersistentIdentifier> parser = MCRPIManager.getInstance()
264             .getParserForType(getType());
265 
266         String result;
267 
268         result = applyCount(resultingPI);
269 
270         if (getType().equals("dnbUrn")) {
271             result = result + "C"; // will be replaced by the URN-Parser
272         }
273 
274         String finalResult = result;
275         return parser.parse(finalResult)
276             .orElseThrow(() -> new MCRPersistentIdentifierException("Could not parse " + finalResult));
277 
278     }
279 
280     private String applyCount(String resultingPI) {
281         String result;
282         if (resultingPI.contains(PLACE_HOLDER_COUNT)) {
283             final int countPrecision = getCountPrecision();
284             String regexpStr;
285 
286             if (countPrecision == -1) {
287                 regexpStr = "([0-9]+)";
288             } else {
289                 regexpStr = "("
290                     + IntStream.range(0, countPrecision).mapToObj((i) -> "[0-9]").collect(Collectors.joining(""))
291                     + ")";
292             }
293 
294             String counterPattern = resultingPI.replace(PLACE_HOLDER_COUNT, regexpStr);
295             if (getType().equals("dnbUrn")) {
296                 counterPattern = counterPattern + "[0-9]";
297             }
298 
299             LOGGER.info("Counter pattern is {}", counterPattern);
300 
301             final int count = getCount(counterPattern);
302             LOGGER.info("Count is {}", count);
303             final String pattern = IntStream.range(0, Math.abs(countPrecision)).mapToObj((i) -> "0")
304                 .collect(Collectors.joining(""));
305             DecimalFormat decimalFormat = new DecimalFormat(pattern, DecimalFormatSymbols.getInstance(Locale.ROOT));
306             final String countAsString = countPrecision != -1 ? decimalFormat.format(count) : String.valueOf(count);
307             result = resultingPI.replace(PLACE_HOLDER_COUNT, countAsString);
308         } else {
309             result = resultingPI;
310         }
311         return result;
312     }
313 
314     private String getMappedType(MCRObjectID id) {
315         String mapping = getObjectTypeMapping();
316         String typeID = id.getTypeId();
317 
318         return Optional.ofNullable(mapping)
319             .map(mappingStr -> mappingStr.split(","))
320             .map(Arrays::asList)
321             .filter(o -> o.get(0).equals(typeID))
322             .map(o -> o.get(1))
323             .orElse(typeID);
324     }
325 
326     private String getMappedProject(MCRObjectID id) {
327         String mapping = getObjectProjectMapping();
328         String projectID = id.getProjectId();
329 
330         return Optional.ofNullable(mapping)
331             .map(mappingStr -> mappingStr.split(","))
332             .map(Arrays::asList)
333             .filter(o -> o.get(0).equals(projectID))
334             .map(o -> o.get(1))
335             .orElse(projectID);
336     }
337 
338     protected AtomicInteger readCountFromDatabase(String countPattern) {
339         Pattern regExpPattern = Pattern.compile(countPattern);
340         Predicate<String> matching = regExpPattern.asPredicate();
341 
342         List<MCRPIRegistrationInfo> list = MCRPIManager.getInstance()
343             .getList(getType(), -1, -1);
344 
345         // extract the number of the PI
346         Optional<Integer> highestNumber = list.stream()
347             .map(MCRPIRegistrationInfo::getIdentifier)
348             .filter(matching)
349             .map(pi -> {
350                 // extract the number of the PI
351                 Matcher matcher = regExpPattern.matcher(pi);
352                 if (matcher.find() && matcher.groupCount() == 1) {
353                     String group = matcher.group(1);
354                     return Integer.parseInt(group, 10);
355                 } else {
356                     return null;
357                 }
358             }).filter(Objects::nonNull)
359             .min(Comparator.reverseOrder())
360             .map(n -> n + 1);
361         return new AtomicInteger(highestNumber.orElse(0));
362     }
363 
364     private String getType() {
365         return type;
366     }
367 
368     public void setType(String type) {
369         this.type = type;
370     }
371 
372     public String getGeneralPattern() {
373         return generalPattern;
374     }
375 
376     public void setGeneralPattern(String generalPattern) {
377         this.generalPattern = generalPattern;
378     }
379 
380     public SimpleDateFormat getDateFormat() {
381         return dateFormat;
382     }
383 
384     public void setDateFormat(SimpleDateFormat dateFormat) {
385         this.dateFormat = dateFormat;
386     }
387 
388     public String getObjectTypeMapping() {
389         return objectTypeMapping;
390     }
391 
392     public void setObjectTypeMapping(String typeMapping) {
393         this.objectTypeMapping = typeMapping;
394     }
395 
396     public int getCountPrecision() {
397         return countPrecision;
398     }
399 
400     public void setCountPrecision(int countPrecision) {
401         this.countPrecision = countPrecision;
402     }
403 
404     /**
405      * Gets the count for a specific pattern and increase the internal counter. If there is no internal counter it will
406      * look into the Database and detect the highest count with the pattern.
407      *
408      * @param pattern a reg exp pattern which will be used to detect the highest count. The first group is the count.
409      *                e.G. [0-9]+-mods-2017-([0-9][0-9][0-9][0-9])-[0-9] will match 31-mods-2017-0003-3 and the returned
410      *                count will be 4 (3+1).
411      * @return the next count
412      */
413     public final synchronized int getCount(String pattern) {
414         AtomicInteger count = PATTERN_COUNT_MAP
415             .computeIfAbsent(pattern, this::readCountFromDatabase);
416 
417         return count.getAndIncrement();
418     }
419 
420     public String getObjectProjectMapping() {
421         return objectProjectMapping;
422     }
423 
424     public void setObjectProjectMapping(String objectProjectMapping) {
425         this.objectProjectMapping = objectProjectMapping;
426     }
427 }