View Javadoc
1   /*
2    * This file is part of ***  M y C o R e  ***
3    * See http://www.mycore.de/ for details.
4    *
5    * MyCoRe is free software: you can redistribute it and/or modify
6    * it under the terms of the GNU General Public License as published by
7    * the Free Software Foundation, either version 3 of the License, or
8    * (at your option) any later version.
9    *
10   * MyCoRe is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU General Public License for more details.
14   *
15   * You should have received a copy of the GNU General Public License
16   * along with MyCoRe.  If not, see <http://www.gnu.org/licenses/>.
17   */
18  
19  package org.mycore.mods.enrichment;
20  
21  import java.io.IOException;
22  import java.net.URLEncoder;
23  import java.nio.charset.StandardCharsets;
24  import java.text.MessageFormat;
25  import java.util.Locale;
26  
27  import org.apache.logging.log4j.LogManager;
28  import org.apache.logging.log4j.Logger;
29  import org.jdom2.Document;
30  import org.jdom2.Element;
31  import org.mycore.common.xml.MCRURIResolver;
32  import org.mycore.common.xml.MCRXMLHelper;
33  import org.mycore.mods.MCRMODSCommands;
34  import org.xml.sax.SAXException;
35  
36  /**
37   * Returns publication data in MODS format for a given identifier.
38   * Each resolver belongs to a certain data source, e.g. the data source
39   * "PubMed" may have two resolvers to get publication data by DOI or PubMed article ID.
40   *
41   * The resolver will use a URI to get the publication data.
42   * MCR.MODS.EnrichmentResolver.DataSource.[SourceID].[TypeID].URI=[URI]
43   *
44   * This is typically an HTTP URL followed by an XSL stylesheet to transform the
45   * source format to MODS, e.g.
46   * MCR.MODS.EnrichmentResolver.DataSource.DataCite.doi.URI=xslStyle:datacite2mods:https://data.datacite.org/application/vnd.datacite.datacite+xml/{0}
47   *
48   * Within the URI, the pattern {0} will be replaced by the given identifier value,
49   * optionally the pattern {1} will be replaced by the value uri-encoded as http request parameter
50   *
51   * @author Frank L\u00FCtzenkirchen
52   */
53  class MCRIdentifierResolver {
54  
55      private static final Logger LOGGER = LogManager.getLogger(MCRIdentifierResolver.class);
56  
57      private MCRDataSource ds;
58  
59      private MCRIdentifierType idType;
60  
61      private String uriPattern;
62  
63      MCRIdentifierResolver(MCRDataSource ds, MCRIdentifierType idType, String uriPattern) {
64          this.ds = ds;
65          this.idType = idType;
66          this.uriPattern = uriPattern;
67      }
68  
69      MCRIdentifierType getType() {
70          return idType;
71      }
72  
73      /**
74       * Tries to resolve publication data for the given identifier.
75       *
76       * @param identifier the identifier's value, e.g. a DOI or ISBN
77       * @return the publication data in MODS format, or null if the data source did not return data for this identifier
78       */
79      Element resolve(String identifier) {
80          Object[] params = new Object[] { identifier, URLEncoder.encode(identifier, StandardCharsets.UTF_8) };
81          String uri = new MessageFormat(uriPattern, Locale.ROOT).format(params);
82  
83          Element resolved = null;
84          try {
85              resolved = MCRURIResolver.instance().resolve(uri);
86          } catch (Exception ex) {
87              LOGGER.warn("Exception resolving " + uri, ex);
88              return null;
89          }
90  
91          // Normalize various error/not found cases
92          if (resolved == null || !"mods".equals(resolved.getName()) || resolved.getChildren().isEmpty()) {
93              LOGGER.warn(ds + " returned none or empty MODS for " + idType + " " + identifier);
94              return null;
95          }
96  
97          try {
98              ensureIsValidMODS(resolved);
99              return resolved;
100         } catch (Exception ex) {
101             LOGGER.warn(ds + " returned invalid MODS for " + identifier + ": " + ex.getMessage(), ex);
102             return null;
103         }
104     }
105 
106     void ensureIsValidMODS(Element mods) throws SAXException, IOException {
107         MCRXMLHelper.validate(new Document().addContent(mods.detach()), MCRMODSCommands.MODS_V3_XSD_URI);
108     }
109 }