/*
 * This file is part of ***  M y C o R e  ***
 * See http://www.mycore.de/ for details.
 *
 * MyCoRe is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * MyCoRe is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with MyCoRe.  If not, see <http://www.gnu.org/licenses/>.
 */
18  
19  package org.mycore.common.xml;
20  
21  import java.io.ByteArrayInputStream;
22  import java.io.ByteArrayOutputStream;
23  import java.io.IOException;
24  import java.io.InputStream;
25  import java.io.StringReader;
26  import java.net.MalformedURLException;
27  import java.net.URI;
28  import java.net.URL;
29  import java.nio.file.Files;
30  import java.nio.file.Path;
31  import java.nio.file.Paths;
32  import java.nio.file.spi.FileSystemProvider;
33  import java.util.Enumeration;
34  import java.util.Objects;
35  
36  import javax.xml.catalog.CatalogException;
37  import javax.xml.catalog.CatalogFeatures;
38  import javax.xml.catalog.CatalogManager;
39  import javax.xml.catalog.CatalogResolver;
40  
41  import org.apache.commons.io.IOUtils;
42  import org.apache.logging.log4j.LogManager;
43  import org.apache.logging.log4j.Logger;
44  import org.mycore.common.MCRCache;
45  import org.mycore.common.MCRClassTools;
46  import org.mycore.common.MCRStreamUtils;
47  import org.mycore.common.MCRUtils;
48  import org.mycore.common.config.MCRConfiguration2;
49  import org.mycore.common.function.MCRThrowFunction;
50  import org.w3c.dom.ls.LSInput;
51  import org.w3c.dom.ls.LSResourceResolver;
52  import org.xml.sax.InputSource;
53  import org.xml.sax.ext.EntityResolver2;
54  
55  /**
56   * MCREntityResolver uses {@link CatalogResolver} for resolving entities or - for compatibility reasons - looks in
57   * classpath to resolve XSD and DTD files.
58   * 
59   * @author Thomas Scheffler (yagee)
60   * @since 2013.10
61   */
62  public class MCREntityResolver implements EntityResolver2, LSResourceResolver {
63  
64      public static final Logger LOGGER = LogManager.getLogger(MCREntityResolver.class);
65  
66      private static final String CONFIG_PREFIX = "MCR.URIResolver.";
67  
68      CatalogResolver catalogResolver;
69  
70      private MCRCache<String, InputSourceProvider> bytesCache;
71  
72      private MCREntityResolver() {
73          Enumeration<URL> systemResources;
74          try {
75              systemResources = MCRClassTools.getClassLoader().getResources("catalog.xml");
76          } catch (IOException e) {
77              throw new ExceptionInInitializerError(e);
78          }
79          URI[] catalogURIs = MCRStreamUtils.asStream(systemResources)
80              .map(URL::toString)
81              .peek(c -> LOGGER.info("Using XML catalog: {}", c))
82              .map(URI::create)
83              .toArray(URI[]::new);
84          catalogResolver = CatalogManager.catalogResolver(CatalogFeatures.defaults(), catalogURIs);
85          int cacheSize = MCRConfiguration2.getInt(CONFIG_PREFIX + "StaticFiles.CacheSize").orElse(100);
86          bytesCache = new MCRCache<>(cacheSize, "EntityResolver Resources");
87      }
88  
89      public static MCREntityResolver instance() {
90          return MCREntityResolverHolder.instance;
91      }
92  
93      private static boolean isAbsoluteURL(String url) {
94          try {
95              URL baseHttp = new URL("http://www.mycore.org");
96              URL baseFile = new URL("file:///");
97              URL relativeHttp = new URL(baseHttp, url);
98              URL relativeFile = new URL(baseFile, url);
99              return relativeFile.equals(relativeHttp);
100         } catch (MalformedURLException e) {
101             return false;
102         }
103     }
104 
105     private InputSource resolveEntity(String publicId, String systemId,
106         MCRThrowFunction<CatalogEntityIdentifier, InputSource, IOException> alternative) throws IOException {
107         try {
108             InputSource entity = catalogResolver.resolveEntity(publicId, systemId);
109             if (entity != null) {
110                 return resolvedEntity(entity);
111             }
112         } catch (CatalogException e) {
113             LOGGER.debug(e.getMessage());
114         }
115         return alternative.apply(new CatalogEntityIdentifier(publicId, systemId));
116     }
117 
118     /* (non-Javadoc)
119      * @see org.xml.sax.EntityResolver#resolveEntity(java.lang.String, java.lang.String)
120      */
121     @Override
122     public InputSource resolveEntity(String publicId, String systemId) throws IOException {
123         LOGGER.debug("Resolving: \npublicId: {}\nsystemId: {}", publicId, systemId);
124         return resolveEntity(publicId, systemId, id -> resolveClassRessource(id.publicId, id.systemId));
125     }
126 
127     /* (non-Javadoc)
128      * @see org.xml.sax.ext.EntityResolver2#getExternalSubset(java.lang.String, java.lang.String)
129      */
130     @Override
131     public InputSource getExternalSubset(String name, String baseURI) {
132         LOGGER.debug("External Subset: \nname: {}\nbaseURI: {}", name, baseURI);
133         return null;
134     }
135 
136     /* (non-Javadoc)
137      * @see org.xml.sax.ext.EntityResolver2#resolveEntity(java.lang.String, java.lang.String, java.lang.String, java.lang.String)
138      */
139     @Override
140     public InputSource resolveEntity(String name, String publicId, String baseURI, String systemId)
141         throws IOException {
142         LOGGER.debug("Resolving: \nname: {}\npublicId: {}\nbaseURI: {}\nsystemId: {}", name, publicId, baseURI,
143             systemId);
144         return resolveEntity(publicId, systemId, id -> resolveRelativeEntity(baseURI, id));
145     }
146 
147     private InputSource resolveRelativeEntity(String baseURI, CatalogEntityIdentifier id)
148         throws IOException {
149         if (id.systemId == null) {
150             return null; // Use default resolver
151         }
152 
153         if (id.systemId.length() == 0) {
154             // if you overwrite SYSTEM by empty String in XSL
155             return new InputSource(new StringReader(""));
156         }
157 
158         //resolve against base:
159         URI absoluteSystemId = resolveRelativeURI(baseURI, id.systemId);
160         if (absoluteSystemId.isAbsolute()) {
161             if (uriExists(absoluteSystemId)) {
162                 InputSource inputSource = new InputSource(absoluteSystemId.toString());
163                 inputSource.setPublicId(id.publicId);
164                 return resolvedEntity(inputSource);
165             }
166             //resolve absolute URI against catalog first
167             return resolveEntity(id.publicId, absoluteSystemId.toString(),
168                 id2 -> resolveClassRessource(id.publicId, id.systemId));
169         }
170         return resolveClassRessource(id.publicId, id.systemId);
171     }
172 
173     private InputSource resolveClassRessource(String publicId, String systemId) throws IOException {
174         if (MCRUtils.filterTrimmedNotEmpty(systemId).isEmpty()) {
175             return null;
176         }
177         //required for XSD files that are usually classpath resources
178         InputSource is = getCachedResource("/" + systemId);
179         if (is == null) {
180             return null;
181         }
182         is.setPublicId(publicId);
183         return resolvedEntity(is);
184     }
185 
186     private boolean uriExists(URI absoluteSystemId) {
187         if (absoluteSystemId.getScheme().startsWith("http")) {
188             return false; //default resolver handles http anyway
189         }
190         if (absoluteSystemId.getScheme().equals("jar")) {
191             //multithread issues, when using ZIP filesystem with second check
192             try {
193                 URL jarURL = absoluteSystemId.toURL();
194                 try (InputStream is = jarURL.openStream()) {
195                     return is != null;
196                 }
197             } catch (IOException e) {
198                 LOGGER.error("Error while checking (URL) URI: {}", absoluteSystemId, e);
199             }
200         }
201         try {
202             if (isFileSystemAvailable(absoluteSystemId.getScheme())) {
203                 Path pathTest = Paths.get(absoluteSystemId);
204                 LOGGER.debug("Checking: {}", pathTest);
205                 return Files.exists(pathTest);
206             }
207         } catch (Exception e) {
208             LOGGER.error("Error while checking (Path) URI: {}", absoluteSystemId, e);
209         }
210         return false;
211     }
212 
213     private boolean isFileSystemAvailable(String scheme) {
214         return FileSystemProvider
215             .installedProviders()
216             .stream()
217             .map(FileSystemProvider::getScheme)
218             .anyMatch(Objects.requireNonNull(scheme)::equals);
219     }
220 
221     private URI resolveRelativeURI(String baseURI, String systemId) {
222         if (baseURI == null || isAbsoluteURL(systemId)) {
223             return URI.create(systemId);
224         }
225         return URI.create(baseURI).resolve(systemId);
226     }
227 
228     @Override
229     public LSInput resolveResource(String type, String namespaceURI, String publicId, String systemId, String baseURI) {
230         LOGGER.debug("Resolving resource: \ntype: {}\nnamespaceURI: {}\npublicId: {}\nsystemId: {}\nbaseURI: {}",
231             type, namespaceURI, publicId, systemId, baseURI);
232         return catalogResolver.resolveResource(type, namespaceURI, publicId, systemId, baseURI);
233     }
234 
235     private InputSource resolvedEntity(InputSource entity) {
236         String msg = "Resolved to: " + entity.getSystemId() + ".";
237         LOGGER.debug(msg);
238         return entity;
239     }
240 
241     private InputSource getCachedResource(String classResource) throws IOException {
242         URL resourceURL = this.getClass().getResource(classResource);
243         if (resourceURL == null) {
244             LOGGER.debug("{} not found", classResource);
245             return null;
246         }
247         InputSourceProvider is = bytesCache.get(classResource);
248         if (is == null) {
249             LOGGER.debug("Resolving resource {}", classResource);
250             final byte[] bytes;
251             try (ByteArrayOutputStream baos = new ByteArrayOutputStream();
252                 InputStream in = resourceURL.openStream()) {
253                 IOUtils.copy(in, baos);
254                 bytes = baos.toByteArray();
255             }
256             is = new InputSourceProvider(bytes, resourceURL);
257             bytesCache.put(classResource, is);
258         }
259         return is.newInputSource();
260     }
261 
262     private static class MCREntityResolverHolder {
263         public static MCREntityResolver instance = new MCREntityResolver();
264     }
265 
266     private static class InputSourceProvider {
267         byte[] bytes;
268 
269         URL url;
270 
271         InputSourceProvider(byte[] bytes, URL url) {
272             this.bytes = bytes;
273             this.url = url;
274         }
275 
276         public InputSource newInputSource() {
277             InputSource is = new InputSource(url.toString());
278             is.setByteStream(new ByteArrayInputStream(bytes));
279             return is;
280         }
281     }
282 
283     private static class CatalogEntityIdentifier {
284         private String publicId;
285 
286         private String systemId;
287 
288         private CatalogEntityIdentifier(String publicId, String systemId) {
289             this.publicId = publicId;
290             this.systemId = systemId;
291         }
292     }
293 }